What is This?
I wanted to see what the hidden layer of a VAE (and of an AE) looks like while it trains, so I visualized it and turned the results into GIFs.
I had long felt that feeding a VAE all of the MNIST digits at once makes the result hard to interpret, so here I also train it on a single digit only.
All I did was add visualization code, but I have put the source under the "Code" headings below. I hope it is useful to anyone who wants to reuse it.
To make the GIFs, install ImageMagick and stitch the saved frames together with a command like the one below.
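(The exact command is not reproduced here, but a typical ImageMagick invocation over the frames saved by the listing below would look like the following; the delay value and output file name are assumptions.)

convert -delay 20 -loop 0 svaimgs/frame_*.png vae_latent.gif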
In everything below, the hidden (latent) layer is 2-dimensional.
VAE Visualization Results
VAE with 10 digits
First, the usual training on all 10 MNIST digit classes. The whole cloud of points gradually pulls in toward the center.
The images used for visualization are test data and are not used for training.

VAE with 1 digit
Next, the VAE is trained only on the digit 3, and only 3s are fed in as test images.
It may just be my imagination, but the distorted 3s you rarely see seem to land around the edges, while the familiar-looking 3s gather in the middle...
Overall, the distribution appears to be pulling itself toward a Gaussian.

VAE with 1 digit & other digits
Next, the VAE is again trained only on the digit 3, but this time the test images include 3s as well as the other digits.
I had pictured all the non-3 digits ending up on the outside, with only 3s left on the inside,
but perhaps because 2 latent variables are simply too few, it did not work out that cleanly...
Some 2s and 6s creep in a bit.

AE Visualization Results
AE with 10 digits
First, the usual training on all 10 MNIST digit classes. The values stay within 0 to 1 thanks to the sigmoid.
I have rarely seen this done with ReLU, so I left the sigmoid as is.
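For reference, here is a minimal sketch of the kind of AE being described: a 784-512-2 layout mirroring the VAE listing further down, with a sigmoid on the 2-unit bottleneck. This is an assumed reconstruction (biases omitted for brevity), not the exact code from the Code sections.

import tensorflow as tf

image_dim, hidden_dim, latent_dim = 784, 512, 2
input_image = tf.placeholder(tf.float32, shape=[None, image_dim])

def glorot_init(shape):
    return tf.random_normal(shape=shape, stddev=1. / tf.sqrt(shape[0] / 2.))

weights = {
    'encoder_h1': tf.Variable(glorot_init([image_dim, hidden_dim])),
    'encoder_out': tf.Variable(glorot_init([hidden_dim, latent_dim])),
    'decoder_h1': tf.Variable(glorot_init([latent_dim, hidden_dim])),
    'decoder_out': tf.Variable(glorot_init([hidden_dim, image_dim])),
}

# Encoder: the 2-unit bottleneck passes through a sigmoid, which is why the
# plotted codes always stay inside the 0..1 square
hidden = tf.nn.sigmoid(tf.matmul(input_image, weights['encoder_h1']))
code = tf.nn.sigmoid(tf.matmul(hidden, weights['encoder_out']))

# Decoder: mirror of the encoder
dec_hidden = tf.nn.sigmoid(tf.matmul(code, weights['decoder_h1']))
reconstruction = tf.nn.sigmoid(tf.matmul(dec_hidden, weights['decoder_out']))

# Plain reconstruction loss; unlike the VAE there is no sampling and no KL term
loss_op = tf.reduce_mean(tf.pow(input_image - reconstruction, 2))
train_op = tf.train.RMSPropOptimizer(0.001).minimize(loss_op)
# `code` is what gets fetched and plotted in 2D instead of z_mean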

AE with 1 digit (FAIL)
Next I restricted training to 3s only, but it no longer seems to learn well.
I am still looking into the cause. I cannot see any particular reason why it should stop working...

Most of the code is based on aymericdamien's code.
The code, Gists included, is available at UrusuLambda's Github.
Code (VAE just visualize 2d hidden layer. Multiple Digits)
This is the code used when training a VAE on all 10 MNIST digit classes. Visualization code is added on top of the original: the values of the hidden layer are fetched and plotted in 2D.
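The full 10-digit listing is not reproduced in this section; it is essentially the single-digit listing further below with the digit filtering removed. A rough sketch of the parts that differ (variable names follow that listing; this is an assumed reconstruction, not the original code verbatim):

# Visualization batch: just take the first `testnums` test images, all classes mixed
target_test_images = mnist.test.images[:testnums]
target_test_labels = mnist.test.labels[:testnums]

# Training loop: feed every batch as-is, with no filtering by label
for step in range(1, num_steps + 1):
    batch_x, _ = mnist.train.next_batch(batch_size)
    _, l = sess.run([train_op, loss_op], feed_dict={input_image: batch_x})
# (the plotting loop then runs over range(testnums) instead of testnums + 45)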
Code (VAE just visualize 2d hidden layer. Single Digits)
This is the code used when training the VAE on a single MNIST digit class. (It hardly differs from the first one.)
#!/usr/bin/env python
from __future__ import division, print_function, absolute_import

import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import tensorflow as tf
from matplotlib.offsetbox import (TextArea, DrawingArea, OffsetImage,
                                  AnnotationBbox)

# Import MNIST data (integer labels, not one-hot, so batches can be filtered by digit)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# Parameters
learning_rate = 0.001
num_steps = 60000
batch_size = 64

# Network Parameters
image_dim = 784  # MNIST images are 28x28 pixels
hidden_dim = 512
latent_dim = 2

# A custom initialization (see Xavier Glorot init)
def glorot_init(shape):
    return tf.random_normal(shape=shape, stddev=1. / tf.sqrt(shape[0] / 2.))

# Variables
weights = {
    'encoder_h1': tf.Variable(glorot_init([image_dim, hidden_dim])),
    'z_mean': tf.Variable(glorot_init([hidden_dim, latent_dim])),
    'z_std': tf.Variable(glorot_init([hidden_dim, latent_dim])),
    'decoder_h1': tf.Variable(glorot_init([latent_dim, hidden_dim])),
    'decoder_out': tf.Variable(glorot_init([hidden_dim, image_dim]))
}
biases = {
    'encoder_b1': tf.Variable(glorot_init([hidden_dim])),
    'z_mean': tf.Variable(glorot_init([latent_dim])),
    'z_std': tf.Variable(glorot_init([latent_dim])),
    'decoder_b1': tf.Variable(glorot_init([hidden_dim])),
    'decoder_out': tf.Variable(glorot_init([image_dim]))
}

# Building the encoder
input_image = tf.placeholder(tf.float32, shape=[None, image_dim])
encoder = tf.matmul(input_image, weights['encoder_h1']) + biases['encoder_b1']
encoder = tf.nn.tanh(encoder)
z_mean = tf.matmul(encoder, weights['z_mean']) + biases['z_mean']
z_std = tf.matmul(encoder, weights['z_std']) + biases['z_std']

# Sampler: Normal (gaussian) random distribution
eps = tf.random_normal(tf.shape(z_std), dtype=tf.float32, mean=0., stddev=1.0,
                       name='epsilon')
z = z_mean + tf.exp(z_std / 2) * eps

# Building the decoder (with scope to re-use these layers later)
decoder = tf.matmul(z, weights['decoder_h1']) + biases['decoder_b1']
decoder = tf.nn.tanh(decoder)
decoder = tf.matmul(decoder, weights['decoder_out']) + biases['decoder_out']
decoder = tf.nn.sigmoid(decoder)

# Define VAE Loss
def vae_loss(x_reconstructed, x_true):
    # Reconstruction loss
    encode_decode_loss = x_true * tf.log(1e-10 + x_reconstructed) \
        + (1 - x_true) * tf.log(1e-10 + 1 - x_reconstructed)
    encode_decode_loss = -tf.reduce_sum(encode_decode_loss, 1)
    # KL Divergence loss
    kl_div_loss = 1 + z_std - tf.square(z_mean) - tf.exp(z_std)
    kl_div_loss = -0.5 * tf.reduce_sum(kl_div_loss, 1)
    return tf.reduce_mean(encode_decode_loss + kl_div_loss)

loss_op = vae_loss(decoder, input_image)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

plt.ion()
if not os.path.isdir("svaimgs"):
    os.makedirs("svaimgs")  # output directory for the per-step frames

# Split the test set by digit so the visualization batch can be assembled per class
testnums = 200
teimages = mnist.test.images
telabels = mnist.test.labels
test_images = {}
test_labels = {}
for i in range(0, 10):
    test_idx = telabels == i
    test_images[i] = teimages[test_idx]
    test_labels[i] = telabels[test_idx]

# Visualization batch: 200 images of the target digit plus 5 of every other digit
index = 0
target_index = 3
target_test_images = test_images[target_index][:testnums]
target_test_labels = test_labels[target_index][:testnums]
for i in range(0, 10):
    if i != target_index:
        target_test_images = np.append(target_test_images, test_images[i][:5], axis=0)
        target_test_labels = np.append(target_test_labels, test_labels[i][:5], axis=0)

# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    for i in range(1, num_steps + 1):
        # Prepare Data
        # Get the next batch of MNIST data and keep only the target digit
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        idx = batch_y == target_index
        batch_x = batch_x[idx]
        if batch_x.shape[0] == 0:
            continue  # skip the rare batch that contains no target-digit images
        # Train
        feed_dict = {input_image: batch_x}
        _, l = sess.run([train_op, loss_op], feed_dict=feed_dict)
        if i % 1000 == 0 or i == 1:
            print('Step %i, Loss: %f' % (i, l))

        if i % 200 == 0:
            # Encode the test images and draw each one at its z_mean position
            feed_dict = {input_image: target_test_images}
            results = sess.run(z_mean, feed_dict=feed_dict)
            xlist = [result[0] for result in results]
            ylist = [result[1] for result in results]
            fig = plt.figure(figsize=(8, 8))
            ax = fig.add_subplot(1, 1, 1)
            ax.set_xlim([-4, 4])
            ax.set_ylim([-4, 4])
            colors = ["red", "sienna", "gold", "olivedrab", "darkgreen",
                      "paleturquoise", "deepskyblue", "navy", "m", "mediumvioletred"]
            # 200 target-digit images + 5 images for each of the 9 other digits
            for j in range(testnums + 45):
                aimg = target_test_images[j].reshape(28, 28)
                imb0 = OffsetImage(aimg, cmap='gray', zoom=1.6)
                imb0.image.axes = ax
                ab0 = AnnotationBbox(imb0, results[j], xycoords="data",
                                     boxcoords="offset points", pad=0, frameon=True,
                                     bboxprops={"linewidth": 5,
                                                "edgecolor": colors[target_test_labels[j]]})
                ax.add_artist(ab0)
            # plt.scatter(xlist, ylist, c=range(0, testnums))
            plt.savefig("svaimgs/frame_%04d.png" % index)
            plt.close(fig)  # avoid accumulating figures across hundreds of frames
            index += 1

    # Testing
    # Generator takes noise as input
    noise_input = tf.placeholder(tf.float32, shape=[None, latent_dim])
    # Rebuild the decoder to create image from noise
    decoder = tf.matmul(noise_input, weights['decoder_h1']) + biases['decoder_b1']
    decoder = tf.nn.tanh(decoder)
    decoder = tf.matmul(decoder, weights['decoder_out']) + biases['decoder_out']
    decoder = tf.nn.sigmoid(decoder)

    # Building a manifold of generated digits
    n = 20
    x_axis = np.linspace(-3, 3, n)
    y_axis = np.linspace(-3, 3, n)
    canvas = np.empty((28 * n, 28 * n))
    for i, yi in enumerate(x_axis):
        for j, xi in enumerate(y_axis):
            z_mu = np.array([[xi, yi]] * batch_size)
            x_mean = sess.run(decoder, feed_dict={noise_input: z_mu})
            canvas[(n - i - 1) * 28:(n - i) * 28, j * 28:(j + 1) * 28] = \
                x_mean[0].reshape(28, 28)

    plt.figure(figsize=(8, 10))
    Xi, Yi = np.meshgrid(x_axis, y_axis)
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.show()
Code (AE just visualize 2d hidden layer. 10 Digits)
This is the code used when training an AE on all 10 MNIST digit classes. (It hardly differs from the first one.)