# Fixed: the original read `from future import ...`, which imports the
# third-party `future` compatibility package (or fails) instead of the
# built-in __future__ machinery.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import six
import edward as ed
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import bijectors as bijector
from edward.models import (
    Categorical, Dirichlet, Empirical, InverseGamma,
    MultivariateNormalDiag, Normal, ParamMixture, Mixture,
    NormalWithSoftplusScale)
# Ground-truth parameters of the 2-component, 2-D Gaussian mixture that
# generates the synthetic training data.
true_pi = np.array([0.2, 0.8])            # mixing proportions
true_mus = [[1.0, 1.0], [-1.0, -1.0]]     # per-component means
true_stds = [[0.1, 0.1], [0.1, 0.1]]      # per-component diagonal spreads
def build_toy_dataset(N, pi=None, mus=None, stds=None):
    """Draw N samples from a mixture of diagonal Gaussians.

    Args:
      N: int, number of data points to generate.
      pi: mixing proportions (length-K, sums to 1); defaults to true_pi.
      mus: list of K mean vectors (length-2 each); defaults to true_mus.
      stds: list of K diagonal spread vectors; defaults to true_stds.

    Returns:
      np.ndarray of shape (N, 2), dtype float32.

    Fixed: the original body ignored the pi/mus/stds parameters and always
    read the module-level true_* globals; the None-sentinel defaults keep
    the original default behavior while honoring caller-supplied values.
    """
    pi = true_pi if pi is None else pi
    mus = true_mus if mus is None else mus
    stds = true_stds if stds is None else stds
    x = np.zeros((N, 2), dtype=np.float32)
    for n in range(N):
        k = np.argmax(np.random.multinomial(1, pi))
        # NOTE(review): stds[k] is placed directly on the covariance
        # diagonal, i.e. treated as variances despite the name -- confirm.
        x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
    return x
# Experiment configuration.
N = 500  # number of data points
K = 2    # number of mixture components
D = 2    # dimensionality of each data point

ed.set_seed(42)

# Raw draws from the true mixture, plus an affine-transformed copy
# (y = 5*x + 3) that the transformed model below is fit against.
x_train = build_toy_dataset(N)
transformedx_train = 5.0 * x_train + 3.0
# Mixture model: p(x) = sum_k pi_k * Normal(x | mu_k, .), observed through
# the affine map y = 5*x + 3.  (Fixed: this header line had lost its '#',
# and the name= string below used curly quotes -- both syntax errors.)
pi = Dirichlet(tf.ones(K))
mu = Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
sigmasq = InverseGamma(tf.ones(D), tf.ones(D), sample_shape=K)
cat = Categorical(probs=pi, sample_shape=N)
# NOTE(review): MultivariateNormalDiag's second argument is a scale
# (stddev) vector, but sigmasq is named as a variance -- confirm whether
# tf.sqrt(sigmasq[k]) was intended here.
components = [
    MultivariateNormalDiag(mu[k], sigmasq[k], sample_shape=N)
    for k in range(K)]
x = Mixture(cat=cat, components=components, sample_shape=N)
# y = 5*x + 3 via a scalar Affine bijector (event_ndims=0).
transformedx = ed.models.TransformedDistribution(
    distribution=x,
    bijector=tf.contrib.distributions.bijectors.Affine(
        shift=3.,
        scale_identity_multiplier=5.,
        event_ndims=0),
    name="transformedx", sample_shape=N)
# KLqp inference: mean-field variational families.
# NOTE(review): these qmu/qsigmasq bindings are shadowed by the Empirical
# definitions further down -- confirm which inference path is intended.
qmu_loc = tf.Variable(tf.random_normal([K, D]))
qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([K, D])))
qmu = Normal(loc=qmu_loc, scale=qmu_scale)

qsigmasq_base = NormalWithSoftplusScale(
    tf.Variable(tf.zeros([K, D])), tf.Variable(tf.zeros([K, D])))
qsigmasq = ed.models.TransformedDistribution(
    distribution=qsigmasq_base, bijector=bijector.Exp())
# Empirical approximations for MCMC: one [T, ...] parameter tensor per
# latent variable, holding T posterior samples.  (Fixed: the variable-scope
# name strings used curly quotes, a syntax error.)
# NOTE(review): these rebind qmu/qsigmasq, discarding the variational
# families defined just above -- confirm which inference is intended.
T = 500  # number of MCMC samples
qpi = Empirical(tf.get_variable(
    "qpi/params", [T, K],
    initializer=tf.constant_initializer(1.0 / K)))
qmu = Empirical(tf.get_variable(
    "qmu/params", [T, K, D],
    initializer=tf.zeros_initializer()))
qsigmasq = Empirical(tf.get_variable(
    "qsigmasq/params", [T, K, D],
    initializer=tf.ones_initializer()))
qz = Empirical(tf.get_variable(
    "qz/params", [T, N],
    initializer=tf.zeros_initializer(),
    dtype=tf.int32))
# Fixed: the original bound `inference` to ed.KLqp and immediately rebound
# it to ed.SGLD, so only the SGLD assignment took effect; the KLqp
# alternative is kept commented out for reference.
# inference = ed.KLqp({mu: qmu, sigmasq: qsigmasq},
#                     data={transformedx: transformedx_train})
inference = ed.SGLD({mu: qmu, sigmasq: qsigmasq},
                    data={transformedx: transformedx_train})

n_iter = 1000
n_print = 500
n_samples = 10  # KLqp-specific; unused by SGLD

# Fixed: initialize exactly once.  The original called initialize() a
# second time with no arguments, which discarded n_iter/n_print.
# n_samples is a KLqp-only argument and is not passed to SGLD.
inference.initialize(n_iter=n_iter, n_print=n_print)
# Run inference, recording the reported objective each iteration.
# (Fixed: the 'loss' key below used curly quotes, a syntax error.)
sess = ed.get_session()
init = tf.global_variables_initializer()
init.run()

learning_curve = []
for t in range(inference.n_iter):
    info_dict = inference.update()
    if t % 100 == 0:
        # NOTE(review): qmu is an Empirical at this point, which exposes
        # .params rather than .loc; .loc exists only on the Normal qmu
        # defined earlier -- confirm which family is intended.
        print(qmu.loc.eval())
        print(info_dict)
    # NOTE(review): Monte Carlo inferences such as SGLD report
    # 'accept_rate', not 'loss' -- confirm this key against the chosen
    # inference class.
    learning_curve.append(info_dict['loss'])
plt.semilogy(learning_curve)
plt.show()
# Assign clusters: posterior predictive log-likelihood of each training
# point under each component, averaged over M posterior samples.
# (Fixed: this header and the two comment lines mid-block had lost their
# '#' markers, leaving bare prose in the source.)
M = 100
mu_sample = qmu.sample(M)
sigmasq_sample = qsigmasq.sample(M)
# x_post broadcasts against x_broadcasted over shape (N, M, K, D).
x_post = Normal(loc=tf.ones([N, 1, 1, 1]) * mu_sample,
                scale=tf.ones([N, 1, 1, 1]) * tf.sqrt(sigmasq_sample))
x_broadcasted = tf.tile(tf.reshape(x_train, [N, 1, 1, D]), [1, M, K, 1])

# Sum over the latent dimension, then average over posterior samples;
# log_liks ends up with shape (N, K).
log_liks = x_post.log_prob(x_broadcasted)
print(log_liks.eval())
log_liks = tf.reduce_sum(log_liks, 3)
log_liks = tf.reduce_mean(log_liks, 1)
print(log_liks.eval())
clusters = tf.argmax(log_liks, 1).eval()
# Plot the transformed data colored by inferred cluster, with the posterior
# means mapped through the same affine transform y = 5*x + 3.
# (Fixed: `5.0postmu` was missing the `*` operator and the marker string
# used curly quotes -- both syntax errors.)
plt.scatter(transformedx_train[:, 0], transformedx_train[:, 1],
            c=clusters, cmap=cm.bwr)
# NOTE(review): qmu is an Empirical here, which has no .loc attribute
# (Empirical exposes .params); this line works only with the Normal qmu --
# confirm.
postmu = qmu.loc.eval()
plt.scatter(5.0 * postmu[:, 0] + 3.0, 5.0 * postmu[:, 1] + 3.0, marker='x')
plt.show()