The result isn't right. I'm fitting a two-component Gaussian mixture in Edward to data that has been affinely transformed (y = 5x + 3), modeling the transformation with a TransformedDistribution, but the inferred clusters and means look wrong. Can someone give me advice?

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt
import matplotlib.cm as cm

import numpy as np
import six

import edward as ed
import tensorflow as tf

from tensorflow.contrib.distributions.python.ops import bijectors as bijector

from edward.models import (
    Categorical, Dirichlet, Empirical, InverseGamma,
    MultivariateNormalDiag, Normal, ParamMixture, Mixture,
    NormalWithSoftplusScale)

true_pi = np.array([0.2, 0.8])
true_mus = [[1., 1.], [-1., -1.]]
true_stds = [[0.1, 0.1], [0.1, 0.1]]

def build_toy_dataset(N, pi=true_pi, mus=true_mus, stds=true_stds):
    x = np.zeros((N, 2), dtype=np.float32)
    for n in range(N):
        k = np.argmax(np.random.multinomial(1, pi))
        # np.diag(stds[k]) is used directly as the covariance matrix,
        # i.e. the entries of stds are treated as variances.
        x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
    return x

N = 500 # number of data points
K = 2 # number of components
D = 2 # dimensionality of data
ed.set_seed(42)

x_train = build_toy_dataset(N)
transformedx_train = 5.0*x_train + 3.0
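For reference, the transformed component means should land near 5*[1, 1] + 3 = [8, 8] and 5*[-1, -1] + 3 = [-2, -2], with mixing weights 0.2 and 0.8. A quick empirical check on the generated data (plain NumPy, nothing model-specific):

print(transformedx_train.mean(axis=0))  # roughly [0, 0], since 0.2 * 8 + 0.8 * (-2) = 0 per coordinate
print((transformedx_train[:, 0] > 3.0).mean())  # roughly 0.2, the weight of the [8, 8] component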

# Mixture model

pi = Dirichlet(tf.ones(K))
mu = Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
sigmasq = InverseGamma(tf.ones(D), tf.ones(D), sample_shape=K)

cat = Categorical(probs=pi, sample_shape=N)
components = [
    MultivariateNormalDiag(mu[k], sigmasq[k], sample_shape=N)
    for k in range(K)]

x = Mixture(cat=cat, components=components, sample_shape=N)
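One thing I am not sure about: MultivariateNormalDiag takes a scale (standard deviation) as its second argument, so passing sigmasq hands it a variance. If that is part of the problem, the components would presumably need the square root instead; kept commented here so it doesn't change the model above:

# components = [
#     MultivariateNormalDiag(mu[k], tf.sqrt(sigmasq[k]), sample_shape=N)
#     for k in range(K)]
# x = Mixture(cat=cat, components=components, sample_shape=N)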

transformedx = ed.models.TransformedDistribution(
    distribution=x,
    bijector=tf.contrib.distributions.bijectors.Affine(
        shift=3.,
        scale_identity_multiplier=5.,
        event_ndims=0),
    name="transformedx", sample_shape=N)
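The Affine bijector is meant to mirror how the data were generated, i.e. forward(x) = 5 * x + 3. A throwaway-session sanity check, using the same constructor arguments as above:

with tf.Session() as check_sess:
    check_affine = tf.contrib.distributions.bijectors.Affine(
        shift=3., scale_identity_multiplier=5., event_ndims=0)
    print(check_sess.run(check_affine.forward([0., 1.])))  # expect [3., 8.]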

# KLqp inference

qmu = Normal(loc=tf.Variable(tf.random_normal([K, D])),
             scale=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))))

qsigmasq = ed.models.TransformedDistribution(
    distribution=NormalWithSoftplusScale(tf.Variable(tf.zeros([K, D])),
                                         tf.Variable(tf.zeros([K, D]))),
    bijector=bijector.Exp())

# Alternative: SGLD with Empirical approximations. This block reassigns
# qmu and qsigmasq, so only one of the two setups can be active at a time;
# the rest of the script (qmu.loc, info_dict['loss'], n_samples) assumes KLqp.
# T = 500  # number of MCMC samples
# qpi = Empirical(tf.get_variable(
#     "qpi/params", [T, K],
#     initializer=tf.constant_initializer(1.0 / K)))
# qmu = Empirical(tf.get_variable(
#     "qmu/params", [T, K, D],
#     initializer=tf.zeros_initializer()))
# qsigmasq = Empirical(tf.get_variable(
#     "qsigmasq/params", [T, K, D],
#     initializer=tf.ones_initializer()))
# qz = Empirical(tf.get_variable(
#     "qz/params", [T, N],
#     initializer=tf.zeros_initializer(),
#     dtype=tf.int32))
# inference = ed.SGLD({mu: qmu, sigmasq: qsigmasq},
#                     data={transformedx: transformedx_train})

inference = ed.KLqp({mu: qmu, sigmasq: qsigmasq},
                    data={transformedx: transformedx_train})
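I am also not sure whether it matters that pi is left out of latent_vars (as far as I understand, KLqp then just keeps sampling it from its prior). If it does matter, my own guess at a variational family for it (not from any tutorial) would be a trainable Dirichlet, kept commented here:

# qpi = Dirichlet(tf.nn.softplus(tf.Variable(tf.random_normal([K]))))
# inference = ed.KLqp({pi: qpi, mu: qmu, sigmasq: qsigmasq},
#                     data={transformedx: transformedx_train})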

n_iter = 1000
n_print = 500
n_samples = 10

inference.initialize(n_iter=n_iter, n_print=n_print, n_samples=n_samples)

sess = ed.get_session()
init = tf.global_variables_initializer()
init.run()

learning_curve = []
for t in range(inference.n_iter):
    info_dict = inference.update()
    if t % 100 == 0:
        print(qmu.loc.eval())
        print(info_dict)
    learning_curve.append(info_dict['loss'])

plt.semilogy(learning_curve)

plt.show()
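In case the semilogy plot comes up blank: a log-scale axis silently drops non-positive values, and KLqp's 'loss' (an estimate of the negative ELBO) is not guaranteed to stay positive, so a plain plot is a safer fallback:

plt.figure()
plt.plot(learning_curve)
plt.xlabel('iteration')
plt.ylabel('KLqp loss (negative ELBO estimate)')
plt.show()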

# Assign clusters

M = 100
mu_sample = qmu.sample(M)
sigmasq_sample = qsigmasq.sample(M)
x_post = Normal(loc=tf.ones([N, 1, 1, 1]) * mu_sample,
                scale=tf.ones([N, 1, 1, 1]) * tf.sqrt(sigmasq_sample))
x_broadcasted = tf.tile(tf.reshape(x_train, [N, 1, 1, D]), [1, M, K, 1])
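A quick static shape check of the broadcasting (mu_sample and sigmasq_sample should be (M, K, D), so everything lines up as (N, M, K, D)):

print(x_broadcasted.shape)                   # expect (500, 100, 2, 2)
print(x_post.log_prob(x_broadcasted).shape)  # expect (500, 100, 2, 2)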

# Sum over latent dimension, then average over posterior samples.
# log_liks ends up with shape (N, K).

log_liks = x_post.log_prob(x_broadcasted)

print(log_liks.eval())

log_liks = tf.reduce_sum(log_liks, 3)
log_liks = tf.reduce_mean(log_liks, 1)
print(log_liks.eval())

clusters = tf.argmax(log_liks, 1).eval()
plt.scatter(transformedx_train[:, 0], transformedx_train[:, 1], c=clusters, cmap=cm.bwr)

postmu = qmu.loc.eval()

plt.scatter(5.0 * postmu[:, 0] + 3.0, 5.0 * postmu[:, 1] + 3.0, marker='x')

plt.show()