DPM model for clustering

Thanks @dustin! I understand your final note in the previous message.
I've rewritten the code and it runs (it does the computations) … well, not properly: the posterior mean never moves from (0,0). I need to spend some time here to understand what happens (maybe the hyperpriors need to be reconfigured).
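
For reference, the mixture weights in the code below come from the usual truncated stick-breaking construction (with truncation level K):

$$
\pi_1 = \beta_1, \qquad \pi_k = \beta_k \prod_{j=1}^{k-1} (1 - \beta_j) \;\; (k = 2, \dots, K), \qquad \beta_k \sim \mathrm{Beta}(1, \alpha).
$$

One caveat I'm aware of: under a finite truncation these K weights sum to less than one (the leftover stick mass \(\prod_{j=1}^{K}(1-\beta_j)\) is usually assigned to the last component). I think Categorical normalizes its probs internally, so I have left that as is for now.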

#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from edward.models import (
    Beta, Categorical, Dirichlet, Empirical, Gamma, InverseGamma, Normal)


def build_toy_dataset(N):
  """Draw N points from a two-component Gaussian mixture in 2-D."""
  pi = np.array([0.4, 0.6])
  mus = [[1, 1], [-1, -1]]
  stds = [[0.1, 0.1], [0.1, 0.1]]  # used as diagonal *covariance* entries below
  x = np.zeros((N, 2))
  for n in range(N):
    k = np.argmax(np.random.multinomial(1, pi))
    x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
  return x


N = 500  # number of data points
K = 5  # number of components
D = 2  # dimensionality of data
ed.set_seed(42)

# DATASET
# -------
x_data = build_toy_dataset(N)
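# Optional sanity check on the toy data (matplotlib is already imported):
# plt.scatter(x_data[:, 0], x_data[:, 1], s=5)
# plt.show()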

# MODEL
# -----
# pi = Dirichlet(concentration=tf.ones(K) / K)
# Truncated stick-breaking: pi_k = beta_k * prod_{j<k} (1 - beta_j).
# Note: these K weights sum to less than one; the leftover stick mass is dropped.
alpha = Gamma(concentration=2.0, rate=2.0, name="alpha")
beta = Beta(concentration1=1.0, concentration0=alpha, sample_shape=K, name="beta")
pi = beta * tf.concat([tf.ones([1]), tf.cumprod(1.0 - beta)[:-1]], 0)

mu = Normal(loc=tf.zeros(D), scale=tf.ones(D), sample_shape=K)  # component means
sigma = InverseGamma(concentration=tf.ones([D]), rate=tf.ones([D]),
                     sample_shape=K)  # used directly as std deviations below
c = Categorical(probs=pi, sample_shape=N)  # cluster assignments
x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))
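# Shape check: tf.gather(mu, c) and tf.gather(sigma, c) are both [N, D],
# matching x_data, so the likelihood factorizes over the N points.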

# INFERENCE
# ---------
T = 5000  # number of MCMC samples
# qpi = Empirical(params=tf.Variable(tf.ones([T, K]) / K))
qalpha = Empirical(params=tf.Variable(tf.ones([T])))
# Start the sticks at 0.5: initializing at 1.0 puts beta on the boundary of
# its (0, 1) support, where the Beta log-density is degenerate.
qbeta = Empirical(params=tf.Variable(tf.ones([T, K]) * 0.5))

qmu = Empirical(params=tf.Variable(tf.zeros([T, K, D])))
qsigma = Empirical(params=tf.Variable(tf.ones([T, K, D])))
qc = Empirical(params=tf.Variable(tf.zeros([T, N], dtype=tf.int32)))

# gpi = Dirichlet(concentration=tf.constant([2.0] * K))
galpha = Gamma(concentration=2.0, rate=2.0)
gbeta = Beta(concentration1=1.0, concentration0=1.0, sample_shape=K)

gmu = Normal(loc=tf.ones([K, D]), scale=tf.ones([K, D]))
gsigma = InverseGamma(concentration=tf.ones([K, D]), rate=tf.ones([K, D]))
gc = Categorical(logits=tf.zeros([N, K]))
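# NOTE (my current suspicion about the stuck chain): all of the proposals
# above are *independence* proposals -- fixed distributions that ignore the
# chain's current state. With N = 500 observations the joint acceptance
# ratio is essentially zero, so nearly every proposal is rejected and qmu
# never leaves its tf.zeros initialization, i.e. the mean stays at (0, 0).
# If Edward conditions proposals on the latent variables (untested sketch,
# the 0.1 scale is an arbitrary choice), a random walk should mix much better:
# gmu = Normal(loc=mu, scale=0.1 * tf.ones([K, D]))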

inference = ed.MetropolisHastings(
    latent_vars={alpha: qalpha, beta: qbeta, mu: qmu, sigma: qsigma, c: qc},
    proposal_vars={alpha: galpha, beta: gbeta, mu: gmu, sigma: gsigma, c: gc},
    data={x: x_data})

inference.initialize()

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
  t = info_dict['t']
  if t == 1 or t % inference.n_print == 0:
    qalpha_mean, qbeta_mean, qmu_mean = sess.run(
        [qalpha.mean(), qbeta.mean(), qmu.mean()])
    print("")
    print("Inferred alpha mean:")
    print(qalpha_mean)
    print("")
    print("Inferred beta mean:")
    print(qbeta_mean)
    print("")
    print("Inferred cluster means:")
    print(qmu_mean)
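
One thing I still want to check is whether the sampler is accepting anything at all. If I read Edward's MonteCarlo API correctly, update() also reports an acceptance rate, so a tweak like this inside the loop (untested sketch) should reveal a stuck chain:

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
  if info_dict['t'] % inference.n_print == 0:
    # near-zero values here would confirm that almost every move is rejected
    print("Acceptance rate: {:.3f}".format(info_dict['accept_rate']))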