Hi, I have been trying to implement a Dirichlet process mixture model for data that follow a mixture of multivariate Bernoullis. I used `Independent` and `Mixture` to construct the multivariate Bernoulli and the mixture of multivariate Bernoullis, respectively. Also, I used `KLqp` for inference.
The problem is that I always get NaNs after a few iterations. The version of my TensorFlow is '1.7.0', and the version of my Edward is '1.3.5'. Any suggestions would be very much appreciated.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import (
Categorical, Mixture, Gamma, Beta,
Bernoulli, Independent)
def build_toy_dataset2(N, probs=None):
    """Simulate N binary D-dimensional observations from K groups.

    Rows are split into K (roughly equal) consecutive groups; within
    group k, dimension d is an independent Bernoulli(probs[k, d]) draw.

    Args:
        N: total number of observations.
        probs: optional (K, D) array-like of per-group Bernoulli success
            probabilities. Defaults to the original 3-group, 2-D table.

    Returns:
        np.ndarray of shape (N, D), dtype float32, entries in {0.0, 1.0}.
    """
    if probs is None:
        probs = [[0.9, 0.9], [0.1, 0.9], [0.1, 0.1]]
    probs = np.asarray(probs, dtype=np.float64)
    K, D = probs.shape
    x = np.zeros((N, D), dtype=np.float32)
    # First K-1 groups get floor(N/K) rows; the last absorbs the remainder
    # (matches the original num_group1/2/3 split for K=3).
    sizes = [N // K] * (K - 1) + [N - (N // K) * (K - 1)]
    start = 0
    for k, size in enumerate(sizes):
        rows = slice(start, start + size)
        for d in range(D):
            p = probs[k, d]
            # Same np.random.choice call order and arguments as the
            # original unrolled code, so the RNG stream (and hence the
            # generated data) is unchanged for the default probs.
            x[rows, d] = np.random.choice([1, 0], size=size, p=[p, 1 - p])
        start += size
    return x
# Reproducibility and problem-size constants.
ed.set_seed(42)
N = 500    # number of observations
D = 2      # dimensionality of each binary observation
T = K = 5  # truncation level in DP
# BUG FIX: the original `alpha = 0.5` here was dead code — it was never
# read before being shadowed by the Gamma random variable in the MODEL
# section below, so it has been removed.
x_train = build_toy_dataset2(N)
# MODEL
# Truncated Dirichlet-process mixture of K multivariate Bernoulli
# components over D binary dimensions.
# DP concentration parameter with a Gamma(6, 1) prior.
alpha = Gamma(concentration=6.0, rate=1.0, name="alpha")
# Stick-breaking fractions: beta_k ~ Beta(1, alpha), k = 1..K.
beta = Beta(concentration1=1.0, concentration0=tf.ones(K)*alpha, name="beta")
# Truncated stick-breaking mixture weights:
#   pi_1 = beta_1,  pi_k = beta_k * prod_{j<k}(1 - beta_j) for k >= 2.
# NOTE(review): this truncation drops the final stick, so pi sums to
# 1 - prod_k(1 - beta_k) < 1. Whether Categorical renormalizes probs
# internally should be confirmed — an unnormalized pi is a plausible
# contributor to the NaNs reported above.
pi = tf.concat([tf.reshape(beta[0],[1]), tf.reshape(tf.multiply(beta[1:],tf.cumprod(1 - beta[:-1])), [K-1])], 0, name="pi")
# Fixed (non-trainable) Beta hyperparameters for the per-component
# Bernoulli success probabilities; both set to ones(D), i.e. Beta(1, 1).
alpha0 = tf.Variable(tf.ones(D), dtype=tf.float32, trainable=False)
beta0 = tf.Variable(tf.ones(D), dtype=tf.float32, trainable=False)
# prob[k, d] ~ Beta(1, 1): success probability of dimension d in component k.
prob = Beta(alpha0, beta0, sample_shape = K)
# Per-observation component assignments drawn from the mixture weights.
cat = Categorical(probs=pi, sample_shape=N)
# Each component: D independent Bernoullis collapsed into one
# multivariate event via reinterpreted_batch_ndims=1.
comps = [Independent(distribution=Bernoulli(probs=prob[k]),
reinterpreted_batch_ndims=1, sample_shape=N)
for k in range(K)]
# Observed data likelihood: mixture of the K multivariate Bernoullis.
x = Mixture(cat=cat, components=comps, sample_shape=N)
# INFERENCE
# Variational families for KLqp. Free parameters are kept positive via
# softplus plus a small 1e-5 floor, since Gamma/Beta parameters must be
# strictly positive.
# q(alpha): Gamma with trainable concentration and rate.
qalpha = Gamma(concentration=tf.Variable(tf.constant(1.0),name="qalpha_concentration"), rate=tf.Variable(tf.constant(1.0), name="qalpha_rate"))
# q(beta): T independent Beta factors for the stick-breaking fractions.
qbeta = Beta(tf.nn.softplus(tf.Variable(tf.random_normal([T]))) + 1e-5,
tf.nn.softplus(tf.Variable(tf.random_normal([T]))) + 1e-5)
# q(prob): a (K, D) grid of Beta factors for the component success
# probabilities.
qalpha0 = tf.nn.softplus(tf.Variable(tf.random_normal([K,D]))) + 1e-5
qbeta0 = tf.nn.softplus(tf.Variable(tf.random_normal([K,D]))) + 1e-5
qprob = Beta(qalpha0,
qbeta0)
# Bind latent variables to their variational factors and condition on
# the observed data. NOTE(review): KLqp falls back to score-function
# gradients for non-reparameterizable latents such as Beta/Gamma, which
# are high-variance — a plausible source of the reported NaNs; confirm
# against the Edward KLqp documentation.
inference = ed.KLqp({alpha:qalpha, beta: qbeta, prob: qprob}, data={x: x_train})
learning_rate = 1e-3
optimizer = tf.train.AdamOptimizer(learning_rate)
n_iter=1000
n_samples=10
# NOTE(review): n_print=0 disables Edward's own progress printing, but
# any later `t % inference.n_print` expression must guard against the
# zero before taking the modulus.
inference.initialize(n_iter=n_iter, n_print=0, n_samples=n_samples, optimizer = optimizer)
# Run the optimization and periodically report the variational means.
sess = ed.get_session()
init = tf.global_variables_initializer()
init.run()
for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
    t = info_dict['t']
    # BUG FIX: inference was initialized with n_print=0, so the original
    # unguarded `t % inference.n_print` raised ZeroDivisionError on the
    # very first iteration. Only take the modulus when n_print is non-zero.
    if inference.n_print and t % inference.n_print == 0:
        print(sess.run(qprob.mean()))
        print(sess.run(qbeta.mean()))