Hi all,
I am trying to implement variational inference for linear combinations of mixtures of Gaussians in the flavor of
Attias, H. “Independent Factor Analysis.” Neural Computation 11.4 (1999): 803-851.
In the following code I am trying to implement a variational EM algorithm with an E step on the categorical latent variables and an M step on the remaining parameters (linear mixing matrix coefficients, mixture parameters, additive noise parameters) using deterministic posteriors.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import edward as ed
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import six
import tensorflow as tf
from edward.models import Categorical, InverseGamma, Mixture, \
MultivariateNormalDiag, Normal
from edward.models import PointMass
from edward.models import Dirichlet
from tensorflow.contrib import slim
plt.style.use('ggplot')
def build_toy_dataset(N):
pi = [[.4,.6],[0.4, 0.3,.3]]
mus = [[1., -1.], [-1., 1.,0.]]
stds = [[1., 1.], [1., 1.,2.]]
stds = [[x*.01 for x in stds[k]] for k in range(2)]
M= np.array([[1,2],[2, 1]])
x = np.zeros((N, 2), dtype=np.float32)
for n in range(N):
for kcomp in range(2):
k = np.argmax(np.random.multinomial(1, pi[kcomp]))
x[n, kcomp] = np.random.normal(mus[kcomp][k], stds[kcomp][k])
y= np.matmul(x,M.transpose())+ .5*np.random.randn(N,2)
return x, y
N = 500 # number of data points
D = 2 # dimensionality of data
numMod = [2,3]
# DATA
x_train, y_train = build_toy_dataset(N)
plt.hist(x_train[:,1])
plt.show()
plt.hist(x_train[:,0])
plt.show()
# Build model
mu = [(Normal(mu=tf.zeros([numMod[kcomp], 1]), sigma=tf.ones([numMod[kcomp], 1])))
for kcomp in range(D)]
sigma = [(InverseGamma(alpha=tf.ones([numMod[kcomp], 1]), beta=tf.ones([numMod[kcomp], 1])))
for kcomp in range(D)]
betaPar = [(Dirichlet(alpha=tf.ones([1,numMod[kcomp]])))
for kcomp in range(D)]
catVar = [Categorical(p=tf.ones([N, 1])*betaPar[kcomp])
for kcomp in range(D)]
x = [Normal(mu=tf.gather(mu[kcomp],catVar[kcomp]),
sigma=tf.gather(sigma[kcomp],catVar[kcomp]))
for kcomp in range(D)]
catx = tf.concat(x,1)
alpha = Normal(mu=tf.zeros([1]),sigma=tf.ones([1]))
beta = Normal(mu=tf.zeros([1]),sigma=tf.ones([1]))
M = tf.stack((tf.concat((tf.ones([1]), beta),axis=0), tf.concat((alpha,tf.ones([1])),axis=0)),axis=1)
signoise = InverseGamma(alpha=tf.ones([1,2]), beta=tf.ones([1,2]))
y = Normal(mu=tf.matmul(catx,M,transpose_b=True),sigma=tf.ones([N,1])*signoise)
# Define posterior approximations
qz0 = Categorical(logits=tf.Variable(tf.zeros([N, 1])))
qz1 = Categorical(logits=tf.Variable(tf.zeros([N, 1])))
qbeta1 = PointMass(params=tf.nn.softplus(tf.Variable(tf.ones([1,numMod[1]]))))
qbeta0 = PointMass(params=tf.nn.softplus(tf.Variable(tf.ones([1,numMod[0]]))))
qalpha = PointMass(params=tf.Variable(.5*tf.ones([1])))
qbeta = PointMass(params=tf.Variable(.5*tf.ones([1])))
qsignoise = PointMass(params=tf.nn.softplus(tf.Variable(tf.ones([1, 2]))))
qmu = [PointMass(params=tf.Variable(tf.zeros([numMod[kcomp], 1])))
for kcomp in range(D)]
qsigma = [PointMass(params=tf.nn.softplus(tf.Variable(tf.ones([numMod[kcomp], 1]))))
for kcomp in range(D)]
# Define Inference
inference_e = ed.KLqp({catVar[0]: qz0, catVar[1]: qz1}, data={y: y_train, betaPar[1]: qbeta1, betaPar[0]: qbeta0, alpha: qalpha, beta: qbeta, signoise: qsignoise, mu[0]: qmu[0], sigma[0]: qsigma[0], mu[1]: qmu[1], sigma[1]: qsigma[1]})
inference_m = ed.MAP({ betaPar[1]: qbeta1, betaPar[0]: qbeta0, alpha: qalpha, beta: qbeta, signoise: qsignoise, mu[0]: qmu[0], sigma[0]: qsigma[0], mu[1]: qmu[1], sigma[1]: qsigma[1]}, data={y: y_train, catVar[0]: qz0, catVar[1]: qz1})
init = tf.global_variables_initializer()
inference_e.initialize()
inference_m.initialize()
sess = ed.get_session()
init.run()
for _ in range(10):
inference_e.update()
inference_m.update()
The inferences initialize without error, but then the both inference update return an error message I have difficulties to interpret (I don’t know which variable in my code is involved in the first place).
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable_187
[[Node: Variable_187/read = Identity[T=DT_INT32, _class=["loc:@Variable_187"], _device="/job:localhost/replica:0/task:0/cpu:0"](Variable_187)]]
Any indication how to interpret or solve this would be much appreciated.
Thanks,
Michel