Hi Dustin,
This is a follow-up question to the one I asked yesterday on Gitter about implementing a graphical model with a latent node in Edward. I tried Gibbs sampling for my problem as you suggested. Here is the code I used:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import edward as ed
import matplotlib.pyplot as plt
import six
from time import time
from edward.models import InverseGamma, Normal, Empirical
from edward.util import get_blanket
"""
Directed Graphical model:
X->Y , Z->Y
X,Y,Z are assumed to be Gaussian distributions
data is observed on X,Y but not Z (latent node)
P(X,Y,Z) = P(Y|X,Z) * P(X)* P(Z)
P(Z) = N(loc=mu_z, scale=ig_z)
P(X) = N(loc=mu_x, scale=ig_x)
P(Y|X,Z) = N(loc=(b + w*X + wz*Z), scale=ig)
Estimate: mu_z,ig_z,mu_x,ig_x,b,w,wz
Data: Y_train ,X_train
"""
def build_toy_dataset(N, b, w_o, w_h, noise_std=1):
    D = len(w_o) + len(w_h)
    w = np.concatenate((w_o, w_h))
    x = np.zeros((N, D))
    x[:, 0] = np.random.normal(2, 4, size=N)  # observed node
    x[:, 1] = np.random.normal(5, 6, size=N)  # hidden node
    y = np.dot(x, w) + np.ones(N) * b + np.random.normal(0, noise_std, size=N)
    x_o = np.zeros((N, len(w_o)))
    x_o[:, 0] = x[:, 0]
    return x_o, y
#ed.set_seed(42)
N = 1000 # number of data points
D = 1 # number of observed nodes
Dz = 1 # number of hidden nodes
# DATA
w_true = np.ones(D) * 5.0
w_hid = np.ones(D) * 3.0
X_train, y_train = build_toy_dataset(N, 1, w_true, w_hid)
#X_test, y_test = build_toy_dataset(N,1, w_true)
# MODEL
#X = tf.placeholder(tf.float32, [N, D])
## Latent node - Z
alpha_z = tf.Variable(0.5, trainable=False)
beta_z = tf.Variable(0.7, trainable=False)
ig_z = InverseGamma(alpha_z, beta_z)
mu_z = Normal(loc=0.0, scale=1.0)
z = Normal(loc=tf.ones(N) * mu_z, scale=tf.ones([N]) * tf.sqrt(ig_z))
## Observed node - X
alpha_x = tf.Variable(0.5, trainable=False)
beta_x = tf.Variable(0.7, trainable=False)
ig_x = InverseGamma(alpha_x, beta_x)
mu_x = Normal(loc=0.0, scale=1.0)
X = Normal(loc=tf.ones(N) * mu_x, scale=tf.ones([N]) * tf.sqrt(ig_x))
# Observed node - Y
alpha = tf.Variable(0.5, trainable=False)
beta = tf.Variable(0.7, trainable=False)
ig = InverseGamma(alpha, beta)
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
wz = Normal(loc=tf.zeros(Dz), scale=tf.ones(Dz))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=(X*w+z*wz+b), scale=tf.ones([N]) * tf.sqrt(ig))
# INFERENCE
T = 500 # number of MCMC samples
# Conditionals
muz_cond = ed.complete_conditional(mu_z)
igz_cond = ed.complete_conditional(ig_z)
mux_cond = ed.complete_conditional(mu_x)
igx_cond = ed.complete_conditional(ig_x)
ig_cond = ed.complete_conditional(ig)
b_cond = ed.complete_conditional(b)
w_cond = ed.complete_conditional(w)
wz_cond = ed.complete_conditional(wz)
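# (As I understand it, ed.complete_conditional derives each conditional
# analytically from the variable's Markov blanket, so this is where the
# conjugacy analysis happens.)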
sess = ed.get_session()
# Initialize randomly
muz_est, igz_est, w_est, wz_est, ig_est, b_est = sess.run([mu_z, ig_z, w, wz, ig, b])
print('Initial parameters:')
print(muz_est, igz_est, w_est, wz_est, ig_est, b_est)
# Gibbs sampler
cond_dict = {mu_z: muz_est, ig_z: igz_est, w: w_est, wz: wz_est, ig: ig_est, b: b_est, X: X_train, y: y_train}
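# Note: z is not in cond_dict, so each sess.run below draws a fresh z from
# its prior (given the fed mu_z and ig_z) rather than holding one sample
# of z fixed.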
t0 = time()
for t in range(T):
    muz_est = sess.run(muz_cond, cond_dict)
    cond_dict[mu_z] = muz_est
    igz_est = sess.run(igz_cond, cond_dict)
    cond_dict[ig_z] = igz_est
    w_est, wz_est, ig_est, b_est = sess.run([w_cond, wz_cond, ig_cond, b_cond], cond_dict)
    cond_dict[w] = w_est
    cond_dict[wz] = wz_est
    cond_dict[ig] = ig_est
    cond_dict[b] = b_est
print('took %.3f seconds to run %d iterations' % (time() - t0, T))
print()
print('Final sample for parameters:')
print(muz_est, igz_est, w_est, wz_est, ig_est, b_est)
But when I run this, I get the following error:
'sufficient statistics.' % str(dist_key))
NotImplementedError: Conditional distribution has sufficient statistics (('#Add', ('#Mul', (2.0,),
(<tf.Tensor 'Normal_10/sample/Reshape:0' shape=(1000,) dtype=float32>,),
(<tf.Tensor 'Normal_13/sample/Reshape:0' shape=(1,) dtype=float32>,),
('#x',)), ('#Mul', (2.0,), (<tf.Tensor 'Normal_4/sample/Reshape:0' shape=(1000,) dtype=float32>,),
(<tf.Tensor 'Normal_16/sample/Reshape:0' shape=(1,) dtype=float32>,), ('#x',)), ('#CPow2.0000e+00',
('#x',))), ('#CPow2.0000e+00', ('#x',)), ('#x',)), but no available exponential-family distribution has those sufficient statistics.
The error indicates that the full conditionals cannot be computed for b_cond, w_cond, and wz_cond. I think this is because of the definition of loc in
y = Normal(loc=(X*w+z*wz+b), scale=tf.ones([N]) * tf.sqrt(ig))
However, this is essentially a Bayesian linear regression, so I defined conjugate priors for b, w, and wz accordingly. Do you have any insight into why this is happening, and whether I should define the complete conditionals for these variables separately?
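For concreteness, here is a rough sketch of what I mean by defining a conditional separately, using the standard normal-normal conjugacy result for a scalar regression weight. The function and argument names are just placeholders: x_obs and y_obs are the length-N data arrays, and z_samp, b_samp, wz_samp, and sigma2 stand for the current draws of the other variables in the sampler.

import numpy as np

def sample_w_conditional(x_obs, y_obs, z_samp, b_samp, wz_samp, sigma2,
                         prior_mean=0.0, prior_var=1.0):
    """Draw w from its full conditional in y = b + w*x + wz*z + noise."""
    # Residual of y after removing the contributions of the other terms.
    resid = y_obs - b_samp - wz_samp * z_samp
    # Normal prior + Gaussian likelihood => Gaussian full conditional.
    prec = 1.0 / prior_var + np.sum(x_obs ** 2) / sigma2
    mean = (prior_mean / prior_var + np.sum(x_obs * resid) / sigma2) / prec
    return np.random.normal(mean, np.sqrt(1.0 / prec))

If that is the intended workaround, I could write analogous updates for b and wz, but I wanted to check first whether ed.complete_conditional is expected to handle a mean like X*w + z*wz + b directly.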