Hi all,
I am trying to build a deep generative model similar to Rezende et al. (2014) (https://arxiv.org/pdf/1401.4082.pdf) in Edward. In the toy problem below, I am estimating P(Y|X) = \int_{H} P(Y,H|X) dH, where X and Y are univariate. I am wondering whether the model and inference specification below is correct.
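Concretely, in the code below H = (H1, H2) and the joint factors as P(Y, H | X) = P(Y | H2, W0, B0) P(H2 | H1) P(H1 | X), with Normal priors on the output weight W0 and bias B0, so the quantity I care about is the marginal over H1 and H2.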
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from edward.models import Normal, MultivariateNormalDiag
sess = ed.get_session()
ed.set_seed(42)
def build_toy_dataset():
  D = 1
  N = 100
  # Inputs: x_orig[:, 0] ~ Normal(2, 1).
  b = np.array([2.0], dtype=np.float64)
  d = np.array([1.0], dtype=np.float64)
  x_data_gen = Normal(loc=b, scale=d)
  x_orig = np.zeros((N, D))
  x_orig[:, 0] = sess.run(x_data_gen.sample(N))[:, 0]
  x = np.square(x_orig)
  # Response: y ~ Normal(3 * x_orig**2 + 2, 1).
  wTrue = np.array([3.0])
  bTrue = 2.0
  varTrue = 1.0
  y_data_gen = Normal(loc=np.dot(x, wTrue) + np.ones(N) * bTrue,
                      scale=np.ones(N) * np.sqrt(varTrue))
  y = sess.run(y_data_gen.sample(1))  # shape (1, N)
  return x_orig, y
N = 100    # number of data points
D = 1      # number of features
Dth1 = 10  # dimension of hidden layer H1
Dth2 = 10  # dimension of hidden layer H2
# DATA
X_train, y_train = build_toy_dataset()
y_train = y_train.T  # (1, N) -> (N, 1)
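Since matplotlib is imported anyway, an optional scatter plot confirms the quadratic relationship y ≈ 3 * x_orig**2 + 2 in the toy data:

plt.scatter(X_train[:, 0], y_train[:, 0])
plt.xlabel('x_orig')
plt.ylabel('y')
plt.show()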
#==============================================================================
# Model
#==============================================================================
X = tf.placeholder(tf.float64, [N, D])  # fed with X_train at inference time
#### LAYER 1
W1 = tf.ones([D, Dth1], dtype=tf.float64, name="W1")  # D x Dth1
B1 = tf.ones([1, Dth1], dtype=tf.float64, name="B1")  # 1 x Dth1
H1_loc = tf.nn.softplus(tf.matmul(X, W1) + B1)        # N x Dth1
H1_scale_diag = tf.nn.softplus(H1_loc)                # N x Dth1; softplus keeps it positive
H1 = MultivariateNormalDiag(loc=H1_loc, scale_diag=H1_scale_diag, name="H1")  # N x Dth1
#### LAYER 2
W2 = tf.ones([Dth1, Dth2], dtype=tf.float64, name="W2")  # Dth1 x Dth2
B2 = tf.ones([1, Dth2], dtype=tf.float64, name="B2")     # 1 x Dth2
H2_loc = tf.nn.softplus(tf.matmul(H1, W2) + B2)          # N x Dth2
H2_scale_diag = tf.nn.softplus(H2_loc)                   # N x Dth2
H2 = MultivariateNormalDiag(loc=H2_loc, scale_diag=H2_scale_diag, name="H2")  # N x Dth2
#### OUTPUT
W0 = Normal(loc=tf.ones([Dth2, 1], dtype=tf.float64),
            scale=tf.ones([Dth2, 1], dtype=tf.float64), name="W0")  # prior on the output weights
B0 = Normal(loc=tf.ones(1, dtype=tf.float64),
            scale=tf.ones(1, dtype=tf.float64), name="B0")          # prior on the output bias
y_loc = tf.matmul(H2, W0) + tf.ones([N, 1], dtype=tf.float64) * B0
y_scale = tf.nn.softplus(y_loc)  # softplus keeps the likelihood scale positive
y = Normal(loc=y_loc, scale=y_scale, name="y")
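As a quick sanity check on the wiring (if I understand Edward's semantics, fetching a RandomVariable in sess.run draws a fresh sample from it), a single draw from the prior predictive should come back with shape (N, 1):

# Untested sketch: one forward sample through H1 -> H2 -> y for the training inputs.
y_prior = sess.run(y, feed_dict={X: X_train})
print(y_prior.shape)  # expect (100, 1)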
#==============================================================================
# Inference
#==============================================================================
qW1 = tf.Variable(tf.truncated_normal([D, Dth1], dtype=tf.float64), name='qW1')
qB1 = tf.Variable(tf.truncated_normal([1, Dth1], dtype=tf.float64), name='qB1')
qW2 = tf.Variable(tf.truncated_normal([Dth1, Dth2], dtype=tf.float64), name='qW2')
qB2 = tf.Variable(tf.truncated_normal([1, Dth2], dtype=tf.float64), name='qB2')
qH1_loc = tf.nn.softplus(tf.matmul(X, qW1) + qB1)    # N x Dth1
qH1 = MultivariateNormalDiag(loc=qH1_loc,
                             scale_diag=tf.nn.softplus(qH1_loc), name="qH1")
qH2_loc = tf.nn.softplus(tf.matmul(qH1, qW2) + qB2)  # N x Dth2
qH2 = MultivariateNormalDiag(loc=qH2_loc,
                             scale_diag=tf.nn.softplus(qH2_loc), name="qH2")
qW0 = Normal(loc=tf.Variable(tf.random_uniform([Dth2, 1], dtype=tf.float64)),
             scale=tf.nn.softplus(tf.Variable(tf.truncated_normal([Dth2, 1], dtype=tf.float64))),
             name="qW0")  # approximate posterior on W0
qB0 = Normal(loc=tf.Variable(tf.random_uniform([1], dtype=tf.float64)),
             scale=tf.nn.softplus(tf.Variable(tf.truncated_normal([1], dtype=tf.float64))),
             name="qB0")  # approximate posterior on B0
# KLpq minimizes KL(p || q); n_samples sets how many samples its
# importance-sampling gradient estimator uses per iteration.
inference = ed.KLpq({H1: qH1, H2: qH2, W0: qW0, B0: qB0}, data={X: X_train, y: y_train})
inference.initialize(n_iter=5000, n_print=100, n_samples=100, logdir='log')
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
  # Update and print progress of the algorithm.
  info_dict = inference.update()
  inference.print_progress(info_dict)
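After training, my understanding is that ed.copy can rebind the latents to their fitted approximations to form the posterior predictive, which ed.evaluate can then score (a sketch, untested):

# Sketch: posterior predictive with the latents swapped for their learned posteriors.
y_post = ed.copy(y, {H1: qH1, H2: qH2, W0: qW0, B0: qB0})
print(ed.evaluate('mean_squared_error', data={X: X_train, y_post: y_train}))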