Deep Generative Models in Edward

Sand33p · June 29, 2017, 3:16pm

Hi all,
I am trying to model deep generative models similar to Rezende et al. 2014 (https://arxiv.org/pdf/1401.4082.pdf) using Edward. In the toy problem below, I am trying to estimate P(Y|X) = \int_{H} P(Y,H|X) dH, where X and Y are univariate. I am wondering if this specification of model and inference in Edward are correct.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from edward.models import Empirical, Normal, InverseGamma, Uniform, TransformedDistribution
from edward.models import MultivariateNormalDiag
ds = tf.contrib.distributions
sess = ed.get_session() 
ed.set_seed(42)

def build_toy_dataset():
  D = 1
  N = 100
  x = np.random.randn(N, D)
  x_orig = np.zeros((N, D))
  b = np.array([2.0],dtype=np.float64)
  d = np.array([np.sqrt(1)],dtype=np.float64)
  x_data_gen2 = Normal(loc= b, scale=d)   
  x_orig[:,0] =sess.run(x_data_gen2.sample(N))[:,0]
  x[:,0] = np.square(x_orig[:,0])  
  wTrue = np.array([3.0])
  bTrue = 2.0
  varTrue = 1.0  
  y_data_gen = Normal(loc=np.dot(x, wTrue) + np.ones(N)*bTrue, scale=np.ones([N])*np.sqrt(varTrue))
                                                           
  y = sess.run(y_data_gen.sample(1))
  return x_orig, y
  


N = 100  # number of data points
D = 1  # number of features
Dth1 = 10
Dth2 = 10

# DATA
w_true = np.ones(D) * 1.0
X_train, y_train = build_toy_dataset()   
y_train = y_train.T

#==============================================================================
#   Model          
#==============================================================================

X = tf.placeholder(tf.float64, [N, D])                
ig1 = tf.constant(0.01,dtype=tf.float64)
#### LAYER 1
W1 = tf.ones([D,Dth1],dtype=tf.float64,name="W1") # D x Dth1
B1 = tf.ones([1,Dth1],dtype=tf.float64,name="B1") # 1 X Dth1
H1_loc = tf.nn.softplus(tf.matmul(X,W1) + B1)  # N X Dth1
H1_scale_diag =tf.nn.softplus(tf.nn.softplus(tf.matmul(X,W1) + B1))# N diagonals of shape [Dth1 X 1]
H1 = MultivariateNormalDiag(loc=H1_loc,scale_diag=H1_scale_diag,name="H1") # N X Dth1
#### LAYER 2                                                   
W2 = tf.ones([Dth1,Dth2],dtype=tf.float64,name="W2") # Dth1 x Dth2
B2 = tf.ones([1,Dth2],dtype=tf.float64,name="B2") # 1 X Dth2
H2_loc = tf.nn.softplus(tf.matmul(H1,W2) + B2) # N X Dth2
H2_scale_diag = tf.nn.softplus(tf.nn.softplus(tf.matmul(H1,W2) + B2))  # N diagonals of shape [Dth2 X 1]
H2 = MultivariateNormalDiag(loc=H2_loc,scale_diag=H2_scale_diag,name="H2") # N X Dth2
                                                   
#### OUTPUT                                                   
W0 = Normal(loc=tf.ones([Dth2,1],dtype=tf.float64), scale=tf.ones([Dth2,1],dtype=tf.float64),name="W0")#prior on w
B0 = Normal(loc=tf.ones(1,dtype=tf.float64), scale=tf.ones(1,dtype=tf.float64),name="B0") # prior on b  
y_loc = tf.matmul(H2,W0)+ tf.ones([N,1],dtype=tf.float64)*B0
y_scale_diag = tf.nn.softplus(y_loc)
y = Normal(loc=y_loc, scale=y_scale_diag,name='y')  

#==============================================================================
# Inference
#==============================================================================
qW1 = tf.Variable(tf.truncated_normal([D,Dth1],dtype=tf.float64),name='qW1')
qB1 = tf.Variable(tf.truncated_normal([1,Dth1],dtype=tf.float64),name='qB1') 
qW2 = tf.Variable(tf.truncated_normal([Dth1,Dth2],dtype=tf.float64),name='qW2')
qB2 = tf.Variable(tf.truncated_normal([1,Dth2],dtype=tf.float64),name='qB2') 

qH1 = MultivariateNormalDiag(loc=tf.nn.softplus(tf.matmul(X,qW1) + qB1),
                          scale_diag=tf.nn.softplus(tf.nn.softplus(tf.matmul(X,qW1) + qB1)),name="qH1") # N X Dth1
                                                   
qH2 = MultivariateNormalDiag(loc=tf.nn.softplus(tf.matmul(qH1,qW2) + qB2),
                          scale_diag=tf.nn.softplus(tf.nn.softplus(tf.matmul(qH1,qW2) + qB2)),name="qH2") # N X Dth1
                                                   
qW0 = Normal(loc=tf.Variable(tf.random_uniform([Dth2,1],dtype=tf.float64)),
            scale=tf.nn.softplus(tf.Variable(tf.truncated_normal([Dth2,1],dtype=tf.float64))),name="qW0") #posterior on w1
qB0 = Normal(loc=tf.Variable(tf.random_uniform([1],dtype=tf.float64)),
            scale=tf.nn.softplus(tf.Variable(tf.truncated_normal([1],dtype=tf.float64))),name="qB0") #posterior on b1

inference = ed.KLpq({H1:qH1,H2:qH2,W0:qW0,B0:qB0}, data={X:X_train,y: y_train})
inference.initialize(n_iter=5000, n_print=100, n_samples=100,logdir='log')
tf.global_variables_initializer().run()


for _ in range(inference.n_iter):
  # Update and print progress of algorithm.  
  info_dict = inference.update()                                                                                       
  inference.print_progress(info_dict)

dustin · July 1, 2017, 11:30pm

It is correct but slow. Some tips:

In your build_toy_dataset(), you’re building Edward random variables and then immediately evaluating them. This can be made more efficient using NumPy random calls or teasing out the random variable building from the function. Otherwise multiple calls will build extraneous nodes in the TensorFlow graph.
You might try using Normal instead of MultivariateNormalDiag—both work when I played with the code.
Setting n_samples=100 makes both the graph building and the computation during training very expensive. After reducing it to just n_samples=5, the code runs very fast.

Topic		Replies	Views
Multinomial on convolutionnal model returns nans (equivalent code with Pyro)	1	939	June 6, 2018
Bayesian regression	4	1196	June 26, 2017
Rookie problem (KLqp gets obviously wrong result)	7	1784	October 17, 2017
LDA with collapsed Gibbs Sampling	0	1576	November 14, 2018
Modify Bayesian Linear Model to use Gibbs, but got NotImplementedError	0	799	August 3, 2018

Deep Generative Models in Edward

Related topics