Different results between two ways of running inference (inference.run() vs. inference.initialize() + update())

I am a beginner in TensorFlow (which I am learning because I need Edward) and in Edward itself, so I am sorry if the question is basic. I am doing matrix factorization based on the example on GitHub.

from edward.models import Normal
from numpy.random import normal
import numpy as np
import tensorflow as tf
import edward as ed

def build_toy_dataset(U, V, N, M, noise_std=0.1):
    R = np.dot(np.transpose(U), V) + normal(0, noise_std, size=(N, M))
    return R

N = 50  # number of users
M = 60  # number of movies
D = 3  # number of latent factors

# true latent factors
U_true = normal(size=(D, N))
V_true = normal(size=(D, M))

# DATA
R_true = build_toy_dataset(U_true, V_true, N, M)

with tf.Session() as sess:
    # MODEL
    U = Normal(loc=tf.zeros([D, N]), scale=tf.ones([D, N]))
    V = Normal(loc=tf.zeros([D, M]), scale=tf.ones([D, M]))
    R = Normal(loc=tf.matmul(tf.transpose(U), V), scale=tf.ones([N, M]))
    # INFERENCE
    qU = Normal(loc=tf.Variable(tf.random_normal([D, N])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, N]))))
    qV = Normal(loc=tf.Variable(tf.random_normal([D, M])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, M]))))

    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    inference = ed.KLqp({U: qU, V: qV}, data={R: R_true})
    inference.initialize(optimizer=optimizer)

    inference.run(n_iter=1000)

The loss is 3611 and the result is good. But then I wanted finer control of the inference procedure, based on the example on the website, like this:

n_iter = 1000
with tf.Session() as sess:
    # MODEL
    U = Normal(loc=tf.zeros([D, N]), scale=tf.ones([D, N]))
    V = Normal(loc=tf.zeros([D, M]), scale=tf.ones([D, M]))
    R = Normal(loc=tf.matmul(tf.transpose(U), V), scale=tf.ones([N, M]))
    # INFERENCE
    qU = Normal(loc=tf.Variable(tf.random_normal([D, N])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, N]))))
    qV = Normal(loc=tf.Variable(tf.random_normal([D, M])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, M]))))

    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    inference = ed.KLqp({U: qU, V: qV}, data={R: R_true})
    inference.initialize(optimizer=optimizer, n_iter=n_iter)
    loss = np.empty(n_iter, dtype=np.float32)
    tf.global_variables_initializer().run()
    for t in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)
        loss[t] = info_dict["loss"]
    inference.finalize()

The loss becomes 13703 and the result is worse. Does anyone know why?

When you call inference.run(), it calls inference.initialize() again and passes its keyword arguments, such as optimizer, through to it.

Since you don't pass optimizer=optimizer to run(), it uses AdamOptimizer by default.
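So to keep Adagrad in the first version, you can drop the explicit initialize() call and pass the optimizer straight to run(), which forwards it to initialize(). A minimal sketch of that fix:

optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
inference = ed.KLqp({U: qU, V: qV}, data={R: R_true})
# run() forwards keyword arguments to initialize(), so Adagrad is used here
inference.run(n_iter=1000, optimizer=optimizer)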

Now I understand: inference.run() reinitialized my inference in the first example, so it switched to the Adam optimizer. Apparently Adam (used in the first example) works better here than Adagrad (used in the second). Thank you so much.
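For completeness, the manual loop should behave like the first run once Adam is passed explicitly. A minimal sketch (note: Edward's built-in default may configure Adam with its own learning-rate schedule, so an explicit tf.train.AdamOptimizer() is only an approximation of the default):

n_iter = 1000
optimizer = tf.train.AdamOptimizer()
inference = ed.KLqp({U: qU, V: qV}, data={R: R_true})
inference.initialize(optimizer=optimizer, n_iter=n_iter)
loss = np.empty(n_iter, dtype=np.float32)
tf.global_variables_initializer().run()
for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
    loss[t] = info_dict["loss"]  # track the objective per iteration
inference.finalize()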