HMC and KLpq fail on edward while pymc3 works fine

I am trying to create a simple model on uplift for customer- and purchase counts in A,B -test.
My goal is approximate the posterior for uplifts given the test result by MCMC. More details in the code.

The same logic and almost same code works fine in pymc3 and I get reasonable posterior samples from the MC. However, in Edward my Acceptance Rate: 0.000 and hence can’t get the posterior estimates.

I also tried to use KLpq inference in Edward but it gives me very wrong posterior and I’m not sure it is converging.

I have tried to play with the hyper-paramaters like learning-rates or step-sizes but can’t get it working.

Full runnable codes for pymc3 and edward below.

I would also be curious how to write this with edward2.

Any help would be appreciated. Thanks!

The failing edward-cde:

import edward as ed
from edward.models import Normal, Empirical
import tensorflow as tf
import numpy as np

sess = ed.get_session()

customers_a = 2e5

purchases_per_customer_mu = 1.5
purchases_per_customer_var = 1.0

obs_purchases_a = int(purchases_per_customer_mu * customers_a)
obs_customers_b = int(customers_a) * 1.01
obs_purchases_b = obs_customers_b * purchases_per_customer_mu * 1.01

variables = dict(
    latent_uplift=Normal(loc=0.0, scale=0.003),
    customer_uplift=Normal(loc=0.0, scale=0.003),
    purchase_uplift=Normal(loc=0.0, scale=0.003)
)
var_names = list(variables)

purchases_a = Normal(
    loc=customers_a * purchases_per_customer_mu,
    scale=(customers_a * purchases_per_customer_var) ** 0.5
)
customers_b = Normal(
    loc=customers_a
        * (1.0 + variables["latent_uplift"] + variables["customer_uplift"]),
    scale=(2 * customers_a) ** 0.5
)
purchases_b = Normal(
    loc=customers_b * purchases_per_customer_mu
        * (1.0 + variables["latent_uplift"] + variables["purchase_uplift"]),
    scale=(customers_b * purchases_per_customer_var) ** 0.5
)

data = {
    purchases_a: obs_purchases_a,
    customers_b: obs_customers_b,
    purchases_b: obs_purchases_b
}

##### HMC
N = 10000

with tf.variable_scope("traces", initializer=tf.zeros_initializer):
    latent_vars_empirical = {
        var: Empirical(params=tf.get_variable(name, (N,)))
        for name, var in variables.items()
    }

inference = ed.inferences.HMC(
    latent_vars=latent_vars_empirical,
    data=data
)
inference.run()

with tf.variable_scope("traces", reuse=True):
    for vname in variables:
        empirical = tf.get_variable(vname).eval()
        print(vname, empirical.mean(), empirical.std())

##### KL
with tf.variable_scope("q", initializer=tf.zeros_initializer):
    latent_vars_q = {
        var: Normal(
            loc=tf.get_variable("%s_loc" % name, shape=()),
            scale=tf.nn.softplus(tf.get_variable("%s_scale" % name, shape=()))
        )
        for name, var in variables.items()
    }

inference = ed.KLqp(
    latent_vars=latent_vars_q,
    data=data
)
inference.run(
    n_iter=10000,
    n_samples=5,
    optimizer=tf.train.AdamOptimizer(learning_rate=4e-4)
)

with tf.variable_scope("q", reuse=True):
    for vname in variables:
        qq_loc = tf.get_variable(vname + "_loc").eval()
        qq_scale = tf.nn.softplus(tf.get_variable(vname + "_scale")).eval()
        print(vname, qq_loc, qq_scale)

The working pymc3 code:

import pymc3 as pm
import seaborn as sns

customers_a = 2e5

purchases_per_customer_mu = 1.5
purchases_per_customer_var = 1.0

obs_purchases_a = int(purchases_per_customer_mu * customers_a)
obs_customers_b = int(customers_a) * 1.01
obs_purchases_b = obs_customers_b * purchases_per_customer_mu * 1.01

with pm.Model() as model:
    latent_uplift = pm.Normal("lu", mu=0.0, sd=0.003)
    customer_uplift = pm.Normal("cu", mu=0.0, sd=0.003)
    purchase_uplift = pm.Normal("pu", mu=0.0, sd=0.003)

    purchases_a = pm.Normal(
        "obs_pa",
        mu=customers_a * purchases_per_customer_mu,
        sd=(customers_a * purchases_per_customer_var) ** 0.5,
        observed=obs_purchases_a
    )
    customers_b = pm.Normal(
        "obs_cb",
        mu=customers_a * (1.0 + latent_uplift + customer_uplift),
        sd=(2 * customers_a) ** 0.5,
        observed=obs_customers_b
    )
    purchases_b = pm.Normal(
        "obs_pb",
        mu=customers_b * purchases_per_customer_mu * (1.0 + latent_uplift + purchase_uplift),
        sd=(customers_b * purchases_per_customer_var) ** 0.5,
        observed=obs_purchases_b
    )

with model:
    step = pm.HamiltonianMC()
    trace = pm.sample(10000, tune=10000, step=step)

print(
    trace['lu'].mean(),
    trace['cu'].mean(),
    trace['pu'].mean()
)

print(
    trace['lu'].std(),
    trace['cu'].std(),
    trace['pu'].std()
)

print("posterior total uplift: ",
    ((1 + trace["lu"] + trace["cu"])
     * (1 + trace["lu"] + trace["pu"])).mean())

sns.distplot(trace['lu'])
sns.distplot(trace['cu'])
sns.distplot(trace['pu'])