# HMC and KLqp fail in Edward while pymc3 works fine

I am trying to create a simple model on uplift for customer- and purchase counts in A,B -test.
My goal is to approximate the posterior of the uplifts, given the test result, by MCMC. More details in the code.

The same logic and almost the same code work fine in pymc3, and I get reasonable posterior samples from the MC. However, in Edward the reported Acceptance Rate is 0.000, and hence I can’t get the posterior estimates.

I also tried to use KL-based variational inference (ed.KLqp in the code below) in Edward, but it gives me a very wrong posterior and I’m not sure it is converging.

I have tried to play with the hyper-parameters like learning rates or step sizes but can’t get it working.

Full runnable code for pymc3 and Edward below.

I would also be curious how to write this with edward2.

Any help would be appreciated. Thanks!

The failing Edward code:

import edward as ed
from edward.models import Normal, Empirical
import tensorflow as tf
import numpy as np

# Shared TF session used by Edward; the .eval() calls further down rely on it.
sess = ed.get_session()

# Synthetic A/B-test observations: arm A has 2e5 customers making 1.5
# purchases each on average; arm B shows a 1% lift in both the customer
# count and purchases per customer.
customers_a = 2e5

purchases_per_customer_mu = 1.5
purchases_per_customer_var = 1.0

obs_purchases_a = int(customers_a * purchases_per_customer_mu)
obs_customers_b = 1.01 * int(customers_a)
obs_purchases_b = obs_customers_b * purchases_per_customer_mu * 1.01

# Narrow zero-centred Normal priors over the three uplift factors:
# a priori we expect uplifts of at most a few tenths of a percent.
variables = {
    name: Normal(loc=0.0, scale=0.003)
    for name in ("latent_uplift", "customer_uplift", "purchase_uplift")
}
var_names = list(variables)

# Observation model: the three observed counts are Normals whose means
# depend on the latent uplift factors.  Variances are count-scale
# (Poisson-like) -- presumably a Gaussian approximation to count noise.
lu = variables["latent_uplift"]
cu = variables["customer_uplift"]
pu = variables["purchase_uplift"]

purchases_a = Normal(
    loc=customers_a * purchases_per_customer_mu,
    scale=(customers_a * purchases_per_customer_var) ** 0.5,
)
customers_b = Normal(
    loc=customers_a * (1.0 + lu + cu),
    scale=(2 * customers_a) ** 0.5,
)
purchases_b = Normal(
    loc=customers_b * purchases_per_customer_mu * (1.0 + lu + pu),
    scale=(customers_b * purchases_per_customer_var) ** 0.5,
)

# Map each observed random variable to its observed value for inference.
data = {
    purchases_a: obs_purchases_a,
    customers_b: obs_customers_b,
    purchases_b: obs_purchases_b,
}

##### HMC
# Number of posterior samples to draw (and length of each Empirical trace).
N = 10000

# One (N,)-shaped trace variable per latent; Empirical wraps the buffer the
# HMC chain writes into.  Indentation of the `with` suites below was lost in
# the paste and is restored here -- without it this is not runnable Python.
with tf.variable_scope("traces", initializer=tf.zeros_initializer):
    latent_vars_empirical = {
        var: Empirical(params=tf.get_variable(name, (N,)))
        for name, var in variables.items()
    }

inference = ed.inferences.HMC(
    latent_vars=latent_vars_empirical,
    data=data
)
# NOTE(review): with the default step size this model reports
# Acceptance Rate: 0.000; the likelihood terms have scale ~1e5, so a much
# smaller step_size passed to inference.run() is likely needed -- TODO confirm.
inference.run()

# Read the recorded chains back out and summarise each latent.
with tf.variable_scope("traces", reuse=True):
    for vname in variables:
        empirical = tf.get_variable(vname).eval()
        print(vname, empirical.mean(), empirical.std())

##### KL (variational inference with ed.KLqp)
# Fully-factorised Gaussian approximation q: one scalar (loc, scale) pair per
# latent.  softplus keeps each scale strictly positive.  Indentation of the
# `with` suites was lost in the paste and is restored here.
with tf.variable_scope("q", initializer=tf.zeros_initializer):
    latent_vars_q = {
        var: Normal(
            loc=tf.get_variable("%s_loc" % name, shape=()),
            scale=tf.nn.softplus(tf.get_variable("%s_scale" % name, shape=()))
        )
        for name, var in variables.items()
    }

inference = ed.KLqp(
    latent_vars=latent_vars_q,
    data=data
)
inference.run(
    n_iter=10000,
    n_samples=5,
)

# Report the fitted variational parameters (mean and stddev of each q factor).
with tf.variable_scope("q", reuse=True):
    for vname in variables:
        qq_loc = tf.get_variable(vname + "_loc").eval()
        qq_scale = tf.nn.softplus(tf.get_variable(vname + "_scale")).eval()
        print(vname, qq_loc, qq_scale)

The working pymc3 code:

import pymc3 as pm
import seaborn as sns

# Same synthetic observations as in the Edward script: arm A with 2e5
# customers at 1.5 purchases each; arm B with a 1% lift in both customer
# count and purchases per customer.
customers_a = 2e5

purchases_per_customer_mu = 1.5
purchases_per_customer_var = 1.0

obs_purchases_a = int(customers_a * purchases_per_customer_mu)
obs_customers_b = 1.01 * int(customers_a)
obs_purchases_b = obs_customers_b * purchases_per_customer_mu * 1.01

# Same model as the Edward version: three narrow Normal priors on the uplift
# factors, with Normal observation noise on the three observed counts.
# The body's indentation was lost in the paste and is restored here --
# without it this block raises IndentationError.
with pm.Model() as model:
    latent_uplift = pm.Normal("lu", mu=0.0, sd=0.003)
    customer_uplift = pm.Normal("cu", mu=0.0, sd=0.003)
    purchase_uplift = pm.Normal("pu", mu=0.0, sd=0.003)

    purchases_a = pm.Normal(
        "obs_pa",
        mu=customers_a * purchases_per_customer_mu,
        sd=(customers_a * purchases_per_customer_var) ** 0.5,
        observed=obs_purchases_a
    )
    customers_b = pm.Normal(
        "obs_cb",
        mu=customers_a * (1.0 + latent_uplift + customer_uplift),
        sd=(2 * customers_a) ** 0.5,
        observed=obs_customers_b
    )
    purchases_b = pm.Normal(
        "obs_pb",
        mu=customers_b * purchases_per_customer_mu * (1.0 + latent_uplift + purchase_uplift),
        sd=(customers_b * purchases_per_customer_var) ** 0.5,
        observed=obs_purchases_b
    )

# Draw 10k posterior samples with HMC after 10k tuning steps.
# Suite indentation restored (lost in the paste).
with model:
    step = pm.HamiltonianMC()
    trace = pm.sample(10000, tune=10000, step=step)

# Posterior means of the three uplift factors.
print(
    trace['lu'].mean(),
    trace['cu'].mean(),
    trace['pu'].mean()
)

# Posterior standard deviations of the three uplift factors.
lu_samples = trace['lu']
cu_samples = trace['cu']
pu_samples = trace['pu']
print(lu_samples.std(), cu_samples.std(), pu_samples.std())

# Combined multiplicative uplift: customer uplift times purchase uplift.
total_uplift = (1 + lu_samples + cu_samples) * (1 + lu_samples + pu_samples)
print("posterior total uplift: ", total_uplift.mean())

# Marginal posterior densities of each uplift factor.
for samples in (lu_samples, cu_samples, pu_samples):
    sns.distplot(samples)