# HMC and KLqp fail in Edward while pymc3 works fine

I am trying to create a simple model on uplift for customer- and purchase counts in A,B -test.
My goal is to approximate the posterior of the uplifts, given the test result, by MCMC. More details in the code.

The same logic and almost the same code work fine in pymc3, and I get reasonable posterior samples from the MC. However, in Edward the reported Acceptance Rate is 0.000, and hence I can’t get the posterior estimates.

I also tried to use KL-based variational inference (ed.KLqp in the code below) in Edward, but it gives me a very wrong posterior and I’m not sure it is converging.

I have tried to play with the hyper-parameters like learning rates or step sizes but can’t get it working.

Full runnable code for pymc3 and Edward below.

I would also be curious how to write this with edward2.

Any help would be appreciated. Thanks!

The failing Edward code:

import edward as ed
from edward.models import Normal, Empirical
import tensorflow as tf
import numpy as np

# Shared TF session used by Edward; the .eval() calls further down rely on it.
sess = ed.get_session()

# Synthetic A/B-test observations: arm A has 2e5 customers making 1.5
# purchases each on average; arm B shows a 1% lift in both the customer
# count and purchases per customer.
customers_a = 2e5

purchases_per_customer_mu = 1.5
purchases_per_customer_var = 1.0

obs_purchases_a = int(customers_a * purchases_per_customer_mu)
obs_customers_b = 1.01 * int(customers_a)
obs_purchases_b = obs_customers_b * purchases_per_customer_mu * 1.01

# Narrow zero-centred Normal priors over the three uplift factors:
# a priori we expect uplifts of at most a few tenths of a percent.
variables = {
    name: Normal(loc=0.0, scale=0.003)
    for name in ("latent_uplift", "customer_uplift", "purchase_uplift")
}
var_names = list(variables)

# Observation model: the three observed counts are Normals whose means
# depend on the latent uplift factors.  Variances are count-scale
# (Poisson-like) -- presumably a Gaussian approximation to count noise.
lu = variables["latent_uplift"]
cu = variables["customer_uplift"]
pu = variables["purchase_uplift"]

purchases_a = Normal(
    loc=customers_a * purchases_per_customer_mu,
    scale=(customers_a * purchases_per_customer_var) ** 0.5,
)
customers_b = Normal(
    loc=customers_a * (1.0 + lu + cu),
    scale=(2 * customers_a) ** 0.5,
)
purchases_b = Normal(
    loc=customers_b * purchases_per_customer_mu * (1.0 + lu + pu),
    scale=(customers_b * purchases_per_customer_var) ** 0.5,
)

# Map each observed random variable to its observed value for inference.
data = {
    purchases_a: obs_purchases_a,
    customers_b: obs_customers_b,
    purchases_b: obs_purchases_b,
}

##### HMC
# Number of posterior samples to draw (and length of each Empirical trace).
N = 10000

# One (N,)-shaped trace variable per latent; Empirical wraps the buffer the
# HMC chain writes into.  Indentation of the `with` suites below was lost in
# the paste and is restored here -- without it this is not runnable Python.
with tf.variable_scope("traces", initializer=tf.zeros_initializer):
    latent_vars_empirical = {
        var: Empirical(params=tf.get_variable(name, (N,)))
        for name, var in variables.items()
    }

inference = ed.inferences.HMC(
    latent_vars=latent_vars_empirical,
    data=data
)
# NOTE(review): with the default step size this model reports
# Acceptance Rate: 0.000; the likelihood terms have scale ~1e5, so a much
# smaller step_size passed to inference.run() is likely needed -- TODO confirm.
inference.run()

# Read the recorded chains back out and summarise each latent.
with tf.variable_scope("traces", reuse=True):
    for vname in variables:
        empirical = tf.get_variable(vname).eval()
        print(vname, empirical.mean(), empirical.std())

##### KL (variational inference with ed.KLqp)
# Fully-factorised Gaussian approximation q: one scalar (loc, scale) pair per
# latent.  softplus keeps each scale strictly positive.  Indentation of the
# `with` suites was lost in the paste and is restored here.
with tf.variable_scope("q", initializer=tf.zeros_initializer):
    latent_vars_q = {
        var: Normal(
            loc=tf.get_variable("%s_loc" % name, shape=()),
            scale=tf.nn.softplus(tf.get_variable("%s_scale" % name, shape=()))
        )
        for name, var in variables.items()
    }

inference = ed.KLqp(
    latent_vars=latent_vars_q,
    data=data
)
inference.run(
    n_iter=10000,
    n_samples=5,
)

# Report the fitted variational parameters (mean and stddev of each q factor).
with tf.variable_scope("q", reuse=True):
    for vname in variables:
        qq_loc = tf.get_variable(vname + "_loc").eval()
        qq_scale = tf.nn.softplus(tf.get_variable(vname + "_scale")).eval()
        print(vname, qq_loc, qq_scale)

The working pymc3 code:

import pymc3 as pm
import seaborn as sns

# Same synthetic observations as in the Edward script: arm A with 2e5
# customers at 1.5 purchases each; arm B with a 1% lift in both customer
# count and purchases per customer.
customers_a = 2e5

purchases_per_customer_mu = 1.5
purchases_per_customer_var = 1.0

obs_purchases_a = int(customers_a * purchases_per_customer_mu)
obs_customers_b = 1.01 * int(customers_a)
obs_purchases_b = obs_customers_b * purchases_per_customer_mu * 1.01

# Same model as the Edward version: three narrow Normal priors on the uplift
# factors, with Normal observation noise on the three observed counts.
# The body's indentation was lost in the paste and is restored here --
# without it this block raises IndentationError.
with pm.Model() as model:
    latent_uplift = pm.Normal("lu", mu=0.0, sd=0.003)
    customer_uplift = pm.Normal("cu", mu=0.0, sd=0.003)
    purchase_uplift = pm.Normal("pu", mu=0.0, sd=0.003)

    purchases_a = pm.Normal(
        "obs_pa",
        mu=customers_a * purchases_per_customer_mu,
        sd=(customers_a * purchases_per_customer_var) ** 0.5,
        observed=obs_purchases_a
    )
    customers_b = pm.Normal(
        "obs_cb",
        mu=customers_a * (1.0 + latent_uplift + customer_uplift),
        sd=(2 * customers_a) ** 0.5,
        observed=obs_customers_b
    )
    purchases_b = pm.Normal(
        "obs_pb",
        mu=customers_b * purchases_per_customer_mu * (1.0 + latent_uplift + purchase_uplift),
        sd=(customers_b * purchases_per_customer_var) ** 0.5,
        observed=obs_purchases_b
    )

# Draw 10k posterior samples with HMC after 10k tuning steps.
# Suite indentation restored (lost in the paste).
with model:
    step = pm.HamiltonianMC()
    trace = pm.sample(10000, tune=10000, step=step)

# Posterior means of the three uplift factors.
print(
    trace['lu'].mean(),
    trace['cu'].mean(),
    trace['pu'].mean()
)

# Posterior standard deviations of the three uplift factors.
lu_samples = trace['lu']
cu_samples = trace['cu']
pu_samples = trace['pu']
print(lu_samples.std(), cu_samples.std(), pu_samples.std())

# Combined multiplicative uplift: customer uplift times purchase uplift.
total_uplift = (1 + lu_samples + cu_samples) * (1 + lu_samples + pu_samples)
print("posterior total uplift: ", total_uplift.mean())

# Marginal posterior densities of each uplift factor.
for samples in (lu_samples, cu_samples, pu_samples):
    sns.distplot(samples)