Hi,
I posted a question on Cross Validated (https://stats.stackexchange.com/questions/321916/posterior-predictive-check-ppc-for-a-bayesian-linear-regression-model-edward), but I feel I may find more experts here.
I’m trying to build a simple Bayesian regression model to test Edward. However, I notice a significant difference between Edward’s PPC results and PyMC3’s.
Common code to generate the data set:
import pandas as pd
import numpy as np
def generateData(intercept=1, slope=3, noise=1, n_points=20):
    # Sample x uniformly on [-3, 3]; y = intercept + slope * x + Gaussian noise
    df = pd.DataFrame({'x': np.random.uniform(-3, 3, size=n_points)})
    df['y'] = intercept + slope * df['x'] + np.random.normal(0, scale=noise, size=n_points)
    df = df.sort_values(['x'], ascending=True).reset_index(drop=True)
    return df
intercept = 1
slope = 3
noise = 1
np.random.seed(99)
data = generateData(intercept=intercept, slope=slope, noise=noise)
The training data looks like the following:
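In case the image does not render, here is a minimal matplotlib sketch that reproduces the training-data scatter plot from the `data` frame above (my addition, not part of the original post):

import matplotlib.pyplot as plt

# (My reconstruction of the figure:) scatter plot of the 20 generated (x, y) points
fig, ax = plt.subplots(1, figsize=(11, 6.5))
ax.scatter(data['x'], data['y'], color="#348ABD")
ax.set_xlabel("x")
ax.set_ylabel("y")
_ = ax.set_title("Training data")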
After building the Bayesian regression model in PyMC3 and in Edward, respectively, I plot the two PPC distributions, as shown below:
It seems odd to me that Edward’s PPC distribution is so skewed and so much wider than PyMC3’s. What explains the difference?
Appendix
PyMC3 code for a Bayesian linear regression
import pymc3 as pm

xvals = data['x'].values
yvals = data['y'].values

with pm.Model() as model:
    BURN_IN_STEPS = 2000
    MCMC_STEPS = 4000
    # Specify priors
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1.0)
    beta0 = pm.Normal('beta0', 0.0, sd=10.0)
    beta1 = pm.Normal('beta1', 0.0, sd=10.0)
    # Specify likelihood
    likelihood = pm.Normal('y', mu=beta0 + beta1 * xvals, sd=sigma,
                           observed=yvals)
    # MCMC
    start = pm.find_MAP()
    trace = pm.sample(BURN_IN_STEPS + MCMC_STEPS, start=start, step=pm.NUTS())
    trace = trace[BURN_IN_STEPS:]

# Get PPC
posterior_predictive_checks = pm.sample_ppc(trace, model=model, samples=1000, progressbar=False)
y_replicas = [y_rep.mean() for y_rep in posterior_predictive_checks['y']]
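As a quick numeric companion to the histogram below, one can also summarize the PyMC3 PPC as a Bayesian p-value; this is my own addition, not part of the original comparison:

# (My addition:) fraction of replicated mean(y) values at least as large as the
# observed mean(y). Values near 0.5 suggest the replicas are centered on the data;
# values near 0 or 1 would flag a systematic mismatch.
p_value_pymc3 = np.mean(np.array(y_replicas) >= yvals.mean())
print("PyMC3 Bayesian p-value:", p_value_pymc3)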
Plot PyMC3 PPC:
import matplotlib.pyplot as plt

FONTSIZE = 20

fig, ax = plt.subplots(1, figsize=(11, 6.5))
ax.hist(y_replicas, bins=20, alpha=0.5, color="#348ABD", histtype="stepfilled", label="replica")
ax.axvline(yvals.mean(), color="#A60628", label="data")
ax.legend(loc=2, fontsize=FONTSIZE)
ax.set_xlabel("mean(y)", fontsize=FONTSIZE, labelpad=15)
_ = ax.set_title("PPC in PyMC3", fontsize=FONTSIZE)
Edward code for a linear regression
import edward as ed
import tensorflow as tf

N = 20
D = 1
X = tf.placeholder(tf.float32, [N, D])
# Priors
sigma = ed.models.Chi2(df=3.0)
w = ed.models.Normal(loc=tf.zeros(D), scale=tf.ones(D) * 10.0)
b = ed.models.Normal(loc=tf.zeros(1), scale=tf.ones(1) * 10.0)
# Likelihood
y = ed.models.Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N) * sigma)

# HMC, with empirical distributions holding the T posterior samples
T = 10000
qs = ed.models.Empirical(params=tf.Variable(tf.ones(T)))
qw = ed.models.Empirical(params=tf.Variable(tf.random_normal((T, D))))
qb = ed.models.Empirical(params=tf.Variable(tf.random_normal((T, 1))))
inference = ed.HMC({sigma: qs, w: qw, b: qb},
                   data={X: data[['x']].values, y: data['y'].values})
inference.run(step_size=3e-3)
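Before computing the PPC, it can help to sanity-check the HMC posterior. A sketch of how I would inspect the empirical posterior means, assuming a 2000-sample burn-in to mirror the PyMC3 setup (my addition; Edward's Empirical variables keep every HMC sample):

# (My addition:) fetch the raw HMC samples and discard an assumed 2000-sample burn-in
sess = ed.get_session()
burn_in = 2000
qw_samples = sess.run(qw.params)[burn_in:]
qb_samples = sess.run(qb.params)[burn_in:]
qs_samples = sess.run(qs.params)[burn_in:]
print("posterior mean of w:", qw_samples.mean(axis=0))
print("posterior mean of b:", qb_samples.mean(axis=0))
print("posterior mean of sigma:", qs_samples.mean())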
# Get PPC
y_post = ed.copy(y, {w: qw, b: qb, sigma: qs})
# Test statistic for the check (renamed from T to avoid shadowing T = 10000 above)
T_stat = lambda ys, _: tf.reduce_mean(y_post)
ppc_stats = ed.ppc(T_stat,
                   data={X: data[['x']].values, y: data['y'].values},
                   latent_vars={w: qw, b: qb, sigma: qs},
                   n_samples=1000)
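As I understand the Edward docs, ed.ppc returns a pair of arrays: the reference distribution of the statistic over replicated data (ppc_stats[0]) and the realized statistic on the observed data (ppc_stats[1]). A matching p-value sketch for the Edward side (again my own addition):

# (My addition:) same Bayesian p-value as on the PyMC3 side, computed from
# Edward's reference distribution of the test statistic.
p_value_edward = np.mean(np.array(ppc_stats[0]) >= data['y'].values.mean())
print("Edward Bayesian p-value:", p_value_edward)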
Plot Edward PPC:
fig, ax = plt.subplots(1, figsize=(11, 6.5))
ax.hist(ppc_stats[0], bins=20, alpha=0.5, color="#348ABD", histtype="stepfilled", label="replica")
ax.axvline(data['y'].values.mean(), color="#A60628", label="data")
ax.legend(loc=2, fontsize=FONTSIZE)
ax.set_xlabel("mean(y)", fontsize=FONTSIZE, labelpad=15)
_ = ax.set_title("PPC in Edward", fontsize=FONTSIZE)