Significant difference in PPC between Edward and PyMC3?


#1

Hi:
I posted a question on Cross Validated https://stats.stackexchange.com/questions/321916/posterior-predictive-check-ppc-for-a-bayesian-linear-regression-model-edward, but feel that I can find more experts here.

I’m trying to build a simple Bayesian regression model to test Edward. However, I notice significant different between Edward’s PPC results and PyMC3’s.

Common code to generate a data set.

import pandas as pd
import numpy as np

def generateData(intercept=1, slope=3, noise=1, n_points=20):
    df = pd.DataFrame({'x': np.random.uniform(-3, 3, size=n_points)})
    df['y'] = intercept + slope * df['x'] + np.random.np.random.normal(0, scale=noise, size=n_points)
    df = df.sort_values(['x'], ascending=True).reset_index(drop=True)
    return df

intercept = 1
slope = 3
noise = 1
np.random.seed(99)
data = generateData(intercept=intercept, slope=slope, noise=noise)

The training data looks like the following:

After a Bayesian regression model is built via PyMC3 and Edward, respectively, I plot the PPC distributions, as shown below:

It seems a bit odd to me that Edward’s PPC distribution is so skewed and much wider than PyMC3’s result?

Appendix

PyMC3 code for a Bayesian linear regression

import pymc3 as pm

xvals = data['x'].values
yvals = data['y'].values
with pm.Model() as model:

    BURN_IN_STEPS = 2000
    MCMC_STEPS = 4000

    # Specify priors
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1.0)    
    beta0 = pm.Normal('beta0', 0.0, sd=10.0)
    beta1 = pm.Normal('beta1', 0.0, sd=10.0)

    # Specify likelihood
    likelihood = pm.Normal('y', mu=beta0 + beta1 * xvals, sd=sigma,
                           observed=yvals)

    # MCMC 
    start = pm.find_MAP()
    trace = pm.sample(BURN_IN_STEPS+MCMC_STEPS, start=start, step=pm.NUTS())
    trace = trace[BURN_IN_STEPS:]

# Get PPC
posterior_predictive_checks = pm.sample_ppc(trace, model=model, samples=1000, progressbar=False)
y_replicas = [y_rep.mean() for y_rep in posterior_predictive_checks['y']]

Plot PyMC3 PPC

fig, ax = plt.subplots(1, figsize=(11, 6.5))
ax.hist(y_replicas, bins=20, alpha=0.5, color="#348ABD", histtype="stepfilled", label="replica")
ax.axvline(yvals.mean(), color="#A60628", label="data")
ax.legend(loc=2, fontsize=FONTSIZE)
ax.set_xlabel("mean(y)", fontsize=20, labelpad=15)
_ = ax.set_title("PPC in PyMC3", fontsize=FONTSIZE)  

Edward code for a linear regression

import edward as ed
import tensorflow as tf

N = 20
D = 1
X = tf.placeholder(tf.float32, [N, D])
sigma = ed.models.Chi2(df=3.0)
w = ed.models.Normal(loc=tf.zeros(D), scale=tf.ones(D)*10.0)
b = ed.models.Normal(loc=tf.zeros(1), scale=tf.ones(1)*10.0)
y = ed.models.Normal(loc=ed.dot(X, w)+b, scale=tf.ones(N)*sigma)

T = 10000

qs = ed.models.Empirical(params=tf.Variable(tf.ones(T)))
qw = ed.models.Empirical(params=tf.Variable(tf.random_normal((T, D) )))
qb = ed.models.Empirical(params=tf.Variable(tf.random_normal((T, 1) )))

inference = ed.HMC({sigma: qs, w: qw, b: qb}, 
                   data={X: data[['x']].values, y: data['y'].values})
inference.run(step_size=3e-3)

# Get PPC
y_post = ed.copy(y, {w: qw, b: qb, sigma: qs})
T = lambda ys, _: tf.reduce_mean(y_post)
ppc_stats = ed.ppc(T, 
                   data={X: data[['x']].values, y: data['y'].values},
                   latent_vars={w: qw, b: qb, sigma: qs},
                   n_samples=1000)

Plot Edward PPC:

fig, ax = plt.subplots(1, figsize=(11, 6.5))
ax.hist(ppc_stats[0], bins=20, alpha=0.5, color="#348ABD", histtype="stepfilled", label="replica")
ax.axvline(data['y'].values.mean(), color="#A60628", label="data")
ax.legend(loc=2, fontsize=FONTSIZE)
ax.set_xlabel("mean(y)", fontsize=FONTSIZE, labelpad=15)
_ = ax.set_title("PPC in Edward", fontsize=FONTSIZE)