Here’s the full, reproducible code. I’m banging my head against my desk trying to figure out why the HMC sampler isn’t working. Any help would be greatly appreciated!
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import edward2 as ed
import lifetimes
from lifetimes.datasets import load_dataset
import pandas as pd
import numpy as np
cdnow_transactions = load_dataset(
    'CDNOW_sample.txt',
    header=None,
    delim_whitespace=True,
    names=['customer_id', 'customer_index', 'date', 'quantity', 'amount'],
    converters={'date': lambda x: pd.to_datetime(x, format="%Y%m%d")}
)
rfm = lifetimes.utils.summary_data_from_transaction_data(
    cdnow_transactions,
    'customer_id',
    'date',
    observation_period_end=pd.to_datetime('1997-09-30'),
    freq='W'
)
N = rfm.shape[0]  # number of customers
ex = tf.convert_to_tensor(rfm['frequency'].values, dtype=tf.float64)    # repeat purchase counts
tee_ex = tf.convert_to_tensor(rfm['recency'].values, dtype=tf.float64)  # recency (weeks)
Tee = tf.convert_to_tensor(rfm['T'].values, dtype=tf.float64)           # length of training period (weeks)
sd = tf.constant(5, dtype=tf.float64)  # scale of the HalfNormal hyperpriors
def pnbd_ll(p1x, t_x, t, lamda, mu):
    # Pareto/NBD log-likelihood, evaluated per customer
    log_lamda = tf.log(lamda)
    log_mu = tf.log(mu)
    mu_plus_lamda = lamda + mu
    log_mu_plus_lamda = tf.log(mu_plus_lamda)
    p_1 = p1x * log_lamda + log_mu - log_mu_plus_lamda - t_x * mu_plus_lamda
    p_2 = (p1x + 1) * log_lamda - log_mu_plus_lamda - t * mu_plus_lamda
    return tf.log(tf.exp(tf.cast(p_1, dtype=tf.float64)) + tf.exp(tf.cast(p_2, dtype=tf.float64)))
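# (Side note, not used below: the log(exp(p_1) + exp(p_2)) term above can
# underflow for large t. An equivalent, numerically safer form of the same
# likelihood would use logsumexp. Sketch only -- the run below still uses
# pnbd_ll exactly as written.)
def pnbd_ll_stable(p1x, t_x, t, lamda, mu):
    log_lamda = tf.log(lamda)
    log_mu = tf.log(mu)
    mu_plus_lamda = lamda + mu
    log_mu_plus_lamda = tf.log(mu_plus_lamda)
    p_1 = p1x * log_lamda + log_mu - log_mu_plus_lamda - t_x * mu_plus_lamda
    p_2 = (p1x + 1) * log_lamda - log_mu_plus_lamda - t * mu_plus_lamda
    # log(exp(p_1) + exp(p_2)), computed without exponentiating large magnitudes
    return tf.reduce_logsumexp(tf.stack([p_1, p_2], axis=0), axis=0)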
def pnbd_log_prob(p1x, t_x, t, r, alpha, s, beta, lamda, mu):
    # Hyperpriors on the population-level parameters
    r_rv = ed.HalfNormal(scale=sd, name="r")
    alpha_rv = ed.HalfNormal(scale=sd, name="alpha")
    s_rv = ed.HalfNormal(scale=sd, name="s")
    beta_rv = ed.HalfNormal(scale=sd, name="beta")
    # Heterogeneous purchase (lamda) and dropout (mu) rates, one per customer
    lamda_rv = ed.Gamma(concentration=tf.cast(r_rv, tf.float64),
                        rate=tf.cast(alpha_rv, tf.float64),
                        sample_shape=N, name="lamda")
    mu_rv = ed.Gamma(concentration=tf.cast(s_rv, tf.float64),
                     rate=tf.cast(beta_rv, tf.float64),
                     sample_shape=N, name="mu")
    # Unnormalized joint log-density: data log-likelihood plus the prior
    # log-probs evaluated at the current chain state
    full_model_log_p = tf.reduce_sum(pnbd_ll(p1x, t_x, t, lamda_rv, mu_rv)) + \
        r_rv.distribution.log_prob(r) + \
        alpha_rv.distribution.log_prob(alpha) + \
        s_rv.distribution.log_prob(s) + \
        beta_rv.distribution.log_prob(beta) + \
        tf.reduce_sum(lamda_rv.distribution.log_prob(lamda)) + \
        tf.reduce_sum(mu_rv.distribution.log_prob(mu))
    return full_model_log_p
num_results = 2000
num_burnin_steps = 5000
def unnormalized_posterior_log_prob(r, alpha, s, beta, lamda, mu):
    return pnbd_log_prob(ex, tee_ex, Tee, r, alpha, s, beta, lamda, mu)
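# (Not something my script currently does -- just a sketch of a sanity check I
# could add: evaluate the target log-density once at the chain's initial state
# to confirm it is finite before sampling. All names match the script above.)
initial_state_check = unnormalized_posterior_log_prob(
    tf.ones([], dtype=tf.float64), tf.ones([], dtype=tf.float64),
    tf.ones([], dtype=tf.float64), tf.ones([], dtype=tf.float64),
    tf.ones([N], dtype=tf.float64), tf.ones([N], dtype=tf.float64))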
hmc = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=unnormalized_posterior_log_prob,
    step_size=.2,
    num_leapfrog_steps=1
)
samples, kernel_results = tfp.mcmc.sample_chain(
    num_results=num_results,
    num_burnin_steps=num_burnin_steps,
    current_state=[
        tf.ones([], dtype=tf.float64),   # r
        tf.ones([], dtype=tf.float64),   # alpha
        tf.ones([], dtype=tf.float64),   # s
        tf.ones([], dtype=tf.float64),   # beta
        tf.ones([N], dtype=tf.float64),  # lamda
        tf.ones([N], dtype=tf.float64)   # mu
    ],
    kernel=hmc)
with tf.Session() as sess:
    samples_, kernel_results_ = sess.run([samples, kernel_results])
    # count how many of the num_results draws were accepted
    print(np.sum(kernel_results_.is_accepted))
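One thing I'm unsure about (only a guess, not something I've verified): every parameter in the model has positive support (HalfNormal and Gamma priors), while HMC proposes moves in unconstrained space, so proposals that cross zero would produce NaN/-inf log-densities and get rejected. Below is an untested sketch of how the same kernel could be wrapped in a TransformedTransitionKernel so the chain explores an unconstrained space; the choice of a Softplus bijector is just my assumption.

# Untested sketch, not part of the run above: map each positive-constrained
# state part through a Softplus bijector so HMC proposes in unconstrained space.
unconstraining_bijectors = [tfp.bijectors.Softplus()] * 6  # r, alpha, s, beta, lamda, mu
transformed_hmc = tfp.mcmc.TransformedTransitionKernel(
    inner_kernel=hmc,
    bijector=unconstraining_bijectors
)
# sample_chain would then take kernel=transformed_hmc with the same current_state.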