Suppose that I initially train a Bayesian model with 5 training samples. As time passes by, I get more samples, and want to update the model with 2 samples a time for 20 times. Below is my code, I expect to see that the below result shoud be closer and closer to the true value as more samples are added and finally should be the same as I train the model with 45 samples together. But that’s not the case, the euclidean distance between the true and estimated values ocillate after the first 5 training samples. And the final results are worse than training 45 samples together. Do I miss anything?
from edward.models import Normal,Empirical
import tensorflow as tf
import edward as ed
import numpy as np
class BayesianLR(object):
def __init__(self,log_dir=None):
self.dim_x=None
self.ncases_train=None
self.ncases_post=None
self.log_dir=log_dir;#'E:/PythonCode/BayesianLinearStrategy/LOG'
def BuildModel(self,n_case,dim_x):
self.n_case=n_case
self.dim_x=dim_x
with tf.name_scope("model"):
self.x = tf.placeholder(tf.float32, [self.n_case, self.dim_x])
self.w = Normal(loc=tf.zeros(self.dim_x), scale=tf.ones(self.dim_x))
self.b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
self.y = Normal(loc=ed.dot(self.x, self.w) + self.b, scale=tf.ones(self.n_case))
def InferenceByKLqp(self,x_train,y_train,n_iter,samples4grad=5):
with tf.name_scope("posterior"):
self.qw = Normal(loc=tf.Variable(tf.random_normal([self.dim_x])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([self.dim_x]))))
self.qb = Normal(loc=tf.Variable(tf.random_normal([1])),
scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))
self.inference = ed.KLqp({self.w: self.qw, self.b: self.qb}, data={self.x: x_train, self.y: y_train})
self.samples4grad=samples4grad
self.inference.run(n_samples=samples4grad, n_iter=n_iter,logdir=self.log_dir)
def InferenceUpdate(self,x_train,y_train,n_iter=10):
n_case=x_train.shape[0]
dim_x=x_train.shape[1]
if self.n_case!=n_case:
self.BuildModel(n_case,dim_x)
for ic in range(n_iter):
self.inference.update(feed_dict={self.x: x_train, self.y: y_train})
def GetCoeffs(self):
w_mean=self.qw.mean().eval()
w_std=self.qw.stddev().eval()
b_mean=self.qb.mean().eval()
b_std=self.qb.stddev().eval()
return w_mean,w_std,b_mean,b_std
def build_toy_dataset(N, w,b, noise_std=0.1):
D = len(w)
x = np.random.randn(N, D)
noise_stdb=np.random.randn(1)
y = np.dot(x, w) + b+np.random.normal(0, noise_std, size=N)
return x, y
with tf.device('/cpu:0'):
ed.set_seed(12)
N = 5 # number of data points
D = 3 # number of features
n_iter=50
b_true=np.random.randn(1)
w_true = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, w_true,b_true)
X_test, y_test = build_toy_dataset(N, w_true,b_true)
myBLR=BayesianLR(log_dir)
myBLR.BuildModel(N,D)
myBLR.InferenceByKLqp(X_train, y_train,n_iter=n_iter)
#myBLR.InferenceBySGHMC(X_train, y_train,n_samples=5000)
w_mean,w_std,b_mean,b_std=myBLR.GetCoeffs()
print('mean evaluated ',np.hstack((w_mean,b_mean)))
dist=np.linalg.norm(np.hstack((w_true,b_true))-np.hstack((w_mean,b_mean)))
print('First update dist= ',dist)
for ic in range(20):
X_train2, y_train2 = build_toy_dataset(2, w_true,b_true)
#myBLR.InferenceUpdate(X_train2, y_train2,n_iter=n_iter) #for p(z | x_2)
#updated to
X_train=np.vstack((X_train,X_train2));y_train=np.append(y_train,y_train2)
myBLR.InferenceUpdate(X_train, y_train,n_iter=n_iter) #for p(z | x_1, x_2)
w_mean,w_std,b_mean,b_std=myBLR.GetCoeffs()
dist=np.linalg.norm(np.hstack((w_true,b_true))-np.hstack((w_mean,b_mean)))
print('First update dist= ',dist)
print('mean evaluated ',np.hstack((w_mean,b_mean)))
print('true ',np.hstack((w_true,b_true)))
myBLR2=BayesianLR(log_dir)
myBLR2.BuildModel(X_train.shape[0],D)
myBLR2.InferenceByKLqp(X_train, y_train,n_iter=n_iter)
w_mean,w_std,b_mean,b_std=myBLR2.GetCoeffs()
print('mean evaluated2 ',np.hstack((w_mean,b_mean)))
dist=np.linalg.norm(np.hstack((w_true,b_true))-np.hstack((w_mean,b_mean)))
print('Batch update dist2= ',dist)