I have built the following model for NMF:
Likelihood:
X ~ Normal(W*H, sigma=1)
Prior:
W ~ Exponential(lambda_w=1)
H ~ Exponential(lambda_h=1)
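As a quick illustration, drawing once from this generative model with plain NumPy would look roughly like this (the shapes N=100, D=12, K=5 are only my assumption, chosen to match the Edward code below):

import numpy as np

# prior-predictive draw from the model above (sketch, not part of the fit)
N, D, K = 100, 12, 5
W_draw = np.random.exponential(scale=1.0, size=(N, K))   # rate lambda_w = 1
H_draw = np.random.exponential(scale=1.0, size=(K, D))   # rate lambda_h = 1
X_draw = np.random.normal(loc=W_draw @ H_draw, scale=1.0)  # sigma = 1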
The code in Edward looks like this:
import numpy as np
import matplotlib.pyplot as plt
import edward as ed
import tensorflow as tf
from edward.models import Gamma, Poisson, Normal, PointMass, TransformedDistribution, Exponential
K=5
sample = 100
feature = 12
def dataset_generation(sample=100, feature=10, K=5):
    w = np.repeat(1, int((0.75 * sample) // K))
    h = np.repeat(1, feature // K)
    W = np.zeros((sample, K))
    H = np.zeros((K, feature))
    for i in range(K):
        W[i * len(w):(i + 1) * len(w), i] = w
        H[i, i * len(h):(i + 1) * len(h)] = h
    X = W @ H
    XX = X * np.random.normal(6, 1, size=X.shape)
    XXX = XX.copy()
    # replace every entry that is zero in the noise-free data with background noise
    XXX[np.where(X == 0)] = np.random.normal(2, 1, size=len(XXX[np.where(X == 0)]))
    # replace non-positive values with small positive numbers
    XXX[np.where(XXX <= 0)] = np.random.uniform(0.0001, 0.1, size=len(XXX[np.where(XXX <= 0)]))
    return XXX
X = dataset_generation(sample=sample,feature=feature,K=K)
plt.pcolormesh(X)
plt.show()
def lognormal_q(loc, scale):
    # log-normal variational distribution: exp-transform of a Normal
    min_scale = 1e-5
    rv = TransformedDistribution(
        distribution=Normal(loc, tf.maximum(tf.nn.softplus(scale), min_scale)),
        bijector=tf.contrib.distributions.bijectors.Exp())
    return rv
X = X.astype(np.float32)
W = Exponential(rate=tf.ones([sample, K]))
H = Exponential(rate=tf.ones([K, feature])*0.1)
V = Normal(
    loc=tf.matmul(W, H),
    scale=tf.ones([sample, feature]))
qW_loc = tf.Variable(tf.random_normal([sample, K]))
qW_scale = tf.Variable(tf.random_normal([sample, K],stddev=0.1))
qH_loc = tf.Variable(tf.random_normal([K, feature]))
qH_scale = tf.Variable(tf.random_normal([K, feature],stddev=0.1))
qW = lognormal_q(loc=qW_loc,scale=qW_scale)
qH = lognormal_q(loc=qH_loc,scale=qH_scale)
inference = ed.KLqp({W: qW, H: qH}, data={V: X})
optimizer = tf.train.AdamOptimizer()
inference.initialize(n_iter = 30000,optimizer=optimizer)
tf.global_variables_initializer().run()
loss = np.empty(inference.n_iter, dtype=np.float32)
for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
    loss[t] = info_dict["loss"]
inference.finalize()
plt.plot(range(inference.n_iter)[5000:],loss[5000:])
plt.show()
w = qW.sample(1000).eval()
h = qH.sample(1000).eval()
w= np.mean(w,axis=0)
h= np.mean(h,axis=0)
fig,axes = plt.subplots(1,3,figsize=(12,3))
axes[0].pcolormesh(np.asarray(w),cmap="copper")
axes[1].pcolormesh(np.asarray(h),cmap="copper")
axes[2].pcolormesh(np.asarray(w@h),cmap="copper")
plt.show()
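As a quick sanity check, the posterior-mean reconstruction can be compared against the generated data (a rough sketch that only reuses X, w and h from above):

# compare the data with the posterior-mean reconstruction (sketch)
recon = w @ h
print("reconstruction RMSE:", np.sqrt(np.mean((X - recon) ** 2)))
fig, axes = plt.subplots(1, 2, figsize=(8, 3))
axes[0].pcolormesh(X, cmap="copper")
axes[1].pcolormesh(recon, cmap="copper")
plt.show()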
This works pretty well, but the data I actually have is too large for my computer, so I implemented data subsampling based on the tutorial on the Edward site:
batch_sample = 50
def generator(arrays, batch_size):
    """Generate batches, one with respect to each array's first axis."""
    starts = [0] * len(arrays)  # pointers to where we are in iteration
    while True:
        batches = []
        for i, array in enumerate(arrays):
            start = starts[i]
            stop = start + batch_size
            diff = stop - array.shape[0]
            if diff <= 0:
                batch = array[start:stop]
                starts[i] += batch_size
            else:
                batch = np.concatenate((array[start:], array[:diff]))
                starts[i] = diff
            batches.append(batch)
        yield batches
W = Exponential(rate=tf.ones([batch_sample, K]))
H = Exponential(rate=tf.ones([K, feature]) * 0.1)
X = X.astype(np.float32)
X_ph = tf.placeholder(tf.float32, [batch_sample, feature])
qW_loc = tf.Variable(tf.random_normal([batch_sample, K]))
qW_scale = tf.Variable(tf.random_normal([batch_sample, K],stddev=0.1))
qH_loc = tf.Variable(tf.random_normal([K, feature]))
qH_scale = tf.Variable(tf.random_normal([K, feature],stddev=0.1))
qW = lognormal_q(qW_loc,qW_scale)
qH = lognormal_q(qH_loc,qH_scale)
data = generator([X], batch_sample)
V = Normal(loc=tf.matmul(W, H), scale=tf.ones([batch_sample, feature]))
inference_local = ed.KLqp({W: qW}, data={V: X_ph, H: qH})
inference_global = ed.KLqp({H: qH}, data={V: X_ph, W: qW})
inference_local.initialize(
    n_iter=75000,
    scale={
        V: float(sample) / batch_sample,
        W: float(sample) / batch_sample
    })
inference_global.initialize(scale={
    V: float(sample) / batch_sample,
    W: float(sample) / batch_sample
})
tf.global_variables_initializer().run()
qw_init = tf.variables_initializer([qW_loc,qW_scale])
for t in range(2000):
    x_batch = next(data)[0]
    for _ in range(30):
        inference_local.update(feed_dict={X_ph: x_batch})
    inference_global.update(feed_dict={X_ph: x_batch})
    qw_init = tf.variables_initializer([qW_loc, qW_scale])
H_ = np.mean(qH.sample(5000).eval(),axis = 0)
W_ = np.mean(qW.sample(5000).eval(),axis = 0)
plt.pcolormesh(H_)
plt.show()
plt.pcolormesh(W_)
plt.show()
plt.pcolormesh(W_@H_)
plt.show()
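For reference, the data and the reconstruction from the subsampled fit can also be put side by side (a sketch reusing X, W_ and H_ from above; since W_ only has batch_sample rows, it is compared against the first batch_sample rows of X):

# side-by-side comparison of data and reconstruction (sketch)
fig, axes = plt.subplots(1, 2, figsize=(8, 3))
axes[0].pcolormesh(X[:batch_sample], cmap="copper")
axes[1].pcolormesh(W_ @ H_, cmap="copper")
plt.show()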
The results, however, are not good. Is there something wrong with the subsampling model?