Hi
I am trying to construct an RNN/GRU to predict a label for molecular data. Each molecule can be represented as a SMILES (simplified molecular-input line-entry system) string, which (as of now) I am encoding as a one-hot matrix of shape 120 x 56, and each molecule has a continuous label. I am following a GRU implementation from here.
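For reference, my encoding looks roughly like this (the charset below is a made-up stand-in; my real one has 56 symbols, and 120 is the padded length I chose):

import numpy as np

# Hypothetical charset; the real one has 56 symbols, with ' ' as padding.
CHARSET = [' ', 'C', 'c', 'N', 'n', 'O', 'o', '(', ')', '=', '#', '1', '2', '3']

def one_hot_smiles(smiles, max_len=120, charset=CHARSET):
    # Right-pad with the padding character, then set one bit per position.
    onehot = np.zeros((max_len, len(charset)), dtype=np.float32)
    for i, ch in enumerate(smiles.ljust(max_len)):
        onehot[i, charset.index(ch)] = 1.0
    return onehot

# e.g. one_hot_smiles('CC(=O)Oc1ccccc1').shape == (120, 14)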
I am wondering whether it is sensible to flatten the input to a 1 x (120*56) vector. However, when I run my code, I get this error: RuntimeError: maximum recursion depth exceeded while calling a Python object.
Could this be a TensorFlow memory problem? I have only just started using TensorFlow. I am also unsure whether an Embedding layer is appropriate here. Thank you in advance for any help!
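For context, the alternative I have in mind instead of flattening is to keep the 120 x 56 matrix and feed one 56-dimensional one-hot row per time step (so D would be 56, nb_steps would be 120, and no Embedding layer would be needed). A minimal sketch of what I mean, with a simplified update standing in for the full GRU cell:

import tensorflow as tf

batch_size, nb_steps, D, H = 10, 120, 56, 5

# One float32 one-hot row per time step instead of a flattened int vector.
x = tf.placeholder(tf.float32, [batch_size, nb_steps, D], name='x')
Wf = tf.Variable(tf.random_normal([D, H]))

h = tf.zeros([batch_size, H])
for t in range(nb_steps):
    xt = x[:, t, :]  # the one-hot row for character t, shape [batch_size, D]
    h = tf.tanh(tf.matmul(xt, Wf) + h)  # simplified stand-in for gru_cell

Here is my current code: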
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Normal
from edward.util import Progbar
from keras.layers import Embedding
H = 5                # GRU hidden state size
D = 2                # embedding dimension
V = 10               # vocabulary size for the Embedding layer
E = 2                # (unused below)
batch_size = 10
M = 10               # (unused below)
nb_steps = 120 * 56  # flattened sequence length
N = 10               # number of training examples
X_train = np.ones([N, nb_steps], dtype=np.int32)
y_train = np.ones([N, 1], dtype=np.int32)
with tf.variable_scope('model', reuse=True):
    # Weights in GRU
    Wfo = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H]))
    Wro = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H]))
    Wff = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H]))
    Wrf = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H]))
    Wfy = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H]))
    Wry = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H]))
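    # Variational (approximate posterior) distributions, one per weight matrix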
    qWfo = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, H]))))
    qWro = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([H, H]))))
    qWff = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, H]))))
    qWrf = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([H, H]))))
    qWfy = Normal(loc=tf.Variable(tf.random_normal([D, H])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, H]))))
    qWry = Normal(loc=tf.Variable(tf.random_normal([H, H])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([H, H]))))

    # Placeholders
    y_ph = tf.placeholder(tf.float32, [batch_size, 1], name='y_ph')
    x = tf.placeholder(tf.int32, [batch_size, nb_steps], name='x')
    # GRU cell
    def gru_cell(hprev, xt):
        # output gate
        i_o = tf.sigmoid(tf.matmul(xt, Wfo) + tf.matmul(hprev, Wro))
        # forget gate
        i_f = tf.sigmoid(tf.matmul(xt, Wff) + tf.matmul(hprev, Wrf))
        # intermediate
        y = tf.tanh(tf.matmul(xt, Wfy) + tf.matmul((i_f * hprev), Wry))
        # new state
        return (1 - i_o) * y + (i_o * hprev)
    # Embedding (?)
    x_ = Embedding(V, D, name='Embedding')(x)

    # Initialise hidden state
    h = tf.zeros(shape=(batch_size, H))  # initial state
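    # NOTE: this unrolls nb_steps - 1 = 120*56 - 1 = 6719 GRU steps into one
    # graph; I suspect this depth is what pushes Edward's recursive graph
    # traversal past Python's default recursion limit.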
    for t in range(nb_steps - 1):
        h = gru_cell(h, x_[:, t, :])
    print('h : ', h)
    # Variational Inference
    W1 = Normal(loc=tf.zeros([D, 1]), scale=tf.ones([D, 1]))
    W2 = Normal(loc=tf.zeros([H, D]), scale=tf.ones([H, D]))
    qW1 = Normal(loc=tf.Variable(tf.random_normal([D, 1])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, 1]))))
    qW2 = Normal(loc=tf.Variable(tf.random_normal([H, D])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([H, D]))))

    # Output head: map the final hidden state to a scalar, sigmoid(h W2) W1
    def fhw(h_in):
        return tf.matmul(tf.sigmoid(tf.matmul(h_in, W2)), W1)

    y = Normal(loc=fhw(h), scale=0.1 * tf.ones([batch_size, 1]))
# Inference
inference = ed.KLqp({W1: qW1, W2: qW2,
                     Wfo: qWfo, Wro: qWro,
                     Wff: qWff, Wrf: qWrf,
                     Wfy: qWfy, Wry: qWry}, data={y: y_ph})
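# (As I understand it, ed.KLqp recursively copies the model graph to swap in
# the q distributions, so the recursion may be happening inside Edward rather
# than in my own code.)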
optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, scale={y: len(X_train) / batch_size})  # always redefine inference before
sess = ed.get_session()
tf.global_variables_initializer().run()
n_epoch = 1
n_iter_per_epoch = 1
for epoch in range(n_epoch):
    avg_loss = 0.0
    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
        pbar.update(t)
        # Sample a random mini-batch of indices (randint's upper bound is exclusive).
        batch = np.random.randint(0, len(X_train), batch_size)
        info_dict = inference.update({x: X_train[batch], y_ph: y_train[batch]})
        avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood per example.
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / batch_size
    print("log p(x) >= {:0.3f}".format(avg_loss))
## Results should give ones
X_test = np.ones([10, nb_steps], dtype=np.int32)
y_test = np.ones([10, 1])  # labels have shape [batch_size, 1], not [10, nb_steps]
test1 = sess.run({W1: qW1.sample(), W2: qW2.sample(),
                  Wfo: qWfo.sample(), Wro: qWro.sample(),
                  Wff: qWff.sample(), Wrf: qWrf.sample(),
                  Wfy: qWfy.sample(), Wry: qWry.sample()}, {x: X_test})
y_post = ed.copy(y, {W1: qW1, W2: qW2,
                     Wfo: qWfo, Wro: qWro,
                     Wff: qWff, Wrf: qWrf,
                     Wfy: qWfy, Wry: qWry})
y_out = sess.run(y_post, feed_dict={x: X_test})
print('MSE : ', np.mean(np.square(y_out - y_test)))
print(y_out[0:10])