Criticism of neural net with inferred scale

I have implemented a neural net with a fixed scale = 1 and I am generating predictions like this, following the getting-started example:

rs = np.random.RandomState(0)
inputs = X_test
x = inputs
# Draw 100 sets of weights from the variational posteriors and stack the
# resulting prediction vectors row-wise.
mus = tf.stack(
    [neural_network_with_2_layers(x, q_W_0.sample(), q_W_1.sample(),
                                  q_b_0.sample(), q_b_1.sample())
     for _ in range(100)])

outputs = mus.eval()  # shape [100, N]: one row of predictions per posterior draw
p = np.average(outputs, 0)  # average the 100 rows point-wise

plt.plot(p, label="Average of 100 predictions")
plt.plot(Y_test, label="Y_test")
plt.legend()
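
Since each row of outputs is one posterior draw, the spread of the draws also gives a rough predictive band. A minimal sketch using the variables above (the ±2 standard deviation band is an arbitrary choice, and it only reflects the spread of the network output, not the observation noise):

s = np.std(outputs, 0)  # per-point spread of the 100 Monte Carlo predictions
plt.fill_between(np.arange(len(p)), p - 2 * s, p + 2 * s,
                 alpha=0.3, label="+/- 2 std of the 100 predictions")
plt.plot(p, label="Average of 100 predictions")
plt.plot(Y_test, label="Y_test")
plt.legend()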

While this works like a charm, I wanted to step it up and infer the scale as well instead of arbitrarily fixing it at 1. I added a q_sigma using a log-normal and proceeded with the inference. My question is: how do I swap the prior sigma for the new q_sigma during the prediction-generation step?

Code I am using for the neural net:

def neural_network_with_2_layers(x, W_0, W_1, b_0, b_1):
    h = tf.nn.tanh(tf.matmul(x, W_0) + b_0)
    h = tf.matmul(h, W_1) + b_1
    return tf.reshape(h, [-1])

dim = 20  # hidden layer dimension
W_0 = Normal(loc=tf.zeros([D, dim]), scale=tf.ones([D, dim]))
W_1 = Normal(loc=tf.zeros([dim, 1]), scale=tf.ones([dim, 1]))
b_0 = Normal(loc=tf.zeros(dim), scale=tf.ones(dim))
b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1))

# We also want to infer the observation scale. The prior is an inverse gamma;
# its variational approximation (q_sigma below) is a log-normal.
sigma = InverseGamma(concentration=tf.ones([1]), rate=tf.ones([1]))


x = tf.placeholder(tf.float32, [None, D])

a = neural_network_with_2_layers(x, W_0, W_1, b_0, b_1)
b = tf.reshape(a, shape=[-1, 1])
y = Normal(loc=b, scale=sigma)  # likelihood; bound to Y_train in the data dict below

q_W_0 = Normal(loc=tf.Variable(tf.random_normal([D, dim])),
               scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, dim]))))
q_W_1 = Normal(loc=tf.Variable(tf.random_normal([dim, 1])),
               scale=tf.nn.softplus(tf.Variable(tf.random_normal([dim, 1]))))
q_b_0 = Normal(loc=tf.Variable(tf.random_normal([dim])),
               scale=tf.nn.softplus(tf.Variable(tf.random_normal([dim]))))
q_b_1 = Normal(loc=tf.Variable(tf.random_normal([1])),
               scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))
# Variational approximation for sigma: a log-normal, i.e. exp of a Gaussian
q_sigma = ed.models.TransformedDistribution(
    distribution=ed.models.NormalWithSoftplusScale(tf.Variable(tf.zeros([1])),
                                                   tf.Variable(tf.zeros([1]))),
    bijector=bijector.Exp())
# Inference
inference = ed.KLpq(latent_vars={W_0: q_W_0, b_0: q_b_0,
                                 W_1: q_W_1, b_1: q_b_1, sigma: q_sigma},
                    data={x: X_train, y: Y_train})

inference.run(n_samples=20, n_iter=10000)
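
After inference, a quick sanity check on the inferred noise scale is to look at draws from q_sigma. A minimal sketch (the 1000-draw count is arbitrary):

sess = ed.get_session()
# Monte Carlo estimate of the posterior mean of sigma from the log-normal q_sigma.
sigma_draws = sess.run(q_sigma.sample(1000))
print("Approximate posterior mean of sigma:", sigma_draws.mean())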

Update: I think the following does the trick, in case someone has the same question:

# Np is the number of test points; n_of_preds is the number of posterior draws.
xp = tf.placeholder(tf.float32, [Np, D])
A1 = neural_network_with_2_layers(xp, q_W_0, q_W_1, q_b_0, q_b_1)
A2 = tf.reshape(A1, [Np, 1])
y_post = Normal(loc=A2, scale=q_sigma)
# Equivalent to the above, except the copy keeps the original placeholder x as input:
# y_post = ed.copy(y, {W_0: q_W_0, b_0: q_b_0,
#                      W_1: q_W_1, b_1: q_b_1, sigma: q_sigma})

sess = ed.get_session()
outs2 = np.stack([sess.run(y_post, {xp: X_test}) for _ in range(n_of_preds)])
outs2 = outs2.reshape([n_of_preds, Np])
p = np.average(outs2, 0)
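
Since the title mentions criticism: once y_post is defined, Edward's criticism API can score the posterior predictive directly. A minimal sketch, reshaping Y_test so it matches y_post's [Np, 1] shape (the choice of metrics is mine):

# Point-based evaluation of the posterior predictive on the test set.
print(ed.evaluate('mean_squared_error',
                  data={xp: X_test, y_post: Y_test.reshape([Np, 1])}))
print(ed.evaluate('mean_absolute_error',
                  data={xp: X_test, y_post: Y_test.reshape([Np, 1])}))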