Unable to infer the variance in a simple Bayesian linear regression

Hi,
I am a new user of Edward. It is a great library; thanks for the hard work.

I am trying to fit a simple Bayesian linear regression, Y = w*x + b + sigma * eps with eps ~ N(0, 1),
where w = 7, b = -15, sigma = 4.0 (these are the values used in the code below).

I am able to run KLqp inference on w and b; however, inference of the sigma parameter (true value = 4.0) fails.

People recommend using a log-normal prior for sigma, which I have implemented below, but it still does not work. Could someone take a look?

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import seaborn

from edward.models import Normal
import edward.models as edm
from tensorflow.contrib.distributions.python.ops import bijectors as bijector

# Apply the 'ggplot' style sheet to every figure drawn below.
plt.style.use('ggplot')
# Fix NumPy's global RNG so the synthetic dataset is reproducible.
np.random.seed(123)

# Ground-truth parameters of the generating process
#   Y = true_alpha + true_beta * X + true_sigma * eps,   eps ~ N(0, 1)
true_alpha, true_beta, true_sigma = -15.0, 7.0, 4.0

# Number of observations.
size = 252

# The covariates are drawn first and the noise second; this order must be
# kept so the seeded random stream yields the same dataset.
X_train = np.random.randn(size)
noise = np.random.randn(size)
Y_train = true_alpha + true_beta * X_train + noise * true_sigma
# Seed Edward before any part of the model graph is built.
ed.set_seed(123)

D = 1  # number of features
sample_size = 10000  # used below both as the KLqp iteration count and as the number of posterior draws

# Turn the 1-D covariate vector into a (size, D) column matrix, the shape
# the placeholder X is declared with.
X_train = X_train.reshape(size, D)

# --- Model: y ~ Normal(X w + b, sigma) ------------------------------------

# Design-matrix placeholder of shape (size, D).
X = tf.placeholder(dtype=tf.float32, shape=[size, D])

# Standard-normal priors on the slope vector and the intercept.
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))

# Log-normal prior on the noise scale: exp(N(0, 0.25)), which keeps sigma > 0.
log_sigma_prior = Normal(loc=0.0, scale=0.25)
sigma = edm.TransformedDistribution(
    distribution=log_sigma_prior,
    bijector=bijector.Exp(),
)

# Linear predictor and Gaussian likelihood.
yhat = ed.dot(X, w) + b
y = Normal(loc=yhat, scale=sigma)

# --- Variational family ----------------------------------------------------
# Each factor is parameterised by tf.Variables, which are what KLqp updates.
# The scale variables are passed through softplus so they stay positive.
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

# Log-normal approximation for sigma: exp(Normal(loc, scale)).
# FIX: loc and scale used to be the constants 0.0 and 0.25, so this factor had
# no trainable parameters and could never move away from its initial
# distribution during inference. Both are now scalar tf.Variables (scalar to
# match the shape of the sigma prior), with the scale constrained by softplus
# exactly like qw and qb.
qsigma = ed.models.TransformedDistribution(
    distribution=ed.models.Normal(
        loc=tf.Variable(tf.random_normal([])),
        scale=tf.nn.softplus(tf.Variable(tf.random_normal([])))),
    bijector=bijector.Exp())

# --- Inference -------------------------------------------------------------
# Pair every latent variable with its variational factor and bind the
# placeholder / likelihood to the training data.
latent_vars = {w: qw, b: qb, sigma: qsigma}
observed = {X: X_train, y: Y_train}

inference = ed.KLqp(latent_vars, data=observed)
inference.run(n_iter=sample_size)

# Draw sample_size values from each variational factor after the run.
w0 = qw.sample(sample_size).eval()
b0 = qb.sample(sample_size).eval()
my_sigma = qsigma.sample(sample_size).eval()

# --- Plots -----------------------------------------------------------------
# One histogram per parameter, stacked vertically in a single figure.
panels = [("Slope", w0), ("Intercept", b0), ("sigma", my_sigma)]
for row, (label, draws) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    plt.title(label)
    plt.hist(draws)

plt.show()