def layer_norm_compute_python(x, epsilon, scale, bias):
    """Normalize `x` over its last axis, then apply a learned scale and bias.

    Args:
        x: tensor to normalize (normalization is over the final dimension).
        epsilon: small constant added to the variance for numerical stability.
        scale: multiplicative (gamma) parameter, broadcast over `x`.
        bias: additive (beta) parameter, broadcast over `x`.

    Returns:
        Tensor of the same shape as `x`: `(x - mean) / sqrt(var + epsilon) * scale + bias`.
    """
    mu = tf.reduce_mean(x, axis=[-1], keep_dims=True)
    centered = x - mu
    variance = tf.reduce_mean(tf.square(centered), axis=[-1], keep_dims=True)
    return centered * tf.rsqrt(variance + epsilon) * scale + bias
def layer_norm(x, filters=None, epsilon=1e-6, scope=None, reuse=None):
    """Layer normalization with trainable per-channel scale and bias.

    Args:
        x: input tensor; normalized over its last dimension.
        filters: size of the last dimension; inferred from `x` when None.
        epsilon: variance-stabilizing constant passed to the compute kernel.
        scope: optional variable scope name (defaults to "layer_norm").
        reuse: variable-reuse flag forwarded to `tf.variable_scope`.

    Returns:
        The layer-normalized tensor, same shape as `x`.
    """
    if filters is None:
        filters = x.get_shape()[-1]
    with tf.variable_scope(scope, default_name="layer_norm", values=[x], reuse=reuse):
        # NOTE(review): `regularizer` is a module-level name defined outside
        # this chunk — confirm it is in scope where this file is used.
        gamma = tf.get_variable(
            "layer_norm_scale",
            [filters],
            regularizer=regularizer,
            initializer=tf.ones_initializer(),
        )
        beta = tf.get_variable(
            "layer_norm_bias",
            [filters],
            regularizer=regularizer,
            initializer=tf.zeros_initializer(),
        )
        return layer_norm_compute_python(x, epsilon, gamma, beta)
# Reference: adapted from the QANet implementation.