1. 数学中的L1和L2正则化
在数学中,L1正则化是向量各个元素绝对值之和;L2正则化是向量各个元素平方后,再求和。
2. Tensorflow中L1和L2
在Tensorflow中,对数学中的L1和L2,加入了惩罚系数scale;
且,这个惩罚系数暴露给开发者,根据实际,实时调节惩罚系数。
数学公式即为:
L1:scale*(|x1|+|x2|+…+|xn|)
L2:scale*(|x1|**2 + |x2|**2 + … + |xn|**2)/2
2.1 L1的接口
…\Lib\site-packages\tensorflow\contrib\layers\python\layers\regularizers.py
def l1_regularizer(scale, scope=None):
  """Returns a function that can be used to apply L1 regularization to weights.

  L1 regularization encourages sparsity.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.
    scope: An optional scope name.

  Returns:
    A function with signature `l1(weights)` that applies L1 regularization.

  Raises:
    ValueError: If scale is negative or if scale is not a float.
  """
  # Reject integer scales outright: an int here is almost always a bug
  # (e.g. passing 1 instead of 1.0) and would silently change dtype math.
  if isinstance(scale, numbers.Integral):
    raise ValueError('scale cannot be an integer: %s' % scale)
  if isinstance(scale, numbers.Real):
    if scale < 0.:
      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
                       scale)
    if scale == 0.:
      # A zero penalty is a no-op: return a stub so callers can skip the op.
      logging.info('Scale of 0 disables regularizer.')
      return lambda _: None

  def l1(weights, name=None):
    """Applies L1 regularization to weights."""
    with ops.name_scope(scope, 'l1_regularizer', [weights]) as name:
      my_scale = ops.convert_to_tensor(scale,
                                       dtype=weights.dtype.base_dtype,
                                       name='scale')
      # scale * sum(|w_i|): take element-wise absolute values, sum them,
      # then multiply by the penalty coefficient.
      return standard_ops.multiply(
          my_scale,
          standard_ops.reduce_sum(standard_ops.abs(weights)),
          name=name)

  return l1
在源代码中,参数scale就是惩罚系数。从
standard_ops.multiply(
my_scale,
standard_ops.reduce_sum(standard_ops.abs(weights)),
name=name)
中,abs(weights)对各元素先取绝对值,reduce_sum将其相加,然后multiply乘以惩罚系数scale。
2.2 L2的接口
…\Lib\site-packages\tensorflow\contrib\layers\python\layers\regularizers.py
def l2_regularizer(scale, scope=None):
  """Returns a function that can be used to apply L2 regularization to weights.

  Small values of L2 can help prevent overfitting the training data.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.
    scope: An optional scope name.

  Returns:
    A function with signature `l2(weights)` that applies L2 regularization.

  Raises:
    ValueError: If scale is negative or if scale is not a float.
  """
  # Reject integer scales outright: an int here is almost always a bug
  # (e.g. passing 1 instead of 1.0) and would silently change dtype math.
  if isinstance(scale, numbers.Integral):
    raise ValueError('scale cannot be an integer: %s' % (scale,))
  if isinstance(scale, numbers.Real):
    if scale < 0.:
      raise ValueError('Setting a scale less than 0 on a regularizer: %g.' %
                       scale)
    if scale == 0.:
      # A zero penalty is a no-op: return a stub so callers can skip the op.
      logging.info('Scale of 0 disables regularizer.')
      return lambda _: None

  def l2(weights):
    """Applies l2 regularization to weights."""
    with ops.name_scope(scope, 'l2_regularizer', [weights]) as name:
      my_scale = ops.convert_to_tensor(scale,
                                       dtype=weights.dtype.base_dtype,
                                       name='scale')
      # scale * (sum(w_i**2) / 2): nn.l2_loss computes sum(t**2)/2,
      # so the result is the half-squared L2 norm times the coefficient.
      return standard_ops.multiply(my_scale, nn.l2_loss(weights), name=name)

  return l2
在源代码中,standard_ops.multiply(my_scale, nn.l2_loss(weights), name=name);Weights数据先进行
平方求和,除以2(参见2.2.1),然后multiply乘以my_scale惩罚系数。
2.2.1 l2_loss接口
def l2_loss(t, name=None):
  r"""L2 Loss.

  Computes half the L2 norm of a tensor without the `sqrt`:

      output = sum(t ** 2) / 2

  Args:
    t: A `Tensor`. Must be one of the following types: `half`, `bfloat16`, `float32`, `float64`.
      Typically 2-D, but may have any dimensions.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `t`.
  """
  _ctx = _context._context
  if _ctx is None or not _ctx._eager_context.is_eager:
    # Graph mode: build an "L2Loss" op node via the op-definition library
    # and register its inputs/attrs so the gradient can be recorded.
    _, _, _op = _op_def_lib._apply_op_helper(
        "L2Loss", t=t, name=name)
    _result = _op.outputs[:]
    _inputs_flat = _op.inputs
    _attrs = ("T", _op.get_attr("T"))
    _execute.record_gradient(
        "L2Loss", _inputs_flat, _attrs, _result, name)
    # The op has a single output; unpack it from the one-element list.
    _result, = _result
    return _result
  else:
    try:
      # Eager mode: dispatch the "L2Loss" kernel through the C fast path.
      _result = _pywrap_tensorflow.TFE_Py_FastPathExecute(
          _ctx._context_handle, _ctx._eager_context.device_name, "L2Loss", name,
          _ctx._post_execution_callbacks, t)
      return _result
    except _core._FallbackException:
      # Fast path declined (presumably unsupported input combination —
      # exact conditions not visible here); use the slower eager fallback.
      return l2_loss_eager_fallback(
          t, name=name, ctx=_ctx)
    except _core._NotOkStatusException as e:
      # Append the op name to the error message for easier debugging,
      # then re-raise as the corresponding TF exception without chaining.
      if name is not None:
        message = e.message + " name: " + name
      else:
        message = e.message
      _six.raise_from(_core._status_to_exception(e.code, message), None)
3. 示例
import tensorflow as tf

# 2x2 weight matrix used to demonstrate both regularizers.
weights = tf.constant([[1.0, -2.0], [-3.0, 4.0]])
with tf.Session() as sess:
    # L1, scale=1.0: 1*(1+2+3+4)=10
    print(sess.run(tf.contrib.layers.l1_regularizer(1.0)(weights)))
    # L2, scale=0.5: 0.5*(1+4+9+16)/2
    print(sess.run(tf.contrib.layers.l2_regularizer(.5)(weights)))
结果打印:
10.0
7.5