# From be60473c88175dbc9359c9d1bbb384518757ee81 Mon Sep 17 00:00:00 2001
# From: Francois Chollet
# Date: Wed, 21 Dec 2016 20:18:37 -0800
# Subject: [PATCH] Introduce Glorot initializers in core. Change: 142728126
# ---
#  tensorflow/python/ops/init_ops.py | 147 ++++++++++++++++++++++++++++++
#  1 file changed, 147 insertions(+)
#
# diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
# index fc8adf6421d..4cb29f52ce3 100644
# --- a/tensorflow/python/ops/init_ops.py
# +++ b/tensorflow/python/ops/init_ops.py
#
# Hunk 1 (@@ -34,6 +34,7 @@) only adds a blank line after `import math`;
# its context lines are:
#   from __future__ import division
#   from __future__ import print_function
#   import math
#   from tensorflow.python.framework import constant_op
#   from tensorflow.python.framework import dtypes
#   from tensorflow.python.ops import array_ops
#
# Hunk 2 (@@ -290,6 +291,152 @@) inserts the definitions below right after
# the closing `return _initializer` of `uniform_unit_scaling_initializer`.
# `math`, `dtypes`, `random_ops` and `_assert_float_dtype` are resolved from
# the surrounding module (init_ops.py), not from this hunk.


def variance_scaling_initializer(scale=1.0,
                                 mode="fan_in",
                                 distribution="normal",
                                 seed=None,
                                 dtype=dtypes.float32):
  """Initializer capable of adapting its scale to the shape of weights tensors.

  With `distribution="normal"`, samples are drawn from a truncated normal
  distribution centered on zero, with `stddev = sqrt(scale / n)`
  where n is:
    - number of input units in the weight tensor, if mode = "fan_in"
    - number of output units, if mode = "fan_out"
    - average of the numbers of input and output units, if mode = "fan_avg"

  With `distribution="uniform"`, samples are drawn from a uniform distribution
  within [-limit, limit], with `limit = sqrt(3 * scale / n)`.

  Arguments:
    scale: Scaling factor (positive float).
    mode: One of "fan_in", "fan_out", "fan_avg".
    distribution: Random distribution to use. One of "normal", "uniform"
      (matched case-insensitively).
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.
    dtype: The data type. Only floating point types are supported.

  Returns:
    An initializer: a callable taking `(shape, dtype, partition_info)` and
    returning a randomly initialized tensor of the requested shape.

  Raises:
    ValueError: In case of an invalid value for the "scale", "mode" or
      "distribution" arguments.
  """
  if scale <= 0.:
    raise ValueError("`scale` must be positive float.")
  if mode not in {"fan_in", "fan_out", "fan_avg"}:
    # Format into a single string; passing `mode` as a second positional
    # argument would make the message render as a tuple.
    raise ValueError("Invalid `mode` argument: %s" % (mode,))
  distribution = distribution.lower()
  if distribution not in {"normal", "uniform"}:
    raise ValueError("Invalid `distribution` argument: %s" % (distribution,))

  def _initializer(shape, dtype=_assert_float_dtype(dtype),
                   partition_info=None):
    # The closure only *reads* `scale`, `mode`, `distribution` and `seed`.
    # Rebinding any of them here (e.g. `scale /= n`) would make the name
    # local to `_initializer` and fail with UnboundLocalError, so the scaled
    # value is stored under a new name instead.
    scale_shape = shape
    if partition_info is not None:
      # For partitioned variables the fans come from the full variable shape,
      # not from the shape of the individual shard being initialized.
      scale_shape = partition_info.full_shape
    fan_in, fan_out = _compute_fans(scale_shape)
    if mode == "fan_in":
      n = max(1., fan_in)
    elif mode == "fan_out":
      n = max(1., fan_out)
    else:
      n = max(1., (fan_in + fan_out) / 2.)
    variance = scale / n
    if distribution == "normal":
      stddev = math.sqrt(variance)
      return random_ops.truncated_normal(shape, 0.0, stddev, dtype, seed=seed)
    else:
      limit = math.sqrt(3.0 * variance)
      return random_ops.random_uniform(shape, -limit, limit,
                                       dtype, seed=seed)
  return _initializer


def glorot_uniform_initializer(seed=None, dtype=dtypes.float32):
  """The Glorot uniform initializer, also called Xavier uniform initializer.

  It draws samples from a uniform distribution within [-limit, limit]
  where `limit` is `sqrt(6 / (fan_in + fan_out))`
  where `fan_in` is the number of input units in the weight tensor
  and `fan_out` is the number of output units in the weight tensor.

  This is `variance_scaling_initializer` with `scale=1.0`, `mode="fan_avg"`
  and `distribution="uniform"`.

  Reference: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf

  Arguments:
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.
    dtype: The data type. Only floating point types are supported.

  Returns:
    An initializer.
  """
  return variance_scaling_initializer(scale=1.0,
                                      mode="fan_avg",
                                      distribution="uniform",
                                      seed=seed,
                                      dtype=dtype)


def glorot_normal_initializer(seed=None, dtype=dtypes.float32):
  """The Glorot normal initializer, also called Xavier normal initializer.

  It draws samples from a truncated normal distribution centered on 0
  with `stddev = sqrt(2 / (fan_in + fan_out))`
  where `fan_in` is the number of input units in the weight tensor
  and `fan_out` is the number of output units in the weight tensor.

  This is `variance_scaling_initializer` with `scale=1.0`, `mode="fan_avg"`
  and `distribution="normal"`.

  Reference: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf

  Arguments:
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.
    dtype: The data type. Only floating point types are supported.

  Returns:
    An initializer.
  """
  return variance_scaling_initializer(scale=1.0,
                                      mode="fan_avg",
                                      distribution="normal",
                                      seed=seed,
                                      dtype=dtype)


def _compute_fans(shape):
  """Computes the number of input and output units for a weight shape.

  Arguments:
    shape: Integer shape tuple or TF tensor shape.

  Returns:
    A tuple of scalars (fan_in, fan_out):
      - rank 0: `(1, 1)`
      - rank 1: `(shape[0], shape[0])`
      - rank 2: `(shape[0], shape[1])`
      - rank > 2: the last two dimensions, each multiplied by the product of
        all leading dimensions (the receptive field size).
  """
  rank = len(shape)
  if rank < 1:
    # Scalar weight: no dimensions to read a fan from.
    fan_in = fan_out = 1
  elif rank == 1:
    # Vector weight (e.g. a bias): indexing shape[-2] would raise IndexError,
    # so the single dimension is used for both fans.
    fan_in = fan_out = shape[0]
  elif rank == 2:
    fan_in = shape[0]
    fan_out = shape[1]
  else:
    # Assuming convolution kernels (2D, 3D, or more).
    # kernel shape: (..., input_depth, depth)
    receptive_field_size = 1.
    for dim in shape[:-2]:
      receptive_field_size *= dim
    fan_in = shape[-2] * receptive_field_size
    fan_out = shape[-1] * receptive_field_size
  return fan_in, fan_out


# TODO(vrv): Unhide when we are ready to expose this publicly.
def _random_walk(shape, nonlinearity, dtype=dtypes.float32, seed=None, name="random_walk"):