# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Initializers for TF 2."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_linalg_ops
from tensorflow.python.ops import linalg_ops_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import stateless_random_ops
from tensorflow.python.ops.init_ops import _compute_fans
from tensorflow.python.util.tf_export import tf_export

_PARTITION_SHAPE = "partition_shape"
_PARTITION_OFFSET = "partition_offset"


class Initializer(object):
  """Initializer base class: all initializers inherit from this class.

  Initializers should implement a `__call__` method with the following
  signature:

  ```python
  def __call__(self, shape, dtype=None, **kwargs):
    # returns a tensor of shape `shape` and dtype `dtype`
    # containing values drawn from a distribution of your choice.
  ```
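
  A minimal sketch of a custom initializer built on this contract is shown
  below (the class name and its `constant` argument are illustrative, not
  part of the public API):

  ```python
  class ExampleConstant(Initializer):
    # Fills the requested shape with a fixed scalar value.

    def __init__(self, constant=0.5):
      self.constant = constant

    def __call__(self, shape, dtype=None, **kwargs):
      dtype = dtypes.as_dtype(dtype or dtypes.float32)
      # Broadcast the scalar to the requested shape.
      return constant_op.constant(self.constant, dtype=dtype, shape=shape)

    def get_config(self):  # Enables round-tripping through `from_config`.
      return {"constant": self.constant}
  ```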
"""
|
|
|
|
def __call__(self, shape, dtype=None, **kwargs):
|
|
"""Returns a tensor object initialized as specified by the initializer.
|
|
|
|
Args:
|
|
shape: Shape of the tensor.
|
|
dtype: Optional dtype of the tensor. If not provided will return tensor
|
|
of `tf.float32`.
|
|
**kwargs: Additional keyword arguments. Accepted values:
|
|
`partition_shape` and `partition_offset`. Used when creating a single
|
|
partition in a partitioned variable. `partition_shape` is the shape of
|
|
the partition (i.e. the shape of the returned tensor) and
|
|
`partition_offset` is a tuple of `int` specifying the offset of this
|
|
partition w.r.t each axis. For example, a tensor of shape `(30, 100)`
|
|
can be partitioned into two partitions: `p0` of shape `(10, 100)` and
|
|
`p1` of shape `(20, 100)`; if the initializer is called with
|
|
`partition_shape=(20, 100)` and `partition_offset=(10, 0)`, it should
|
|
return the value for `p1`.
|
|
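
    For example, a sketch of partitioned initialization using the `Ones`
    initializer defined in this module (any initializer that supports
    partitioning accepts the same keyword arguments):

    ```python
    init = Ones()
    # Values for partition `p1` of a variable of shape (30, 100).
    p1 = init(shape=(30, 100), dtype=dtypes.float32,
              partition_shape=(20, 100), partition_offset=(10, 0))
    # p1.shape == (20, 100)
    ```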
"""
|
|
raise NotImplementedError
|
|
|
|
def get_config(self):
|
|
"""Returns the configuration of the initializer as a JSON-serializable dict.
|
|
|
|
Returns:
|
|
A JSON-serializable Python dict.
|
|
"""
|
|
return {}
|
|
|
|
@classmethod
|
|
def from_config(cls, config):
|
|
"""Instantiates an initializer from a configuration dictionary.
|
|
|
|
Example:
|
|
|
|
```python
|
|
initializer = RandomUniform(-1, 1)
|
|
config = initializer.get_config()
|
|
initializer = RandomUniform.from_config(config)
|
|
```
|
|
|
|
Args:
|
|
config: A Python dictionary.
|
|
It will typically be the output of `get_config`.
|
|
|
|
Returns:
|
|
An Initializer instance.
|
|
"""
|
|
config.pop("dtype", None)
|
|
return cls(**config)
|
|
|
|
def _validate_kwargs(self, kwargs, support_partition=True):
|
|
for kwarg in kwargs:
|
|
if kwarg not in [_PARTITION_SHAPE, _PARTITION_OFFSET]:
|
|
raise TypeError("Unknown keyword arguments: %s" % kwarg)
|
|
elif not support_partition:
|
|
raise ValueError("%s initializer doesn't support partition-related"
|
|
" arguments" % self.__class__.__name__)
|
|
|
|
|
|
@tf_export("zeros_initializer", v1=[])
|
|
class Zeros(Initializer):
|
|
"""Initializer that generates tensors initialized to 0.
|
|
|
|
Initializers allow you to pre-specify an initialization strategy, encoded in
|
|
the Initializer object, without knowing the shape and dtype of the variable
|
|
being initialized.
|
|
|
|
Examples:
|
|
|
|
>>> def make_variables(k, initializer):
|
|
... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)),
|
|
... tf.Variable(initializer(shape=[k, k], dtype=tf.float32)))
|
|
>>> v1, v2 = make_variables(3, tf.zeros_initializer())
|
|
>>> v1
|
|
<tf.Variable ... shape=(3,) ... numpy=array([0., 0., 0.], dtype=float32)>
|
|
>>> v2
|
|
<tf.Variable ... shape=(3, 3) ... numpy=
|
|
array([[0., 0., 0.],
|
|
[0., 0., 0.],
|
|
[0., 0., 0.]], dtype=float32)>
|
|
>>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.))
|
|
(<tf.Variable...shape=(4,) dtype=float32...>, <tf.Variable...shape=(4, 4) ...
|
|
"""
|
|
|
|
def __call__(self, shape, dtype=dtypes.float32, **kwargs):
|
|
"""Returns a tensor object initialized as specified by the initializer.
|
|
|
|
Args:
|
|
shape: Shape of the tensor.
|
|
dtype: Optional dtype of the tensor. Only numeric or boolean dtypes are
|
|
supported.
|
|
**kwargs: Additional keyword arguments.
|
|
|
|
Raises:
|
|
ValuesError: If the dtype is not numeric or boolean.
|
|
"""
|
|
self._validate_kwargs(kwargs)
|
|
dtype = dtypes.as_dtype(dtype)
|
|
if not dtype.is_numpy_compatible or dtype == dtypes.string:
|
|
raise ValueError("Expected numeric or boolean dtype, got %s." % dtype)
|
|
if _PARTITION_SHAPE in kwargs:
|
|
shape = kwargs[_PARTITION_SHAPE]
|
|
return array_ops.zeros(shape, dtype)
|
|
|
|
|
|
@tf_export("ones_initializer", v1=[])
|
|
class Ones(Initializer):
|
|
"""Initializer that generates tensors initialized to 1.
|
|
|
|
Initializers allow you to pre-specify an initialization strategy, encoded in
|
|
the Initializer object, without knowing the shape and dtype of the variable
|
|
being initialized.
|
|
|
|
Examples:
|
|
|
|
>>> def make_variables(k, initializer):
|
|
... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)),
|
|
... tf.Variable(initializer(shape=[k, k], dtype=tf.float32)))
|
|
>>> v1, v2 = make_variables(3, tf.ones_initializer())
|
|
>>> v1
|
|
<tf.Variable ... shape=(3,) ... numpy=array([1., 1., 1.], dtype=float32)>
|
|
>>> v2
|
|
<tf.Variable ... shape=(3, 3) ... numpy=
|
|
array([[1., 1., 1.],
|
|
[1., 1., 1.],
|
|
[1., 1., 1.]], dtype=float32)>
|
|
>>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.))
|
|
(<tf.Variable...shape=(4,) dtype=float32...>, <tf.Variable...shape=(4, 4) ...
|
|
"""
|
|
|
|
def __call__(self, shape, dtype=dtypes.float32, **kwargs):
|
|
"""Returns a tensor object initialized as specified by the initializer.
|
|
|
|
Args:
|
|
shape: Shape of the tensor.
|
|
dtype: Optional dtype of the tensor. Only numeric or boolean dtypes are
|
|
supported.
|
|
**kwargs: Additional keyword arguments.
|
|
|
|
Raises:
|
|
ValuesError: If the dtype is not numeric or boolean.
|
|
"""
|
|
self._validate_kwargs(kwargs)
|
|
dtype = dtypes.as_dtype(dtype)
|
|
if not dtype.is_numpy_compatible or dtype == dtypes.string:
|
|
raise ValueError("Expected numeric or boolean dtype, got %s." % dtype)
|
|
if _PARTITION_SHAPE in kwargs:
|
|
shape = kwargs[_PARTITION_SHAPE]
|
|
return array_ops.ones(shape, dtype)
|
|
|
|
|
|
@tf_export("constant_initializer", v1=[])
|
|
class Constant(Initializer):
|
|
"""Initializer that generates tensors with constant values.
|
|
|
|
Initializers allow you to pre-specify an initialization strategy, encoded in
|
|
the Initializer object, without knowing the shape and dtype of the variable
|
|
being initialized.
|
|
|
|
`tf.constant_initializer` returns an object which when called returns a tensor
|
|
populated with the `value` specified in the constructor. This `value` must be
|
|
convertible to the requested `dtype`.
|
|
|
|
The argument `value` can be a scalar constant value, or a list of
|
|
values. Scalars broadcast to whichever shape is requested from the
|
|
initializer.
|
|
|
|
If `value` is a list, then the length of the list must be equal to the number
|
|
of elements implied by the desired shape of the tensor. If the total number of
|
|
elements in `value` is not equal to the number of elements required by the
|
|
tensor shape, the initializer will raise a `TypeError`.
|
|
|
|
Examples:
|
|
|
|
>>> def make_variables(k, initializer):
|
|
... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)),
|
|
... tf.Variable(initializer(shape=[k, k], dtype=tf.float32)))
|
|
>>> v1, v2 = make_variables(3, tf.constant_initializer(2.))
|
|
>>> v1
|
|
<tf.Variable ... shape=(3,) ... numpy=array([2., 2., 2.], dtype=float32)>
|
|
>>> v2
|
|
<tf.Variable ... shape=(3, 3) ... numpy=
|
|
array([[2., 2., 2.],
|
|
[2., 2., 2.],
|
|
[2., 2., 2.]], dtype=float32)>
|
|
>>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.))
|
|
(<tf.Variable...shape=(4,) dtype=float32...>, <tf.Variable...shape=(4, 4) ...
|
|
|
|
>>> value = [0, 1, 2, 3, 4, 5, 6, 7]
|
|
>>> init = tf.constant_initializer(value)
|
|
>>> # Fitting shape
|
|
>>> tf.Variable(init(shape=[2, 4], dtype=tf.float32))
|
|
<tf.Variable ...
|
|
array([[0., 1., 2., 3.],
|
|
[4., 5., 6., 7.]], dtype=float32)>
|
|
>>> # Larger shape
|
|
>>> tf.Variable(init(shape=[3, 4], dtype=tf.float32))
|
|
Traceback (most recent call last):
|
|
...
|
|
TypeError: ...value has 8 elements, shape is (3, 4) with 12 elements...
|
|
>>> # Smaller shape
|
|
>>> tf.Variable(init(shape=[2, 3], dtype=tf.float32))
|
|
Traceback (most recent call last):
|
|
...
|
|
TypeError: ...value has 8 elements, shape is (2, 3) with 6 elements...
|
|
|
|
Args:
|
|
value: A Python scalar, list or tuple of values, or a N-dimensional numpy
|
|
array. All elements of the initialized variable will be set to the
|
|
corresponding value in the `value` argument.
|
|
|
|
Raises:
|
|
TypeError: If the input `value` is not one of the expected types.
|
|
"""
|
|
|
|
def __init__(self, value=0):
|
|
if not (np.isscalar(value) or isinstance(value, (list, tuple, np.ndarray))):
|
|
raise TypeError(
|
|
"Invalid type for initial value: %s (expected Python scalar, list or "
|
|
"tuple of values, or numpy.ndarray)." % type(value))
|
|
self.value = value
|
|
|
|
def __call__(self, shape, dtype=None, **kwargs):
|
|
"""Returns a tensor object initialized as specified by the initializer.
|
|
|
|
Args:
|
|
shape: Shape of the tensor.
|
|
dtype: Optional dtype of the tensor. If not provided the dtype of the
|
|
tensor created will be the type of the inital value.
|
|
**kwargs: Additional keyword arguments.
|
|
|
|
Raises:
|
|
TypeError: If the initializer cannot create a tensor of the requested
|
|
dtype.
|
|
"""
|
|
self._validate_kwargs(kwargs, support_partition=False)
|
|
if dtype is not None:
|
|
dtype = dtypes.as_dtype(dtype)
|
|
return constant_op.constant(self.value, dtype=dtype, shape=shape)
|
|
|
|
def get_config(self):
|
|
return {"value": self.value}
|
|
|
|
|
|
@tf_export("random_uniform_initializer", v1=[])
|
|
class RandomUniform(Initializer):
|
|
"""Initializer that generates tensors with a uniform distribution.
|
|
|
|
Initializers allow you to pre-specify an initialization strategy, encoded in
|
|
the Initializer object, without knowing the shape and dtype of the variable
|
|
being initialized.
|
|
|
|
Examples:
|
|
|
|
>>> def make_variables(k, initializer):
|
|
... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)),
|
|
... tf.Variable(initializer(shape=[k, k], dtype=tf.float32)))
|
|
>>> v1, v2 = make_variables(3, tf.ones_initializer())
|
|
>>> v1
|
|
<tf.Variable ... shape=(3,) ... numpy=array([1., 1., 1.], dtype=float32)>
|
|
>>> v2
|
|
<tf.Variable ... shape=(3, 3) ... numpy=
|
|
array([[1., 1., 1.],
|
|
[1., 1., 1.],
|
|
[1., 1., 1.]], dtype=float32)>
|
|
>>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.))
|
|
(<tf.Variable...shape=(4,) dtype=float32...>, <tf.Variable...shape=(4, 4) ...
|
|
|
|
Args:
|
|
minval: A python scalar or a scalar tensor. Lower bound of the range of
|
|
random values to generate (inclusive).
|
|
maxval: A python scalar or a scalar tensor. Upper bound of the range of
|
|
random values to generate (exclusive).
|
|
seed: A Python integer. Used to create random seeds. See
|
|
`tf.random.set_seed` for behavior.
|
|
"""
|
|
|
|
def __init__(self, minval=-0.05, maxval=0.05, seed=None):
|
|
self.minval = minval
|
|
self.maxval = maxval
|
|
self.seed = seed
|
|
self._random_generator = _RandomGenerator(seed)
|
|
|
|
def __call__(self, shape, dtype=dtypes.float32, **kwargs):
|
|
"""Returns a tensor object initialized as specified by the initializer.
|
|
|
|
Args:
|
|
shape: Shape of the tensor.
|
|
dtype: Optional dtype of the tensor. Only floating point and integer
|
|
types are supported.
|
|
**kwargs: Additional keyword arguments.
|
|
|
|
Raises:
|
|
ValueError: If the dtype is not numeric.
|
|
"""
|
|
self._validate_kwargs(kwargs)
|
|
dtype = dtypes.as_dtype(dtype)
|
|
if not dtype.is_floating and not dtype.is_integer:
|
|
raise ValueError("Expected float or integer dtype, got %s." % dtype)
|
|
if _PARTITION_SHAPE in kwargs:
|
|
shape = kwargs[_PARTITION_SHAPE]
|
|
return self._random_generator.random_uniform(shape, self.minval,
|
|
self.maxval, dtype)
|
|
|
|
def get_config(self):
|
|
return {
|
|
"minval": self.minval,
|
|
"maxval": self.maxval,
|
|
"seed": self.seed
|
|
}
|
|
|
|
|
|
@tf_export("random_normal_initializer", v1=[])
|
|
class RandomNormal(Initializer):
|
|
"""Initializer that generates tensors with a normal distribution.
|
|
|
|
Initializers allow you to pre-specify an initialization strategy, encoded in
|
|
the Initializer object, without knowing the shape and dtype of the variable
|
|
being initialized.
|
|
|
|
Examples:
|
|
|
|
>>> def make_variables(k, initializer):
|
|
... return (tf.Variable(initializer(shape=[k], dtype=tf.float32)),
|
|
... tf.Variable(initializer(shape=[k, k], dtype=tf.float32)))
|
|
>>> v1, v2 = make_variables(3,
|
|
... tf.random_normal_initializer(mean=1., stddev=2.))
|
|
>>> v1
|
|
<tf.Variable ... shape=(3,) ... numpy=array([...], dtype=float32)>
|
|
>>> v2
|
|
<tf.Variable ... shape=(3, 3) ... numpy=
|
|
...
|
|
>>> make_variables(4, tf.random_uniform_initializer(minval=-1., maxval=1.))
|
|
(<tf.Variable...shape=(4,) dtype=float32...>, <tf.Variable...shape=(4, 4) ...
|
|
|
|
Args:
|
|
mean: a python scalar or a scalar tensor. Mean of the random values to
|
|
generate.
|
|
stddev: a python scalar or a scalar tensor. Standard deviation of the random
|
|
values to generate.
|
|
seed: A Python integer. Used to create random seeds. See
|
|
`tf.random.set_seed` for behavior.
|
|
|
|
"""
|
|
|
|
def __init__(self, mean=0.0, stddev=0.05, seed=None):
|
|
self.mean = mean
|
|
self.stddev = stddev
|
|
self.seed = seed
|
|
self._random_generator = _RandomGenerator(seed)
|
|
|
|
def __call__(self, shape, dtype=dtypes.float32, **kwargs):
|
|
"""Returns a tensor object initialized as specified by the initializer.
|
|
|
|
Args:
|
|
shape: Shape of the tensor.
|
|
dtype: Optional dtype of the tensor. Only floating point types are
|
|
supported.
|
|
**kwargs: Additional keyword arguments.
|
|
|
|
Raises:
|
|
ValueError: If the dtype is not floating point
|
|
"""
|
|
self._validate_kwargs(kwargs)
|
|
dtype = _assert_float_dtype(dtype)
|
|
if _PARTITION_SHAPE in kwargs:
|
|
shape = kwargs[_PARTITION_SHAPE]
|
|
return self._random_generator.random_normal(shape, self.mean, self.stddev,
|
|
dtype)
|
|
|
|
def get_config(self):
|
|
return {
|
|
"mean": self.mean,
|
|
"stddev": self.stddev,
|
|
"seed": self.seed
|
|
}
|
|
|
|
|
|
class TruncatedNormal(Initializer):
  """Initializer that generates a truncated normal distribution.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  These values are similar to values from a `tf.initializers.RandomNormal`
  except that values more than two standard deviations from the mean are
  discarded and re-drawn. This is the recommended initializer for neural
  network weights and filters.
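
  As an illustrative check (not a doctest), every sample lies strictly within
  two standard deviations of the mean:

  ```python
  init = TruncatedNormal(mean=0., stddev=1.)
  samples = init(shape=(10000,), dtype=dtypes.float32)
  # All magnitudes stay below 2 * stddev because outliers are re-drawn.
  assert float(math_ops.reduce_max(math_ops.abs(samples))) < 2.
  ```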
  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(
  ...     3, tf.initializers.TruncatedNormal(mean=1., stddev=2.))
  >>> v1
  <tf.Variable ... shape=(3,) ... numpy=array([...], dtype=float32)>
  >>> v2
  <tf.Variable ... shape=(3, 3) ... numpy=
  ...
  >>> make_variables(4, tf.initializers.RandomUniform(minval=-1., maxval=1.))
  (<tf.Variable...shape=(4,) dtype=float32...>, <tf.Variable...shape=(4, 4) ...

  Args:
    mean: a python scalar or a scalar tensor. Mean of the random values
      to generate.
    stddev: a python scalar or a scalar tensor. Standard deviation of the
      random values to generate.
    seed: A Python integer. Used to create random seeds. See
      `tf.random.set_seed` for behavior.
  """

  def __init__(self, mean=0.0, stddev=0.05, seed=None):
    self.mean = mean
    self.stddev = stddev
    self.seed = seed
    self._random_generator = _RandomGenerator(seed)

  def __call__(self, shape, dtype=dtypes.float32, **kwargs):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported.
      **kwargs: Additional keyword arguments.

    Raises:
      ValueError: If the dtype is not floating point.
    """
    self._validate_kwargs(kwargs)
    dtype = _assert_float_dtype(dtype)
    if _PARTITION_SHAPE in kwargs:
      shape = kwargs[_PARTITION_SHAPE]
    return self._random_generator.truncated_normal(shape, self.mean,
                                                   self.stddev, dtype)

  def get_config(self):
    return {
        "mean": self.mean,
        "stddev": self.stddev,
        "seed": self.seed
    }


class VarianceScaling(Initializer):
  """Initializer capable of adapting its scale to the shape of weights tensors.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  With `distribution="truncated_normal"` or `distribution="untruncated_normal"`,
  samples are drawn from a truncated/untruncated normal distribution with a
  mean of zero and a standard deviation (after truncation, if used)
  `stddev = sqrt(scale / n)`, where n is:

  - number of input units in the weight tensor, if mode = "fan_in"
  - number of output units, if mode = "fan_out"
  - average of the numbers of input and output units, if mode = "fan_avg"

  With `distribution="uniform"`, samples are drawn from a uniform distribution
  within [-limit, limit], with `limit = sqrt(3 * scale / n)`.
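
  As a concrete sketch of the formulas above (the numbers are illustrative,
  assuming a 2D weight of shape `(64, 32)`, so `fan_in = 64`):

  ```python
  scale, n = 2.0, 64                # mode="fan_in"
  stddev = math.sqrt(scale / n)     # ~0.177 for the normal distributions
  limit = math.sqrt(3 * scale / n)  # ~0.306 for distribution="uniform"
  ```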
  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.VarianceScaling(scale=1.))
  >>> v1
  <tf.Variable ... shape=(3,) ... numpy=array([...], dtype=float32)>
  >>> v2
  <tf.Variable ... shape=(3, 3) ... numpy=
  ...
  >>> make_variables(4, tf.initializers.VarianceScaling(distribution='uniform'))
  (<tf.Variable...shape=(4,) dtype=float32...>, <tf.Variable...shape=(4, 4) ...

  Args:
    scale: Scaling factor (positive float).
    mode: One of "fan_in", "fan_out", "fan_avg".
    distribution: Random distribution to use. One of "truncated_normal",
      "untruncated_normal", or "uniform".
    seed: A Python integer. Used to create random seeds. See
      `tf.random.set_seed` for behavior.

  Raises:
    ValueError: In case of an invalid value for the "scale", "mode" or
      "distribution" arguments.
  """

  def __init__(self,
               scale=1.0,
               mode="fan_in",
               distribution="truncated_normal",
               seed=None):
    if scale <= 0.:
      raise ValueError("`scale` must be positive float.")
    if mode not in {"fan_in", "fan_out", "fan_avg"}:
      raise ValueError("Invalid `mode` argument:", mode)
    distribution = distribution.lower()
    # Compatibility with keras-team/keras.
    if distribution == "normal":
      distribution = "truncated_normal"
    if distribution not in {"uniform", "truncated_normal",
                            "untruncated_normal"}:
      raise ValueError("Invalid `distribution` argument:", distribution)
    self.scale = scale
    self.mode = mode
    self.distribution = distribution
    self.seed = seed
    self._random_generator = _RandomGenerator(seed)

  def __call__(self, shape, dtype=dtypes.float32, **kwargs):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported.
      **kwargs: Additional keyword arguments.

    Raises:
      ValueError: If the dtype is not floating point.
    """
    self._validate_kwargs(kwargs)
    dtype = _assert_float_dtype(dtype)
    scale = self.scale
    fan_in, fan_out = _compute_fans(shape)
    if _PARTITION_SHAPE in kwargs:
      shape = kwargs[_PARTITION_SHAPE]
    if self.mode == "fan_in":
      scale /= max(1., fan_in)
    elif self.mode == "fan_out":
      scale /= max(1., fan_out)
    else:
      scale /= max(1., (fan_in + fan_out) / 2.)
    if self.distribution == "truncated_normal":
      # constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
      stddev = math.sqrt(scale) / .87962566103423978
      return self._random_generator.truncated_normal(shape, 0.0, stddev, dtype)
    elif self.distribution == "untruncated_normal":
      stddev = math.sqrt(scale)
      return self._random_generator.random_normal(shape, 0.0, stddev, dtype)
    else:
      limit = math.sqrt(3.0 * scale)
      return self._random_generator.random_uniform(shape, -limit, limit, dtype)

  def get_config(self):
    return {
        "scale": self.scale,
        "mode": self.mode,
        "distribution": self.distribution,
        "seed": self.seed
    }


class Orthogonal(Initializer):
  """Initializer that generates an orthogonal matrix.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  If the shape of the tensor to initialize is two-dimensional, it is initialized
  with an orthogonal matrix obtained from the QR decomposition of a matrix of
  random numbers drawn from a normal distribution.
  If the matrix has fewer rows than columns, then the output will have
  orthogonal rows. Otherwise, the output will have orthogonal columns.

  If the shape of the tensor to initialize is more than two-dimensional,
  a matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])`
  is initialized, where `n` is the length of the shape vector.
  The matrix is subsequently reshaped to give a tensor of the desired shape.
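
  As an illustrative check (not a doctest) of the orthogonality property:

  ```python
  init = Orthogonal()
  w = init(shape=(4, 4), dtype=dtypes.float32)
  # w^T @ w is approximately the 4 x 4 identity matrix.
  math_ops.matmul(w, w, transpose_a=True)
  ```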
  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.Orthogonal())
  >>> v1
  <tf.Variable ... shape=(3, 3) ...
  >>> v2
  <tf.Variable ... shape=(3, 3, 3) ...
  >>> make_variables(4, tf.initializers.Orthogonal(gain=0.5))
  (<tf.Variable ... shape=(4, 4) dtype=float32...
  <tf.Variable ... shape=(4, 4, 4) dtype=float32...

  Args:
    gain: Multiplicative factor to apply to the orthogonal matrix.
    seed: A Python integer. Used to create random seeds. See
      `tf.random.set_seed` for behavior.

  References:
    [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C)
    ([pdf](https://arxiv.org/pdf/1312.6120.pdf))
  """

  def __init__(self, gain=1.0, seed=None):
    self.gain = gain
    self.seed = seed
    self._random_generator = _RandomGenerator(seed)

  def __call__(self, shape, dtype=dtypes.float32, **kwargs):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported.
      **kwargs: Additional keyword arguments.

    Raises:
      ValueError: If the dtype is not floating point or the input shape is not
        valid.
    """
    self._validate_kwargs(kwargs, support_partition=False)
    dtype = _assert_float_dtype(dtype)
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

    # Generate a random matrix
    a = self._random_generator.random_normal(flat_shape, dtype=dtype)
    # Compute the qr factorization
    q, r = gen_linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    d = array_ops.diag_part(r)
    q *= math_ops.sign(d)
    if num_rows < num_cols:
      q = array_ops.matrix_transpose(q)
    return self.gain * array_ops.reshape(q, shape)

  def get_config(self):
    return {"gain": self.gain, "seed": self.seed}


class Identity(Initializer):
  """Initializer that generates the identity matrix.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  Only usable for generating 2D matrices.

  Examples:

  >>> def make_variable(k, initializer):
  ...   return tf.Variable(initializer(shape=[k, k], dtype=tf.float32))
  >>> make_variable(2, tf.initializers.Identity())
  <tf.Variable ... shape=(2, 2) dtype=float32, numpy=
  array([[1., 0.],
         [0., 1.]], dtype=float32)>
  >>> make_variable(3, tf.initializers.Identity(gain=0.5))
  <tf.Variable ... shape=(3, 3) dtype=float32, numpy=
  array([[0.5, 0. , 0. ],
         [0. , 0.5, 0. ],
         [0. , 0. , 0.5]], dtype=float32)>

  Args:
    gain: Multiplicative factor to apply to the identity matrix.
  """

  def __init__(self, gain=1.0):
    self.gain = gain

  def __call__(self, shape, dtype=dtypes.float32, **kwargs):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported.
      **kwargs: Additional keyword arguments.

    Raises:
      ValueError: If the dtype is not floating point.
      ValueError: If the requested shape does not have exactly two axes.
    """
    self._validate_kwargs(kwargs, support_partition=False)
    dtype = _assert_float_dtype(dtype)
    if len(shape) != 2:
      raise ValueError(
          "Identity matrix initializer can only be used for 2D matrices.")
    initializer = linalg_ops_impl.eye(*shape, dtype=dtype)
    return self.gain * initializer

  def get_config(self):
    return {"gain": self.gain}


class GlorotUniform(VarianceScaling):
  """The Glorot uniform initializer, also called Xavier uniform initializer.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  Draws samples from a uniform distribution within [-limit, limit] where `limit`
  is `sqrt(6 / (fan_in + fan_out))` where `fan_in` is the number of input units
  in the weight tensor and `fan_out` is the number of output units in the weight
  tensor.

  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.GlorotUniform())
  >>> v1
  <tf.Variable ... shape=(3, 3) ...
  >>> v2
  <tf.Variable ... shape=(3, 3, 3) ...
  >>> make_variables(4, tf.initializers.RandomNormal())
  (<tf.Variable ... shape=(4, 4) dtype=float32...
  <tf.Variable ... shape=(4, 4, 4) dtype=float32...

  Args:
    seed: A Python integer. Used to create random seeds. See
      `tf.random.set_seed` for behavior.

  References:
    [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
    ([pdf](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf))
  """

  def __init__(self, seed=None):
    super(GlorotUniform, self).__init__(
        scale=1.0,
        mode="fan_avg",
        distribution="uniform",
        seed=seed)

  def get_config(self):
    return {"seed": self.seed}


class GlorotNormal(VarianceScaling):
  """The Glorot normal initializer, also called Xavier normal initializer.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  Draws samples from a truncated normal distribution centered on 0 with `stddev
  = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of input units in
  the weight tensor and `fan_out` is the number of output units in the weight
  tensor.

  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.GlorotNormal())
  >>> v1
  <tf.Variable ... shape=(3, 3) ...
  >>> v2
  <tf.Variable ... shape=(3, 3, 3) ...
  >>> make_variables(4, tf.initializers.RandomNormal())
  (<tf.Variable ... shape=(4, 4) dtype=float32...
  <tf.Variable ... shape=(4, 4, 4) dtype=float32...

  Args:
    seed: A Python integer. Used to create random seeds. See
      `tf.random.set_seed` for behavior.

  References:
    [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
    ([pdf](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf))
  """

  def __init__(self, seed=None):
    super(GlorotNormal, self).__init__(
        scale=1.0,
        mode="fan_avg",
        distribution="truncated_normal",
        seed=seed)

  def get_config(self):
    return {"seed": self.seed}


# Aliases.

# pylint: disable=invalid-name
zeros_initializer = Zeros
ones_initializer = Ones
constant_initializer = Constant
random_uniform_initializer = RandomUniform
random_normal_initializer = RandomNormal
truncated_normal_initializer = TruncatedNormal
variance_scaling_initializer = VarianceScaling
glorot_uniform_initializer = GlorotUniform
glorot_normal_initializer = GlorotNormal
orthogonal_initializer = Orthogonal
identity_initializer = Identity
# pylint: enable=invalid-name


def lecun_normal(seed=None):
  """LeCun normal initializer.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  Draws samples from a truncated normal distribution centered on 0 with `stddev
  = sqrt(1 / fan_in)` where `fan_in` is the number of input units in the weight
  tensor.

  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.lecun_normal())
  >>> v1
  <tf.Variable ... shape=(3, 3) ...
  >>> v2
  <tf.Variable ... shape=(3, 3, 3) ...
  >>> make_variables(4, tf.initializers.RandomNormal())
  (<tf.Variable ... shape=(4, 4) dtype=float32...
  <tf.Variable ... shape=(4, 4, 4) dtype=float32...

  Args:
    seed: A Python integer. Used to seed the random generator.

  Returns:
    A callable Initializer with `shape` and `dtype` arguments which generates a
    tensor.

  References:
    - Self-Normalizing Neural Networks,
      [Klambauer et al., 2017]
      (https://papers.nips.cc/paper/6698-self-normalizing-neural-networks)
      ([pdf]
      (https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf))
    - Efficient Backprop,
      [Lecun et al., 1998](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
  """
  return VarianceScaling(
      scale=1., mode="fan_in", distribution="truncated_normal", seed=seed)


def lecun_uniform(seed=None):
  """LeCun uniform initializer.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  Draws samples from a uniform distribution within [-limit, limit] where `limit`
  is `sqrt(3 / fan_in)` where `fan_in` is the number of input units in the
  weight tensor.

  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.lecun_uniform())
  >>> v1
  <tf.Variable ... shape=(3, 3) ...
  >>> v2
  <tf.Variable ... shape=(3, 3, 3) ...
  >>> make_variables(4, tf.initializers.RandomNormal())
  (<tf.Variable ... shape=(4, 4) dtype=float32...
  <tf.Variable ... shape=(4, 4, 4) dtype=float32...

  Args:
    seed: A Python integer. Used to seed the random generator.

  Returns:
    A callable Initializer with `shape` and `dtype` arguments which generates a
    tensor.

  References:
    - Self-Normalizing Neural Networks,
      [Klambauer et al., 2017](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks)  # pylint: disable=line-too-long
      ([pdf](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf))
    - Efficient Backprop,
      [Lecun et al., 1998](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
  """
  return VarianceScaling(
      scale=1., mode="fan_in", distribution="uniform", seed=seed)


def he_normal(seed=None):
  """He normal initializer.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  It draws samples from a truncated normal distribution centered on 0 with
  `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of input units in the
  weight tensor.

  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.he_normal())
  >>> v1
  <tf.Variable ... shape=(3, 3) ...
  >>> v2
  <tf.Variable ... shape=(3, 3, 3) ...
  >>> make_variables(4, tf.initializers.RandomNormal())
  (<tf.Variable ... shape=(4, 4) dtype=float32...
  <tf.Variable ... shape=(4, 4, 4) dtype=float32...

  Args:
    seed: A Python integer. Used to seed the random generator.

  Returns:
    A callable Initializer with `shape` and `dtype` arguments which generates a
    tensor.

  References:
    [He et al., 2015](https://www.cv-foundation.org/openaccess/content_iccv_2015/html/He_Delving_Deep_into_ICCV_2015_paper.html)  # pylint: disable=line-too-long
    ([pdf](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf))
  """
  return VarianceScaling(
      scale=2., mode="fan_in", distribution="truncated_normal", seed=seed)


def he_uniform(seed=None):
  """He uniform variance scaling initializer.

  Initializers allow you to pre-specify an initialization strategy, encoded in
  the Initializer object, without knowing the shape and dtype of the variable
  being initialized.

  Draws samples from a uniform distribution within [-limit, limit] where `limit`
  is `sqrt(6 / fan_in)` where `fan_in` is the number of input units in the
  weight tensor.

  Examples:

  >>> def make_variables(k, initializer):
  ...   return (tf.Variable(initializer(shape=[k, k], dtype=tf.float32)),
  ...           tf.Variable(initializer(shape=[k, k, k], dtype=tf.float32)))
  >>> v1, v2 = make_variables(3, tf.initializers.he_uniform())
  >>> v1
  <tf.Variable ... shape=(3, 3) ...
  >>> v2
  <tf.Variable ... shape=(3, 3, 3) ...
  >>> make_variables(4, tf.initializers.RandomNormal())
  (<tf.Variable ... shape=(4, 4) dtype=float32...
  <tf.Variable ... shape=(4, 4, 4) dtype=float32...

  Args:
    seed: A Python integer. Used to seed the random generator.

  Returns:
    A callable Initializer with `shape` and `dtype` arguments which generates a
    tensor.

  References:
    [He et al., 2015](https://www.cv-foundation.org/openaccess/content_iccv_2015/html/He_Delving_Deep_into_ICCV_2015_paper.html)  # pylint: disable=line-too-long
    ([pdf](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf))
  """
  return VarianceScaling(
      scale=2., mode="fan_in", distribution="uniform", seed=seed)


# Utility functions.


def _assert_float_dtype(dtype):
  """Validate and return floating point type based on `dtype`.

  `dtype` must be a floating point type.

  Args:
    dtype: The data type to validate.

  Returns:
    Validated type.

  Raises:
    ValueError: if `dtype` is not a floating point type.
  """
  dtype = dtypes.as_dtype(dtype)
  if not dtype.is_floating:
    raise ValueError("Expected floating point type, got %s." % dtype)
  return dtype


class _RandomGenerator(object):
  """Random generator that selects appropriate random ops."""

  def __init__(self, seed=None):
    super(_RandomGenerator, self).__init__()
    if seed is not None:
      # Stateless random ops require a 2-int seed.
      self.seed = [seed, 0]
    else:
      self.seed = None

  def random_normal(self, shape, mean=0.0, stddev=1, dtype=dtypes.float32):
    """A deterministic random normal if seed is passed."""
    if self.seed:
      op = stateless_random_ops.stateless_random_normal
    else:
      op = random_ops.random_normal
    return op(
        shape=shape, mean=mean, stddev=stddev, dtype=dtype, seed=self.seed)

  def random_uniform(self, shape, minval, maxval, dtype):
    """A deterministic random uniform if seed is passed."""
    if self.seed:
      op = stateless_random_ops.stateless_random_uniform
    else:
      op = random_ops.random_uniform
    return op(
        shape=shape, minval=minval, maxval=maxval, dtype=dtype, seed=self.seed)

  def truncated_normal(self, shape, mean, stddev, dtype):
    """A deterministic truncated normal if seed is passed."""
    if self.seed:
      op = stateless_random_ops.stateless_truncated_normal
    else:
      op = random_ops.truncated_normal
    return op(
        shape=shape, mean=mean, stddev=stddev, dtype=dtype, seed=self.seed)


# Compatibility aliases

# pylint: disable=invalid-name
zero = zeros = Zeros
one = ones = Ones
constant = Constant
uniform = random_uniform = RandomUniform
normal = random_normal = RandomNormal
truncated_normal = TruncatedNormal
identity = Identity
orthogonal = Orthogonal
glorot_normal = GlorotNormal
glorot_uniform = GlorotUniform