Update the default parameters of the LSTM/GRU cells in v2 to match the values in the layers.

1. The default recurrent activation is now sigmoid instead of hard_sigmoid.
2. The default implementation is now 2, which batches the operations into fewer, larger ones.
3. For GRUCell, the default reset_after is now True (for CuDNN compatibility).

PiperOrigin-RevId: 239601520
parent 2de2eed1cc
commit 9dfe9f2b8d
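For reference, a minimal usage sketch (not part of this commit; assumes a TF 2.x build with these defaults in place) of what changes for users and how to pin the previous v1-style cell defaults explicitly:

import tensorflow as tf

# New v2 defaults after this change: recurrent_activation='sigmoid',
# implementation=2, and (GRU only) reset_after=True.
gru_new = tf.keras.layers.GRU(32)
lstm_new = tf.keras.layers.LSTM(32)

# To reproduce the old defaults, pass them explicitly:
gru_old = tf.keras.layers.GRU(
    32,
    recurrent_activation='hard_sigmoid',
    implementation=1,
    reset_after=False)
lstm_old = tf.keras.layers.LSTM(
    32,
    recurrent_activation='hard_sigmoid',
    implementation=1)

Note that non-default activations steer the v2 layers off the fused CuDNN path, so the explicit old-style settings trade speed for backward compatibility.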
@@ -149,15 +149,17 @@ from tensorflow.python.keras.layers.recurrent import RNN
 from tensorflow.python.keras.layers.recurrent import AbstractRNNCell
 from tensorflow.python.keras.layers.recurrent import StackedRNNCells
 from tensorflow.python.keras.layers.recurrent import SimpleRNNCell
-from tensorflow.python.keras.layers.recurrent import GRUCell
-from tensorflow.python.keras.layers.recurrent import LSTMCell
 from tensorflow.python.keras.layers.recurrent import PeepholeLSTMCell
 from tensorflow.python.keras.layers.recurrent import SimpleRNN

 from tensorflow.python.keras.layers.recurrent import GRU
+from tensorflow.python.keras.layers.recurrent import GRUCell
 from tensorflow.python.keras.layers.recurrent import LSTM
+from tensorflow.python.keras.layers.recurrent import LSTMCell
 from tensorflow.python.keras.layers.recurrent_v2 import GRU as GRU_v2
+from tensorflow.python.keras.layers.recurrent_v2 import GRUCell as GRUCell_v2
 from tensorflow.python.keras.layers.recurrent_v2 import LSTM as LSTM_v2
+from tensorflow.python.keras.layers.recurrent_v2 import LSTMCell as LSTMCell_v2

 # Convolutional-recurrent layers.
 from tensorflow.python.keras.layers.convolutional_recurrent import ConvLSTM2D
@@ -1511,7 +1511,7 @@ class SimpleRNN(RNN):
     return cls(**config)


-@keras_export('keras.layers.GRUCell')
+@keras_export(v1=['keras.layers.GRUCell'])
 class GRUCell(DropoutRNNCellMixin, Layer):
   """Cell class for the GRU layer.

@@ -2049,7 +2049,7 @@ class GRU(RNN):
     return cls(**config)


-@keras_export('keras.layers.LSTMCell')
+@keras_export(v1=['keras.layers.LSTMCell'])
 class LSTMCell(DropoutRNNCellMixin, Layer):
   """Cell class for the LSTM layer.

@@ -45,6 +45,96 @@ _CPU_DEVICE_NAME = 'CPU'
 _GPU_DEVICE_NAME = 'GPU'


+@keras_export('keras.layers.GRUCell', v1=[])
+class GRUCell(recurrent.GRUCell):
+  """Cell class for the GRU layer.
+
+  Arguments:
+    units: Positive integer, dimensionality of the output space.
+    activation: Activation function to use. Default: hyperbolic tangent
+      (`tanh`). If you pass None, no activation is applied
+      (ie. "linear" activation: `a(x) = x`).
+    recurrent_activation: Activation function to use for the recurrent step.
+      Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
+      applied (ie. "linear" activation: `a(x) = x`).
+    use_bias: Boolean, whether the layer uses a bias vector.
+    kernel_initializer: Initializer for the `kernel` weights matrix,
+      used for the linear transformation of the inputs.
+    recurrent_initializer: Initializer for the `recurrent_kernel`
+      weights matrix, used for the linear transformation of the recurrent state.
+    bias_initializer: Initializer for the bias vector.
+    kernel_regularizer: Regularizer function applied to the `kernel` weights
+      matrix.
+    recurrent_regularizer: Regularizer function applied to the
+      `recurrent_kernel` weights matrix.
+    bias_regularizer: Regularizer function applied to the bias vector.
+    kernel_constraint: Constraint function applied to the `kernel` weights
+      matrix.
+    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
+      weights matrix.
+    bias_constraint: Constraint function applied to the bias vector.
+    dropout: Float between 0 and 1. Fraction of the units to drop for the
+      linear transformation of the inputs.
+    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
+      the linear transformation of the recurrent state.
+    implementation: Implementation mode, either 1 or 2.
+      Mode 1 will structure its operations as a larger number of
+      smaller dot products and additions, whereas mode 2 (default) will
+      batch them into fewer, larger operations. These modes will
+      have different performance profiles on different hardware and
+      for different applications.
+    reset_after: GRU convention (whether to apply reset gate after or
+      before matrix multiplication). False = "before",
+      True = "after" (default and CuDNN compatible).
+
+  Call arguments:
+    inputs: A 2D tensor.
+    states: List of state tensors corresponding to the previous timestep.
+    training: Python boolean indicating whether the layer should behave in
+      training mode or in inference mode. Only relevant when `dropout` or
+      `recurrent_dropout` is used.
+  """
+
+  def __init__(self,
+               units,
+               activation='tanh',
+               recurrent_activation='sigmoid',
+               use_bias=True,
+               kernel_initializer='glorot_uniform',
+               recurrent_initializer='orthogonal',
+               bias_initializer='zeros',
+               kernel_regularizer=None,
+               recurrent_regularizer=None,
+               bias_regularizer=None,
+               kernel_constraint=None,
+               recurrent_constraint=None,
+               bias_constraint=None,
+               dropout=0.,
+               recurrent_dropout=0.,
+               implementation=2,
+               reset_after=True,
+               **kwargs):
+    super(GRUCell, self).__init__(
+        units,
+        activation=activation,
+        recurrent_activation=recurrent_activation,
+        use_bias=use_bias,
+        kernel_initializer=kernel_initializer,
+        recurrent_initializer=recurrent_initializer,
+        bias_initializer=bias_initializer,
+        kernel_regularizer=kernel_regularizer,
+        recurrent_regularizer=recurrent_regularizer,
+        bias_regularizer=bias_regularizer,
+        kernel_constraint=kernel_constraint,
+        recurrent_constraint=recurrent_constraint,
+        bias_constraint=bias_constraint,
+        dropout=dropout,
+        recurrent_dropout=recurrent_dropout,
+        implementation=implementation,
+        reset_after=reset_after,
+        **kwargs)
+
+
 @keras_export('keras.layers.GRU', v1=[])
 class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU):
   """Gated Recurrent Unit - Cho et al. 2014.
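As context for the `implementation` argument documented above: a rough NumPy sketch (illustrative only, not the layer's actual kernels) of mode 1's per-gate dot products versus mode 2's single batched matmul for a GRU's input projections:

import numpy as np

batch, input_dim, units = 2, 3, 4
x = np.random.randn(batch, input_dim)
# Hypothetical per-gate input kernels (update z, reset r, candidate h).
w_z, w_r, w_h = (np.random.randn(input_dim, units) for _ in range(3))

# Mode 1: a larger number of smaller dot products, one per gate.
x_z, x_r, x_h = x @ w_z, x @ w_r, x @ w_h

# Mode 2 (new default): one fused, larger matmul, then split per gate.
w_all = np.concatenate([w_z, w_r, w_h], axis=1)  # (input_dim, 3 * units)
x_z2, x_r2, x_h2 = np.split(x @ w_all, 3, axis=1)

assert np.allclose(x_z, x_z2) and np.allclose(x_r, x_r2)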
@@ -165,7 +255,7 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU):
                bias_constraint=None,
                dropout=0.,
                recurrent_dropout=0.,
-               implementation=1,
+               implementation=2,
                return_sequences=False,
                return_state=False,
                go_backwards=False,
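The `reset_after` flag documented in the GRUCell docstring above decides where the reset gate multiplies in the candidate-state computation; a simplified sketch (biases elided; `r` is the reset-gate output, `h` the previous state, and the function names are illustrative):

import numpy as np

def candidate_before(x, h, r, w_h, u_h):
  # reset_after=False: reset gate applied before the recurrent matmul.
  return np.tanh(x @ w_h + (r * h) @ u_h)

def candidate_after(x, h, r, w_h, u_h):
  # reset_after=True (new default): reset gate applied after the recurrent
  # matmul, matching the CuDNN kernel's ordering.
  return np.tanh(x @ w_h + r * (h @ u_h))

The two conventions are not weight-compatible, which is why `reset_after` must match when exchanging weights with a CuDNN-backed GRU.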
@@ -467,6 +557,96 @@ def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, mask, time_major,
   return last_output, outputs, h, _runtime('cudnn')


+@keras_export('keras.layers.LSTMCell', v1=[])
+class LSTMCell(recurrent.LSTMCell):
+  """Cell class for the LSTM layer.
+
+  Arguments:
+    units: Positive integer, dimensionality of the output space.
+    activation: Activation function to use. Default: hyperbolic tangent
+      (`tanh`). If you pass `None`, no activation is applied (ie. "linear"
+      activation: `a(x) = x`).
+    recurrent_activation: Activation function to use for the recurrent step.
+      Default: sigmoid (`sigmoid`). If you pass `None`, no activation is applied
+      (ie. "linear" activation: `a(x) = x`).
+    use_bias: Boolean, whether the layer uses a bias vector.
+    kernel_initializer: Initializer for the `kernel` weights matrix, used for
+      the linear transformation of the inputs.
+    recurrent_initializer: Initializer for the `recurrent_kernel` weights
+      matrix, used for the linear transformation of the recurrent state.
+    bias_initializer: Initializer for the bias vector.
+    unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at
+      initialization. Setting it to true will also force
+      `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
+      al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
+    kernel_regularizer: Regularizer function applied to the `kernel` weights
+      matrix.
+    recurrent_regularizer: Regularizer function applied to
+      the `recurrent_kernel` weights matrix.
+    bias_regularizer: Regularizer function applied to the bias vector.
+    kernel_constraint: Constraint function applied to the `kernel` weights
+      matrix.
+    recurrent_constraint: Constraint function applied to the `recurrent_kernel`
+      weights matrix.
+    bias_constraint: Constraint function applied to the bias vector.
+    dropout: Float between 0 and 1. Fraction of the units to drop for the linear
+      transformation of the inputs.
+    recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
+      the linear transformation of the recurrent state.
+    implementation: Implementation mode, either 1 or 2.
+      Mode 1 will structure its operations as a larger number of smaller dot
+      products and additions, whereas mode 2 (default) will batch them into
+      fewer, larger operations. These modes will have different performance
+      profiles on different hardware and for different applications.
+
+  Call arguments:
+    inputs: A 2D tensor.
+    states: List of state tensors corresponding to the previous timestep.
+    training: Python boolean indicating whether the layer should behave in
+      training mode or in inference mode. Only relevant when `dropout` or
+      `recurrent_dropout` is used.
+  """
+
+  def __init__(self,
+               units,
+               activation='tanh',
+               recurrent_activation='sigmoid',
+               use_bias=True,
+               kernel_initializer='glorot_uniform',
+               recurrent_initializer='orthogonal',
+               bias_initializer='zeros',
+               unit_forget_bias=True,
+               kernel_regularizer=None,
+               recurrent_regularizer=None,
+               bias_regularizer=None,
+               kernel_constraint=None,
+               recurrent_constraint=None,
+               bias_constraint=None,
+               dropout=0.,
+               recurrent_dropout=0.,
+               implementation=2,
+               **kwargs):
+    super(LSTMCell, self).__init__(
+        units,
+        activation=activation,
+        recurrent_activation=recurrent_activation,
+        use_bias=use_bias,
+        kernel_initializer=kernel_initializer,
+        recurrent_initializer=recurrent_initializer,
+        bias_initializer=bias_initializer,
+        unit_forget_bias=unit_forget_bias,
+        kernel_regularizer=kernel_regularizer,
+        recurrent_regularizer=recurrent_regularizer,
+        bias_regularizer=bias_regularizer,
+        kernel_constraint=kernel_constraint,
+        recurrent_constraint=recurrent_constraint,
+        bias_constraint=bias_constraint,
+        dropout=dropout,
+        recurrent_dropout=recurrent_dropout,
+        implementation=implementation,
+        **kwargs)
+
+
 @keras_export('keras.layers.LSTM', v1=[])
 class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM):
   """Long Short-Term Memory layer - Hochreiter 1997.
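The recurrent-activation change in both cells above is numerically small but not a no-op; a quick comparison, using the piecewise-linear definition Keras uses for `hard_sigmoid`:

import numpy as np

def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

def hard_sigmoid(x):
  # Keras's piecewise-linear approximation: clips to exactly 0/1 beyond
  # |x| = 2.5, while sigmoid only approaches those limits asymptotically.
  return np.clip(0.2 * x + 0.5, 0.0, 1.0)

x = np.linspace(-3.0, 3.0, 7)
print(np.round(sigmoid(x), 3))
print(np.round(hard_sigmoid(x), 3))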
@@ -569,7 +749,7 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM):
                bias_constraint=None,
                dropout=0.,
                recurrent_dropout=0.,
-               implementation=1,
+               implementation=2,
                return_sequences=False,
                return_state=False,
                go_backwards=False,
@@ -1,5 +1,6 @@
 path: "tensorflow.keras.layers.GRUCell"
 tf_class {
+  is_instance: "<class \'tensorflow.python.keras.layers.recurrent_v2.GRUCell\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.GRUCell\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.DropoutRNNCellMixin\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
@@ -87,7 +88,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'2\', \'True\'], "
   }
   member_method {
     name: "add_loss"
@@ -161,7 +161,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'time_major\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\', \'True\'], "
+    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'time_major\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'2\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\', \'True\'], "
   }
   member_method {
     name: "add_loss"
@@ -1,5 +1,6 @@
 path: "tensorflow.keras.layers.LSTMCell"
 tf_class {
+  is_instance: "<class \'tensorflow.python.keras.layers.recurrent_v2.LSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTMCell\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.DropoutRNNCellMixin\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
@@ -87,7 +88,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\'], "
+    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'2\'], "
   }
   member_method {
     name: "add_loss"
@@ -161,7 +161,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'time_major\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'time_major\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'2\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "add_loss"
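The golden-API updates above pin the new argspec defaults; a quick runtime check (a sketch, assuming a TF 2.x build that exports the v2 symbols as shown) that mirrors what the goldens assert:

import inspect
import tensorflow as tf

sig = inspect.signature(tf.keras.layers.GRUCell.__init__)
assert sig.parameters['recurrent_activation'].default == 'sigmoid'
assert sig.parameters['implementation'].default == 2
assert sig.parameters['reset_after'].default is True

sig = inspect.signature(tf.keras.layers.LSTMCell.__init__)
assert sig.parameters['recurrent_activation'].default == 'sigmoid'
assert sig.parameters['implementation'].default == 2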