From b51d81f87f5de3c26b2db59ae6ec6b5f963acd7d Mon Sep 17 00:00:00 2001
From: Scott Zhu
Date: Mon, 10 Dec 2018 12:42:26 -0800
Subject: [PATCH] Update the default recurrent activation function for unified
 LSTM to 'sigmoid'.

I believe that, for historical reasons, the recurrent activation function for
LSTM defaults to hard_sigmoid because it is faster than sigmoid. With the new
LSTM, the performance issue should be fixed by grappler swapping in the CuDNN
backend.

PiperOrigin-RevId: 224863406
---
 tensorflow/python/keras/layers/recurrent.py  | 17 ++++++------
 .../python/keras/layers/unified_lstm_test.py | 27 ++++++++++---------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 86a69e45d90..fb4c1736b18 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -2546,13 +2546,11 @@ class UnifiedLSTM(LSTM):
   Arguments:
     units: Positive integer, dimensionality of the output space.
     activation: Activation function to use.
-      Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
-      is applied
-      (ie. "linear" activation: `a(x) = x`).
+      Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
+      is applied (ie. "linear" activation: `a(x) = x`).
     recurrent_activation: Activation function to use for the recurrent step.
-      Default: hard sigmoid (`hard_sigmoid`). If you pass `None`, no
-      activation is applied
-      (ie. "linear" activation: `a(x) = x`).
+      Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
+      applied (ie. "linear" activation: `a(x) = x`).
     use_bias: Boolean, whether the layer uses a bias vector.
     kernel_initializer: Initializer for the `kernel` weights matrix, used for
       the linear transformation of the inputs..
@@ -2602,7 +2600,7 @@ class UnifiedLSTM(LSTM):
   def __init__(self,
                units,
                activation='tanh',
-               recurrent_activation='hard_sigmoid',
+               recurrent_activation='sigmoid',
                use_bias=True,
                kernel_initializer='glorot_uniform',
                recurrent_initializer='orthogonal',
@@ -2663,8 +2661,9 @@ class UnifiedLSTM(LSTM):
     self._num_inputs = None
     self._dropout_mask = None
     self.could_use_cudnn = (
-        activation == 'tanh' and recurrent_dropout == 0 and
-        not unroll and use_bias and bias_regularizer is None)
+        activation == 'tanh' and recurrent_activation == 'sigmoid' and
+        recurrent_dropout == 0 and not unroll and use_bias and
+        bias_regularizer is None)
 
   def call(self, inputs, mask=None, training=None, initial_state=None):
     # LSTM does not support constants. Ignore it during process.
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 932b2d331dc..a2b523b00e0 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -161,17 +161,20 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
       existing_loss = loss_value
 
   @parameterized.named_parameters(
-      ('_non_tan_activation', 'relu', 0, False, True, None),
-      ('_use_recurrent_dropout', 'tanh', 0.1, False, True, None),
-      ('_unroll', 'tanh', 0, True, True, None),
-      ('_not_use_bias', 'tanh', 0, False, False, None),
-      ('_use_bias_regularizer', 'tanh', 0, False, True, 'l2')
+      ('non_tan_activation', 'relu', 'sigmoid', 0, False, True, None),
+      ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True, None),
+      ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True, None),
+      ('unroll', 'tanh', 'sigmoid', 0, True, True, None),
+      ('not_use_bias', 'tanh', 'sigmoid', 0, False, False, None),
+      ('use_bias_regularizer', 'tanh', 'sigmoid', 0, False, True, 'l2')
   )
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_could_use_defun_backend(self, activation, recurrent_dropout,
-                                   unroll, use_bias, bias_regularizer):
+  def test_could_use_defun_backend(self, activation, recurrent_activation,
+                                   recurrent_dropout, unroll, use_bias,
+                                   bias_regularizer):
     layer = UnifiedLSTM(1,
                         activation=activation,
+                        recurrent_activation=recurrent_activation,
                         recurrent_dropout=recurrent_dropout,
                         unroll=unroll,
                         use_bias=use_bias,
@@ -270,22 +273,22 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     inputs = keras.layers.Input(
         shape=[timestep, input_shape], dtype=dtypes.float32)
     with test_util.device(use_gpu=False):
-      # Note that CuDNN use 'sigmoid' as activation. Force the CPU
-      # implementation to use 'sigmoid' so that it will generate same output as
-      # CuDNN implementation.
-      layer = UnifiedLSTM(rnn_state_size, recurrent_activation='sigmoid')
+      layer = UnifiedLSTM(rnn_state_size)
       output = layer(inputs)
       cpu_model = keras.models.Model(inputs, output)
       weights = cpu_model.get_weights()
       y_1 = cpu_model.predict(x_train)
 
     with test_util.device(use_gpu=True):
-      layer = UnifiedLSTM(rnn_state_size, recurrent_activation='sigmoid')
+      layer = UnifiedLSTM(rnn_state_size)
       output = layer(inputs)
       gpu_model = keras.models.Model(inputs, output)
       gpu_model.set_weights(weights)
       y_2 = gpu_model.predict(x_train)
 
+    # Note that CuDNN uses 'sigmoid' as the recurrent activation, so the
+    # unified LSTM now uses 'sigmoid' as the default. Construct the canonical
+    # LSTM with sigmoid to produce the same output.
     with test_util.device(use_gpu=True):
       layer = keras.layers.LSTM(rnn_state_size, recurrent_activation='sigmoid')
       output = layer(inputs)
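For reference, below is a minimal sketch (not part of the patch) of what the new
default means in practice, assuming UnifiedLSTM is importable from
tensorflow.python.keras.layers.recurrent as in the test file above: with the
defaults the layer satisfies the could_use_cudnn condition introduced in
recurrent.py, while overriding recurrent_activation opts the layer out of the
CuDNN-backed path.

# Sketch only; mirrors the could_use_cudnn condition added in recurrent.py.
from tensorflow.python.keras.layers.recurrent import UnifiedLSTM

# Defaults after this change: activation='tanh', recurrent_activation='sigmoid',
# recurrent_dropout=0, unroll=False, use_bias=True, bias_regularizer=None, so
# the layer is eligible for the CuDNN-compatible defun backend.
layer = UnifiedLSTM(32)
assert layer.could_use_cudnn

# Overriding the recurrent activation (e.g. restoring 'hard_sigmoid') falls
# back to the generic implementation, since CuDNN only supports sigmoid here.
legacy_layer = UnifiedLSTM(32, recurrent_activation='hard_sigmoid')
assert not legacy_layer.could_use_cudnn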