From fd3fbcaa780cb9dd568a78d603b569cee2ef496e Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Fri, 19 Jul 2019 11:07:58 +0200
Subject: [PATCH] Address review comments

---
 DeepSpeech.py | 44 +++++++++++++++++---------------------------
 util/flags.py |  2 +-
 2 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/DeepSpeech.py b/DeepSpeech.py
index 09f9acdd..8492fa66 100755
--- a/DeepSpeech.py
+++ b/DeepSpeech.py
@@ -79,7 +79,7 @@ def dense(name, x, units, dropout_rate=None, relu=True):
 
 
 def rnn_impl_lstmblockfusedcell(x, seq_length, previous_state, reuse):
-    with tf.variable_scope('cudnn_lstm/rnn/multi_rnn_cell/cell_0'):
+    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell/cell_0'):
         fw_cell = tf.contrib.rnn.LSTMBlockFusedCell(Config.n_cell_dim,
                                                     reuse=reuse,
                                                     name='cudnn_compatible_lstm_cell')
@@ -95,14 +95,20 @@ def rnn_impl_lstmblockfusedcell(x, seq_length, previous_state, reuse):
 def rnn_impl_cudnn_rnn(x, seq_length, previous_state, _):
     assert previous_state is None # 'Passing previous state not supported with CuDNN backend'
 
-    # Forward direction cell:
+    # Hack: CudnnLSTM works similarly to Keras layers in that when you instantiate
+    # the object it creates the variables, and then you just call it several times
+    # to enable variable re-use. Because all of our code is structured in an old
+    # school TensorFlow style where you can just call tf.get_variable again with
+    # reuse=True to reuse variables, we can't easily make use of the object oriented
+    # way CudnnLSTM is implemented, so we save a singleton instance in the function,
+    # emulating a static function variable.
     if not rnn_impl_cudnn_rnn.cell:
-        with tf.variable_scope('rnn'):
-            fw_cell = tf.contrib.cudnn_rnn.CudnnLSTM(num_layers=1,
-                                                     num_units=Config.n_cell_dim,
-                                                     input_mode='linear_input',
-                                                     direction='unidirectional',
-                                                     dtype=tf.float32)
+        # Forward direction cell:
+        fw_cell = tf.contrib.cudnn_rnn.CudnnLSTM(num_layers=1,
+                                                 num_units=Config.n_cell_dim,
+                                                 input_mode='linear_input',
+                                                 direction='unidirectional',
+                                                 dtype=tf.float32)
         rnn_impl_cudnn_rnn.cell = fw_cell
 
     output, output_state = rnn_impl_cudnn_rnn.cell(inputs=x,
@@ -110,18 +116,11 @@ def rnn_impl_cudnn_rnn(x, seq_length, previous_state, _):
     return output, output_state
 
 
-# Hack: CudnnLSTM works similarly to Keras layers in that when you instantiate
-# the object it creates the variables, and then you just call it several times
-# to enable variable re-use. Because all of our code is structure in an old
-# school TensorFlow structure where you can just call tf.get_variable again with
-# reuse=True to reuse variables, we can't easily make use of the object oriented
-# way CudnnLSTM is implemented, so we save a singleton instance in the function,
-# emulating a static function variable.
 rnn_impl_cudnn_rnn.cell = None
 
 
 def rnn_impl_static_rnn(x, seq_length, previous_state, reuse):
-    with tf.variable_scope('cudnn_lstm/rnn/multi_rnn_cell'):
+    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell'):
         # Forward direction cell:
         fw_cell = tfv1.nn.rnn_cell.LSTMCell(Config.n_cell_dim,
                                             reuse=reuse,
@@ -611,7 +610,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
 
     if batch_size <= 0:
         # no state management since n_step is expected to be dynamic too (see below)
-        previous_states = None
+        previous_state = None
     else:
         previous_state_c = tfv1.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_c')
         previous_state_h = tfv1.placeholder(tf.float32, [batch_size, Config.n_cell_dim], name='previous_state_h')
@@ -698,16 +697,7 @@ def export():
     output_names = ",".join(output_names_tensors + output_names_ops)
 
     # Create a saver using variables from the above newly created graph
-    # Training graph uses LSTMFusedCell, but the TFLite inference graph uses
-    # a static RNN with a normal cell, so we need to rewrite the names to
-    # match the training weights when restoring.
-    def fixup(name):
-        if name.startswith('rnn/lstm_cell/'):
-            return name.replace('rnn/lstm_cell/', 'rnn/cudnn_compatible_lstm_cell/')
-        return name
-
-    mapping = {fixup(v.op.name): v for v in tf.global_variables()}
-    saver = tfv1.train.Saver(mapping)
+    saver = tfv1.train.Saver()
 
     # Restore variables from training checkpoint
     checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
diff --git a/util/flags.py b/util/flags.py
index 997c8f30..923b4471 100644
--- a/util/flags.py
+++ b/util/flags.py
@@ -55,7 +55,7 @@ def create_flags():
 
     f.DEFINE_integer('inter_op_parallelism_threads', 0, 'number of inter-op parallelism threads - see tf.ConfigProto for more details. USE OF THIS FLAG IS UNSUPPORTED')
     f.DEFINE_integer('intra_op_parallelism_threads', 0, 'number of intra-op parallelism threads - see tf.ConfigProto for more details. USE OF THIS FLAG IS UNSUPPORTED')
 
-    f.DEFINE_boolean('use_cudnn_rnn', False, 'use CuDNN RNN backend for training on GPU')
+    f.DEFINE_boolean('use_cudnn_rnn', False, 'use CuDNN RNN backend for training on GPU. Note that checkpoints created with this flag can only be used with CuDNN RNN, i.e. fine tuning on a CPU device will not work')
 
     # Sample limits
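
The comment moved into rnn_impl_cudnn_rnn above describes caching a singleton CudnnLSTM instance on the function object itself, "emulating a static function variable", because CudnnLSTM creates its variables when instantiated. Below is a minimal, self-contained sketch of that idiom in plain Python; the names CachedLayer and apply_layer are hypothetical stand-ins, not the actual DeepSpeech/TensorFlow code:

    # Sketch of the "static function variable" idiom used by rnn_impl_cudnn_rnn
    # (hypothetical names, not the DeepSpeech implementation).

    class CachedLayer:
        # Stands in for tf.contrib.cudnn_rnn.CudnnLSTM: instantiating the object
        # is what creates its internal variables, so it should only happen once
        # if those variables are to be shared across calls.
        def __init__(self):
            self.calls = 0

        def __call__(self, x):
            self.calls += 1
            return x

    def apply_layer(x):
        # Create the layer on the first call and cache it as an attribute on the
        # function object; subsequent calls reuse the same instance.
        if apply_layer.layer is None:
            apply_layer.layer = CachedLayer()
        return apply_layer.layer(x)

    # Initialize the "static" attribute once at module level, mirroring
    # `rnn_impl_cudnn_rnn.cell = None` in the patch above.
    apply_layer.layer = None

    apply_layer([1, 2, 3])
    apply_layer([4, 5, 6])
    assert apply_layer.layer.calls == 2  # the same singleton instance was reused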