From c9e6cbc9581dc9e8e56187a68cbc772c96cfe214 Mon Sep 17 00:00:00 2001
From: Caleb Moses
Date: Wed, 18 Mar 2020 10:14:47 +1300
Subject: [PATCH 1/3] Add trial pruning to lm_optimizer.py

---
 DeepSpeech.py   |  6 +++---
 lm_optimizer.py | 14 +++++++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/DeepSpeech.py b/DeepSpeech.py
index 92404e07..f3015aff 100644
--- a/DeepSpeech.py
+++ b/DeepSpeech.py
@@ -75,7 +75,7 @@ def create_overlapping_windows(batch_x):
 
 
 def dense(name, x, units, dropout_rate=None, relu=True):
-    with tfv1.variable_scope(name):
+    with tfv1.variable_scope(name, reuse=tf.AUTO_REUSE):
         bias = variable_on_cpu('bias', [units], tfv1.zeros_initializer())
         weights = variable_on_cpu('weights', [x.shape[-1], units], tfv1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"))
 
@@ -91,7 +91,7 @@ def dense(name, x, units, dropout_rate=None, relu=True):
 
 
 def rnn_impl_lstmblockfusedcell(x, seq_length, previous_state, reuse):
-    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell/cell_0'):
+    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell/cell_0', reuse=tf.AUTO_REUSE):
         fw_cell = tf.contrib.rnn.LSTMBlockFusedCell(Config.n_cell_dim,
                                                     forget_bias=0,
                                                     reuse=reuse,
@@ -133,7 +133,7 @@ rnn_impl_cudnn_rnn.cell = None
 
 
 def rnn_impl_static_rnn(x, seq_length, previous_state, reuse):
-    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell'):
+    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell', reuse=tf.AUTO_REUSE):
         # Forward direction cell:
         fw_cell = tfv1.nn.rnn_cell.LSTMCell(Config.n_cell_dim,
                                             forget_bias=0,
diff --git a/lm_optimizer.py b/lm_optimizer.py
index 9e01ab96..70d2c9db 100644
--- a/lm_optimizer.py
+++ b/lm_optimizer.py
@@ -29,10 +29,22 @@ def objective(trial):
     FLAGS.lm_beta = trial.suggest_uniform('lm_beta', 0, FLAGS.lm_beta_max)
 
     tfv1.reset_default_graph()
-    samples = evaluate(FLAGS.test_files.split(','), create_model)
 
     is_character_based = trial.study.user_attrs['is_character_based']
 
+    samples = []
+    for step, test_file in enumerate(FLAGS.test_files.split(',')):
+        current_samples = evaluate([test_file], create_model, try_loading)
+        samples += current_samples
+
+        # Report intermediate objective value.
+        wer, cer = wer_cer_batch(current_samples)
+        trial.report(cer if is_character_based else wer, step)
+
+        # Handle pruning based on the intermediate value.
+        if trial.should_prune():
+            raise optuna.exceptions.TrialPruned()
+
     wer, cer = wer_cer_batch(samples)
     return cer if is_character_based else wer
 

From 8e37a5cfb4a10a954bf933098d39e5aef05833e0 Mon Sep 17 00:00:00 2001
From: Caleb Moses
Date: Wed, 18 Mar 2020 10:38:51 +1300
Subject: [PATCH 2/3] Run reset_default_graph before every evaluate

---
 DeepSpeech.py   | 6 +++---
 lm_optimizer.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/DeepSpeech.py b/DeepSpeech.py
index f3015aff..92404e07 100644
--- a/DeepSpeech.py
+++ b/DeepSpeech.py
@@ -75,7 +75,7 @@ def create_overlapping_windows(batch_x):
 
 
 def dense(name, x, units, dropout_rate=None, relu=True):
-    with tfv1.variable_scope(name, reuse=tf.AUTO_REUSE):
+    with tfv1.variable_scope(name):
         bias = variable_on_cpu('bias', [units], tfv1.zeros_initializer())
         weights = variable_on_cpu('weights', [x.shape[-1], units], tfv1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"))
 
@@ -91,7 +91,7 @@ def dense(name, x, units, dropout_rate=None, relu=True):
 
 
 def rnn_impl_lstmblockfusedcell(x, seq_length, previous_state, reuse):
-    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell/cell_0', reuse=tf.AUTO_REUSE):
+    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell/cell_0'):
         fw_cell = tf.contrib.rnn.LSTMBlockFusedCell(Config.n_cell_dim,
                                                     forget_bias=0,
                                                     reuse=reuse,
@@ -133,7 +133,7 @@ rnn_impl_cudnn_rnn.cell = None
 
 
 def rnn_impl_static_rnn(x, seq_length, previous_state, reuse):
-    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell', reuse=tf.AUTO_REUSE):
+    with tfv1.variable_scope('cudnn_lstm/rnn/multi_rnn_cell'):
         # Forward direction cell:
         fw_cell = tfv1.nn.rnn_cell.LSTMCell(Config.n_cell_dim,
                                             forget_bias=0,
diff --git a/lm_optimizer.py b/lm_optimizer.py
index 70d2c9db..fd6c8d04 100644
--- a/lm_optimizer.py
+++ b/lm_optimizer.py
@@ -28,12 +28,12 @@ def objective(trial):
     FLAGS.lm_alpha = trial.suggest_uniform('lm_alpha', 0, FLAGS.lm_alpha_max)
     FLAGS.lm_beta = trial.suggest_uniform('lm_beta', 0, FLAGS.lm_beta_max)
 
-    tfv1.reset_default_graph()
-
     is_character_based = trial.study.user_attrs['is_character_based']
 
     samples = []
     for step, test_file in enumerate(FLAGS.test_files.split(',')):
+        tfv1.reset_default_graph()
+
         current_samples = evaluate([test_file], create_model, try_loading)
         samples += current_samples
 

From 7072daa05ceff1514cb287215fc4f419f102cdd0 Mon Sep 17 00:00:00 2001
From: Caleb Moses
Date: Thu, 19 Mar 2020 10:54:22 +1300
Subject: [PATCH 3/3] Remove try_loading from evaluate call

---
 lm_optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lm_optimizer.py b/lm_optimizer.py
index fd6c8d04..b786bcdf 100644
--- a/lm_optimizer.py
+++ b/lm_optimizer.py
@@ -34,7 +34,7 @@ def objective(trial):
     for step, test_file in enumerate(FLAGS.test_files.split(',')):
         tfv1.reset_default_graph()
 
-        current_samples = evaluate([test_file], create_model, try_loading)
+        current_samples = evaluate([test_file], create_model)
         samples += current_samples
 
         # Report intermediate objective value.
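
Note: the trial.report / trial.should_prune hooks added above only matter once objective is run through an Optuna study, where pruning decisions come from the study's pruner (MedianPruner by default). A minimal sketch of how the patched objective might be driven follows; the explicit MedianPruner, the n_trials value, and the user-attribute value are illustrative assumptions, not taken from lm_optimizer.py:

    import optuna

    # One intermediate value is reported per test file (step = file index),
    # so the pruner compares a trial's running WER/CER with earlier trials
    # at the same step and stops unpromising lm_alpha/lm_beta pairs early.
    study = optuna.create_study(direction='minimize',
                                pruner=optuna.pruners.MedianPruner())  # illustrative pruner choice
    study.set_user_attr('is_character_based', False)  # objective() reads this study attribute
    study.optimize(objective, n_trials=100)           # n_trials chosen for illustration only

    print(study.best_params)  # e.g. {'lm_alpha': ..., 'lm_beta': ...}
    print(study.best_value)   # best WER (or CER for character-based models)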