From f90408d3ab62fb2a1eadbdaff2e0461ab0ac2621 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 19 Aug 2021 18:33:32 +0200 Subject: [PATCH 1/3] Move early_training_checks to train function --- bin/run-ldc93s1.py | 7 ++----- notebooks/easy-transfer-learning.ipynb | 4 +--- notebooks/train-your-first-coqui-STT-model.ipynb | 4 +--- training/coqui_stt_training/train.py | 3 ++- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/bin/run-ldc93s1.py b/bin/run-ldc93s1.py index e266b7a4..5a2746d8 100755 --- a/bin/run-ldc93s1.py +++ b/bin/run-ldc93s1.py @@ -2,8 +2,7 @@ import os from import_ldc93s1 import _download_and_preprocess_data as download_ldc from coqui_stt_training.util.config import initialize_globals_from_args -from coqui_stt_training.train import train, test, early_training_checks -import tensorflow.compat.v1 as tfv1 +from coqui_stt_training.train import train, test # only one GPU for only one training sample os.environ["CUDA_VISIBLE_DEVICES"] = "0" @@ -21,8 +20,6 @@ initialize_globals_from_args( epochs=200, ) -early_training_checks() - train() -tfv1.reset_default_graph() + test() diff --git a/notebooks/easy-transfer-learning.ipynb b/notebooks/easy-transfer-learning.ipynb index b83f1f80..4631db82 100644 --- a/notebooks/easy-transfer-learning.ipynb +++ b/notebooks/easy-transfer-learning.ipynb @@ -195,13 +195,11 @@ }, "outputs": [], "source": [ - "from coqui_stt_training.train import train, early_training_checks\n", + "from coqui_stt_training.train import train\n", "\n", "# use maximum one GPU\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", - "early_training_checks()\n", - "\n", "train()" ] }, diff --git a/notebooks/train-your-first-coqui-STT-model.ipynb b/notebooks/train-your-first-coqui-STT-model.ipynb index 2009dfa0..bcb10d89 100644 --- a/notebooks/train-your-first-coqui-STT-model.ipynb +++ b/notebooks/train-your-first-coqui-STT-model.ipynb @@ -197,13 +197,11 @@ }, "outputs": [], "source": [ - "from coqui_stt_training.train import 
train, early_training_checks\n", + "from coqui_stt_training.train import train\n", "\n", "# use maximum one GPU\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", - "early_training_checks()\n", - "\n", "train()" ] }, diff --git a/training/coqui_stt_training/train.py b/training/coqui_stt_training/train.py index 6be4ef55..3d3b7177 100644 --- a/training/coqui_stt_training/train.py +++ b/training/coqui_stt_training/train.py @@ -522,6 +522,8 @@ def log_grads_and_vars(grads_and_vars): def train(): + early_training_checks() + tfv1.reset_default_graph() tfv1.set_random_seed(Config.random_seed) @@ -1266,7 +1268,6 @@ def early_training_checks(): def main(): initialize_globals_from_cli() - early_training_checks() if Config.train_files: train() From f9556d2236a406b70a0c0c181567d159788ff2c1 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 19 Aug 2021 18:33:48 +0200 Subject: [PATCH 2/3] Add comments marking nodes with names/shapes known by native client --- training/coqui_stt_training/train.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/training/coqui_stt_training/train.py b/training/coqui_stt_training/train.py index 3d3b7177..acfe15e2 100644 --- a/training/coqui_stt_training/train.py +++ b/training/coqui_stt_training/train.py @@ -912,17 +912,28 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): batch_size = batch_size if batch_size > 0 else None # Create feature computation graph + + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. 
input_samples = tfv1.placeholder( tf.float32, [Config.audio_window_samples], "input_samples" ) samples = tf.expand_dims(input_samples, -1) mfccs, _ = audio_to_features(samples, Config.audio_sample_rate) + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. mfccs = tf.identity(mfccs, name="mfccs") # Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input] # This shape is read by the native_client in STT_CreateModel to know the # value of n_steps, n_context and n_input. Make sure you update the code # there if this shape is changed. + # + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. input_tensor = tfv1.placeholder( tf.float32, [ @@ -933,15 +944,24 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): ], name="input_node", ) + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. seq_length = tfv1.placeholder(tf.int32, [batch_size], name="input_lengths") if batch_size <= 0: # no state management since n_step is expected to be dynamic too (see below) previous_state = None else: + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. previous_state_c = tfv1.placeholder( tf.float32, [batch_size, Config.n_cell_dim], name="previous_state_c" ) + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. 
previous_state_h = tfv1.placeholder( tf.float32, [batch_size, Config.n_cell_dim], name="previous_state_h" ) @@ -971,6 +991,10 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): # TF Lite runtime will check that input dimensions are 1, 2 or 4 # by default we get 3, the middle one being batch_size which is forced to # one on inference graph, so remove that dimension + # + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. if tflite: logits = tf.squeeze(logits, [1]) @@ -1045,6 +1069,9 @@ def export(): graph_version = int(file_relative_read("GRAPH_VERSION").strip()) assert graph_version > 0 + # native_client: these nodes' names and shapes are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. outputs["metadata_version"] = tf.constant([graph_version], name="metadata_version") outputs["metadata_sample_rate"] = tf.constant( [Config.audio_sample_rate], name="metadata_sample_rate" From 4c3537952adc643232c83b976865e78315743041 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 19 Aug 2021 18:42:07 +0200 Subject: [PATCH 3/3] Fix lm_optimizer.py to use new Config/flags/logging setup --- lm_optimizer.py | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/lm_optimizer.py b/lm_optimizer.py index 85ca1fd5..ae919640 100644 --- a/lm_optimizer.py +++ b/lm_optimizer.py @@ -4,36 +4,36 @@ from __future__ import absolute_import, print_function import sys -import absl.app import optuna import tensorflow.compat.v1 as tfv1 from coqui_stt_ctcdecoder import Scorer from coqui_stt_training.evaluate import evaluate -from coqui_stt_training.train import create_model -from coqui_stt_training.util.config import Config, initialize_globals_from_cli +from coqui_stt_training.train import create_model, early_training_checks +from 
coqui_stt_training.util.config import ( + Config, + initialize_globals_from_cli, + log_error, +) from coqui_stt_training.util.evaluate_tools import wer_cer_batch -from coqui_stt_training.util.flags import FLAGS, create_flags -from coqui_stt_training.util.logging import log_error def character_based(): is_character_based = False - if FLAGS.scorer_path: - scorer = Scorer( - FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path, Config.alphabet - ) - is_character_based = scorer.is_utf8_mode() + scorer = Scorer( + Config.lm_alpha, Config.lm_beta, Config.scorer_path, Config.alphabet + ) + is_character_based = scorer.is_utf8_mode() return is_character_based def objective(trial): - FLAGS.lm_alpha = trial.suggest_uniform("lm_alpha", 0, FLAGS.lm_alpha_max) - FLAGS.lm_beta = trial.suggest_uniform("lm_beta", 0, FLAGS.lm_beta_max) + Config.lm_alpha = trial.suggest_uniform("lm_alpha", 0, Config.lm_alpha_max) + Config.lm_beta = trial.suggest_uniform("lm_beta", 0, Config.lm_beta_max) is_character_based = trial.study.user_attrs["is_character_based"] samples = [] - for step, test_file in enumerate(FLAGS.test_files.split(",")): + for step, test_file in enumerate(Config.test_files): tfv1.reset_default_graph() current_samples = evaluate([test_file], create_model) @@ -51,10 +51,18 @@ def objective(trial): return cer if is_character_based else wer -def main(_): +def main(): initialize_globals_from_cli() + early_training_checks() - if not FLAGS.test_files: + if not Config.scorer_path: + log_error( + "Missing --scorer_path: can't optimize scorer alpha and beta " + "parameters without a scorer!" + ) + sys.exit(1) + + if not Config.test_files: log_error( "You need to specify what files to use for evaluation via " "the --test_files flag." 
@@ -65,7 +73,7 @@ def main(_): study = optuna.create_study() study.set_user_attr("is_character_based", is_character_based) - study.optimize(objective, n_jobs=1, n_trials=FLAGS.n_trials) + study.optimize(objective, n_jobs=1, n_trials=Config.n_trials) print( "Best params: lm_alpha={} and lm_beta={} with WER={}".format( study.best_params["lm_alpha"], @@ -76,5 +84,4 @@ def main(_): if __name__ == "__main__": - create_flags() - absl.app.run(main) + main()