From 3637f88c065588ab03ac91c483de8cbf2d092c57 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 29 Jan 2020 11:53:33 +0100 Subject: [PATCH] Fix CI errors, address comments, update examples --- DeepSpeech.py | 5 ----- .../deepspeech/libdeepspeech/test/BasicTest.java | 6 ------ .../deepspeech/libdeepspeech/DeepSpeechModel.java | 4 ++-- native_client/javascript/index.js | 15 ++------------- native_client/python/__init__.py | 5 ++--- native_client/tflitemodelstate.cc | 12 ++++++------ native_client/tfmodelstate.cc | 11 +++++------ taskcluster/examples-base.tyml | 4 ++-- taskcluster/win-opt-base.tyml | 2 +- util/flags.py | 2 +- 10 files changed, 21 insertions(+), 45 deletions(-) diff --git a/DeepSpeech.py b/DeepSpeech.py index 9421e7f0..d25c192b 100755 --- a/DeepSpeech.py +++ b/DeepSpeech.py @@ -876,11 +876,6 @@ def package_zip(): with open(os.path.join(export_dir, 'info.json'), 'w') as f: json.dump({ 'name': FLAGS.export_language, - 'parameters': { - 'beamWidth': FLAGS.export_beam_width, - 'lmAlpha': FLAGS.lm_alpha, - 'lmBeta': FLAGS.lm_beta - } }, f) shutil.copy(FLAGS.scorer_path, export_dir) diff --git a/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java b/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java index 60d21256..2957b2e7 100644 --- a/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java +++ b/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java @@ -33,8 +33,6 @@ public class BasicTest { public static final String scorerFile = "/data/local/tmp/test/kenlm.scorer"; public static final String wavFile = "/data/local/tmp/test/LDC93S1.wav"; - public static final int BEAM_WIDTH = 50; - private char readLEChar(RandomAccessFile f) throws IOException { byte b1 = f.readByte(); byte b2 = f.readByte(); @@ -117,7 +115,6 @@ public class 
BasicTest { @Test public void loadDeepSpeech_stt_noLM() { DeepSpeechModel m = new DeepSpeechModel(modelFile); - m.setBeamWidth(BEAM_WIDTH); String decoded = doSTT(m, false); assertEquals("she had your dark suit in greasy wash water all year", decoded); @@ -127,7 +124,6 @@ public class BasicTest { @Test public void loadDeepSpeech_stt_withLM() { DeepSpeechModel m = new DeepSpeechModel(modelFile); - m.setBeamWidth(BEAM_WIDTH); m.enableExternalScorer(scorerFile); String decoded = doSTT(m, false); @@ -138,7 +134,6 @@ public class BasicTest { @Test public void loadDeepSpeech_sttWithMetadata_noLM() { DeepSpeechModel m = new DeepSpeechModel(modelFile); - m.setBeamWidth(BEAM_WIDTH); String decoded = doSTT(m, true); assertEquals("she had your dark suit in greasy wash water all year", decoded); @@ -148,7 +143,6 @@ public class BasicTest { @Test public void loadDeepSpeech_sttWithMetadata_withLM() { DeepSpeechModel m = new DeepSpeechModel(modelFile); - m.setBeamWidth(BEAM_WIDTH); m.enableExternalScorer(scorerFile); String decoded = doSTT(m, true); diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java index 1c26e2f9..6d0a316b 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -33,7 +33,7 @@ public class DeepSpeechModel { * * @return Beam width value used by the model. */ - public int beamWidth() { + public long beamWidth() { return impl.GetModelBeamWidth(this._msp); } @@ -45,7 +45,7 @@ public class DeepSpeechModel { * * @return Zero on success, non-zero on failure. 
*/ - public int setBeamWidth(int beamWidth) { + public int setBeamWidth(long beamWidth) { return impl.SetModelBeamWidth(this._msp, beamWidth); } diff --git a/native_client/javascript/index.js b/native_client/javascript/index.js index 38ecbf0a..58697033 100644 --- a/native_client/javascript/index.js +++ b/native_client/javascript/index.js @@ -42,9 +42,8 @@ function Model(aModelPath) { } /** - * Get beam width value used by the model. If {@link DS_SetModelBeamWidth} - * was not called before, will return the default value loaded from the - * model file. + * Get beam width value used by the model. If :js:func:`Model.setBeamWidth` was + * not called before, will return the default value loaded from the model file. * * @return {number} Beam width value used by the model. */ @@ -63,16 +62,6 @@ Model.prototype.setBeamWidth = function(aBeamWidth) { return binding.SetModelBeamWidth(this._impl, aBeamWidth); } -/** - * Return the sample rate expected by the model. - * - * @return {number} Sample rate. - */ -Model.prototype.beamWidth = function() { - return binding.GetModelBeamWidth(this._impl); -} - - /** * Return the sample rate expected by the model. * diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py index 855a6eeb..960305be 100644 --- a/native_client/python/__init__.py +++ b/native_client/python/__init__.py @@ -45,9 +45,8 @@ class Model(object): def beamWidth(self): """ - Get beam width value used by the model. If {@link DS_SetModelBeamWidth} - was not called before, will return the default value loaded from the - model file. + Get beam width value used by the model. If setBeamWidth was not + called before, will return the default value loaded from the model file. :return: Beam width value used by the model. 
:type: int diff --git a/native_client/tflitemodelstate.cc b/native_client/tflitemodelstate.cc index 5e0c71f3..4836ed0b 100644 --- a/native_client/tflitemodelstate.cc +++ b/native_client/tflitemodelstate.cc @@ -128,16 +128,16 @@ TFLiteModelState::init(const char* model_path) int metadata_sample_rate_idx = get_output_tensor_by_name("metadata_sample_rate"); int metadata_feature_win_len_idx = get_output_tensor_by_name("metadata_feature_win_len"); int metadata_feature_win_step_idx = get_output_tensor_by_name("metadata_feature_win_step"); - int metadata_alphabet_idx = get_output_tensor_by_name("metadata_alphabet"); int metadata_beam_width_idx = get_output_tensor_by_name("metadata_beam_width"); + int metadata_alphabet_idx = get_output_tensor_by_name("metadata_alphabet"); std::vector<int> metadata_exec_plan; metadata_exec_plan.push_back(find_parent_node_ids(metadata_version_idx)[0]); metadata_exec_plan.push_back(find_parent_node_ids(metadata_sample_rate_idx)[0]); metadata_exec_plan.push_back(find_parent_node_ids(metadata_feature_win_len_idx)[0]); metadata_exec_plan.push_back(find_parent_node_ids(metadata_feature_win_step_idx)[0]); - metadata_exec_plan.push_back(find_parent_node_ids(metadata_alphabet_idx)[0]); metadata_exec_plan.push_back(find_parent_node_ids(metadata_beam_width_idx)[0]); + metadata_exec_plan.push_back(find_parent_node_ids(metadata_alphabet_idx)[0]); for (int i = 0; i < metadata_exec_plan.size(); ++i) { assert(metadata_exec_plan[i] > -1); @@ -202,20 +202,20 @@ TFLiteModelState::init(const char* model_path) audio_win_len_ = sample_rate_ * (*win_len_ms / 1000.0); audio_win_step_ = sample_rate_ * (*win_step_ms / 1000.0); + int* const beam_width = interpreter_->typed_tensor<int>(metadata_beam_width_idx); + beam_width_ = (unsigned int)(*beam_width); + tflite::StringRef serialized_alphabet = tflite::GetString(interpreter_->tensor(metadata_alphabet_idx), 0); err = alphabet_.deserialize(serialized_alphabet.str, serialized_alphabet.len); if (err != 0) { return 
DS_ERR_INVALID_ALPHABET; } - int* const beam_width = interpreter_->typed_tensor<int>(metadata_beam_width_idx); - beam_width_ = (unsigned int)(*beam_width); - assert(sample_rate_ > 0); assert(audio_win_len_ > 0); assert(audio_win_step_ > 0); - assert(alphabet_.GetSize() > 0); assert(beam_width_ > 0); + assert(alphabet_.GetSize() > 0); TfLiteIntArray* dims_input_node = interpreter_->tensor(input_node_idx_)->dims; diff --git a/native_client/tfmodelstate.cc b/native_client/tfmodelstate.cc index ab7cc136..5b1e1675 100644 --- a/native_client/tfmodelstate.cc +++ b/native_client/tfmodelstate.cc @@ -102,8 +102,8 @@ TFModelState::init(const char* model_path) "metadata_sample_rate", "metadata_feature_win_len", "metadata_feature_win_step", - "metadata_alphabet", "metadata_beam_width", + "metadata_alphabet", }, {}, &metadata_outputs); if (!status.ok()) { std::cout << "Unable to fetch metadata: " << status << std::endl; @@ -115,21 +115,20 @@ TFModelState::init(const char* model_path) int win_step_ms = metadata_outputs[2].scalar<int>()(); audio_win_len_ = sample_rate_ * (win_len_ms / 1000.0); audio_win_step_ = sample_rate_ * (win_step_ms / 1000.0); + int beam_width = metadata_outputs[3].scalar<int>()(); + beam_width_ = (unsigned int)(beam_width); - string serialized_alphabet = metadata_outputs[3].scalar<string>()(); + string serialized_alphabet = metadata_outputs[4].scalar<string>()(); err = alphabet_.deserialize(serialized_alphabet.data(), serialized_alphabet.size()); if (err != 0) { return DS_ERR_INVALID_ALPHABET; } - int beam_width = metadata_outputs[4].scalar<int>()(); - beam_width_ = (unsigned int)(beam_width); - assert(sample_rate_ > 0); assert(audio_win_len_ > 0); assert(audio_win_step_ > 0); - assert(alphabet_.GetSize() > 0); assert(beam_width_ > 0); + assert(alphabet_.GetSize() > 0); for (int i = 0; i < graph_def_.node_size(); ++i) { NodeDef node = graph_def_.node(i); diff --git a/taskcluster/examples-base.tyml b/taskcluster/examples-base.tyml index 2af1c1f1..381e9284 100644 --- 
a/taskcluster/examples-base.tyml +++ b/taskcluster/examples-base.tyml @@ -30,11 +30,11 @@ then: image: ${build.docker_image} env: - DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.1/models.tar.gz" + DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.1/models_beam_width.tar.gz" DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz" PIP_DEFAULT_TIMEOUT: "60" EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "4b97ac41d03ca0d23fa92526433db72a90f47d4a" + EXAMPLES_CHECKOUT_TARGET: "embedded-beam-width" command: - "/bin/bash" diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index 6bcc0acd..e892ec70 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -44,7 +44,7 @@ payload: MSYS: 'winsymlinks:nativestrict' TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow} EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "4b97ac41d03ca0d23fa92526433db72a90f47d4a" + EXAMPLES_CHECKOUT_TARGET: "embedded-beam-width" command: - >- diff --git a/util/flags.py b/util/flags.py index a465c9fc..9f31aae4 100644 --- a/util/flags.py +++ b/util/flags.py @@ -111,7 +111,7 @@ def create_flags(): f.DEFINE_string('export_language', '', 'language the model was trained on e.g. "en" or "English". Gets embedded into exported model.') f.DEFINE_boolean('export_zip', False, 'export a TFLite model and package with LM and info.json') f.DEFINE_string('export_name', 'output_graph', 'name for the export model') - f.DEFINE_string('export_beam_width', 500, 'default beam width to embed into exported graph') + f.DEFINE_integer('export_beam_width', 500, 'default beam width to embed into exported graph') # Reporting