diff --git a/.pylintrc b/.pylintrc
index 4f3ec3ab..98a88b4f 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -7,7 +7,7 @@ extension-pkg-whitelist=
 
 # Add files or directories to the blacklist. They should be base names, not
 # paths.
-ignore=CVS
+ignore=examples
 
 # Add files or directories matching the regex patterns to the blacklist. The
 # regex matches against base names, not paths.
diff --git a/README.rst b/README.rst
index af4b202d..1e53b5f2 100644
--- a/README.rst
+++ b/README.rst
@@ -34,7 +34,7 @@ To install and use deepspeech all you have to do is:
     tar xvf audio-0.5.1.tar.gz
 
     # Transcribe an audio file
-    deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
+    deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
 
 A pre-trained English model is available for use and can be downloaded using `the instructions below `_. Currently, only 16-bit, 16 kHz, mono-channel WAVE audio files are supported in the Python client. A package with some example audio files is available for download in our `release notes `_.
 
@@ -50,7 +50,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
     pip3 install deepspeech-gpu
 
     # Transcribe an audio file.
-    deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
+    deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
 
 Please ensure you have the required `CUDA dependencies `_.
 
diff --git a/USING.rst b/USING.rst
index 4e0ef1ba..10aca873 100644
--- a/USING.rst
+++ b/USING.rst
@@ -105,7 +105,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+   deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
 
 The arguments ``--lm`` and ``--trie`` are optional, and represent a language model.
 
@@ -159,7 +159,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   ./deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio audio_input.wav
+   ./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav
 
 See the help output with ``./deepspeech -h`` and the `native client README `_ for more details.
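For Python users, the practical effect of this change is that the alphabet is read from the model file itself, so no `--alphabet`/alphabet path is passed anywhere. A minimal sketch of the updated Python usage, assuming the constructor and decoder calls shown elsewhere in this diff; the paths are placeholders, and depending on the client version `stt()` may also expect the sample rate as a second argument:

```python
# Sketch only: mirrors Model(model, BEAM_WIDTH) and enableDecoderWithLM(lm, trie, alpha, beta)
# as used in this diff. Paths below are placeholders, not repo fixtures.
import wave

import numpy as np
from deepspeech import Model

BEAM_WIDTH = 500   # constants mirror the values used in this repo
LM_ALPHA = 0.75
LM_BETA = 1.85

ds = Model('models/output_graph.pbmm', BEAM_WIDTH)  # no alphabet argument any more
ds.enableDecoderWithLM('models/lm.binary', 'models/trie', LM_ALPHA, LM_BETA)

# Read a 16-bit, 16 kHz, mono WAV file into an int16 buffer.
with wave.open('audio/2830-3980-0043.wav', 'rb') as wav:
    audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)

print(ds.stt(audio))  # older clients also took the sample rate here
```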
diff --git a/bin/benchmark_nc.py b/bin/benchmark_nc.py
index ea2d57e3..eeac928a 100755
--- a/bin/benchmark_nc.py
+++ b/bin/benchmark_nc.py
@@ -239,7 +239,7 @@ def delete_tree(dir):
     except IOError:
         print('No remote directory: %s' % dir)
 
-def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
+def setup_tempdir(dir, models, wav, lm_binary, trie, binaries):
     r'''
     Copy models, libs and binary to a directory (new one if dir is None)
     '''
@@ -268,7 +268,7 @@ def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
             print('Copying %s to %s' % (f, dir))
             shutil.copy2(f, dir)
 
-    for extra_file in [ wav, alphabet, lm_binary, trie ]:
+    for extra_file in [ wav, lm_binary, trie ]:
         if extra_file and not os.path.isfile(os.path.join(dir, os.path.basename(extra_file))):
             print('Copying %s to %s' % (extra_file, dir))
             shutil.copy2(extra_file, dir)
@@ -375,10 +375,10 @@ def establish_ssh(target=None, auto_trust=False, allow_agent=True, look_keys=Tru
 
     return ssh_conn
 
-def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1):
+def run_benchmarks(dir, models, wav, lm_binary=None, trie=None, iters=-1):
     r'''
     Core of the running of the benchmarks. We will run on all of models, against
-    the WAV file provided as wav, and the provided alphabet.
+    the WAV file provided as wav.
     '''
 
     assert_valid_dir(dir)
@@ -396,9 +396,9 @@ def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=
         }
 
         if lm_binary and trie:
-            cmdline = './deepspeech --model "%s" --alphabet "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, alphabet, lm_binary, trie, wav)
+            cmdline = './deepspeech --model "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, lm_binary, trie, wav)
         else:
-            cmdline = './deepspeech --model "%s" --alphabet "%s" --audio "%s" -t' % (model_filename, alphabet, wav)
+            cmdline = './deepspeech --model "%s" --audio "%s" -t' % (model_filename, wav)
 
         for it in range(iters):
             sys.stdout.write('\rRunning %s: %d/%d' % (os.path.basename(model), (it+1), iters))
@@ -453,8 +453,6 @@ def handle_args():
                         help='List of files (protocolbuffer) to work on. Might be a zip file.')
     parser.add_argument('--wav', required=False,
                         help='WAV file to pass to native_client. Supply again in plotting mode to draw realine line.')
-    parser.add_argument('--alphabet', required=False,
-                        help='Text file to pass to native_client for the alphabet.')
     parser.add_argument('--lm_binary', required=False,
                         help='Path to the LM binary file used by the decoder.')
     parser.add_argument('--trie', required=False,
@@ -472,8 +470,8 @@ def handle_args():
 def do_main():
     cli_args = handle_args()
 
-    if not cli_args.models or not cli_args.wav or not cli_args.alphabet:
-        raise AssertionError('Missing arguments (models, wav or alphabet)')
+    if not cli_args.models or not cli_args.wav:
+        raise AssertionError('Missing arguments (models or wav)')
 
     if cli_args.dir is not None and not os.path.isdir(cli_args.dir):
         raise AssertionError('Inexistent temp directory')
@@ -484,18 +482,17 @@ def do_main():
     global ssh_conn
     ssh_conn = establish_ssh(target=cli_args.target, auto_trust=cli_args.autotrust, allow_agent=cli_args.allowagent, look_keys=cli_args.lookforkeys)
 
-    tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, alphabet=cli_args.alphabet, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)
+    tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)
 
     dest_sorted_models = list(map(lambda x: os.path.join(tempdir, os.path.basename(x)), sorted_models))
     dest_wav = os.path.join(tempdir, os.path.basename(cli_args.wav))
-    dest_alphabet = os.path.join(tempdir, os.path.basename(cli_args.alphabet))
 
     if cli_args.lm_binary and cli_args.trie:
         dest_lm_binary = os.path.join(tempdir, os.path.basename(cli_args.lm_binary))
         dest_trie = os.path.join(tempdir, os.path.basename(cli_args.trie))
-        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
+        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
     else:
-        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters)
+        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, iters=cli_args.iters)
 
     if cli_args.csv:
         produce_csv(input=inference_times, output=cli_args.csv)
diff --git a/evaluate_tflite.py b/evaluate_tflite.py
index ddbfbc49..44d50b83 100644
--- a/evaluate_tflite.py
+++ b/evaluate_tflite.py
@@ -23,16 +23,16 @@ This module should be self-contained:
   - pip install native_client/python/dist/deepspeech*.whl
   - pip install -r requirements_eval_tflite.txt
 
-Then run with a TF Lite model, alphabet, LM/trie and a CSV test file
+Then run with a TF Lite model, LM/trie and a CSV test file
 '''
 
 BEAM_WIDTH = 500
 LM_ALPHA = 0.75
 LM_BETA = 1.85
 
-def tflite_worker(model, alphabet, lm, trie, queue_in, queue_out, gpu_mask):
+def tflite_worker(model, lm, trie, queue_in, queue_out, gpu_mask):
     os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_mask)
-    ds = Model(model, alphabet, BEAM_WIDTH)
+    ds = Model(model, BEAM_WIDTH)
     ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)
 
     while True:
@@ -58,8 +58,6 @@ def main():
     parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
     parser.add_argument('--model', required=True,
                         help='Path to the model (protocol buffer binary file)')
-    parser.add_argument('--alphabet', required=True,
-                        help='Path to the configuration file specifying the alphabet used by the network')
     parser.add_argument('--lm', required=True,
                         help='Path to the language model binary file')
     parser.add_argument('--trie', required=True,
@@ -78,7 +76,7 @@ def main():
 
     processes = []
     for i in range(args.proc):
-        worker_process = Process(target=tflite_worker, args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
+        worker_process = Process(target=tflite_worker, args=(args.model, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
         worker_process.start()  # Launch reader() as a separate python process
         processes.append(worker_process)
diff --git a/examples/ffmpeg_vad_streaming/README.MD b/examples/ffmpeg_vad_streaming/README.MD
index 2027f4f2..7c1de91a 100644
--- a/examples/ffmpeg_vad_streaming/README.MD
+++ b/examples/ffmpeg_vad_streaming/README.MD
@@ -22,14 +22,12 @@ Here is an example for a local audio file:
 ```bash
 node ./index.js --audio  \
 	--model $HOME/models/output_graph.pbmm \
-	--alphabet $HOME/models/alphabet.txt
 ```
 
 Here is an example for a remote RTMP-Stream:
 ```bash
 node ./index.js --audio rtmp://:1935/live/teststream \
 	--model $HOME/models/output_graph.pbmm \
-	--alphabet $HOME/models/alphabet.txt
 ```
 
 ## Examples
@@ -39,21 +37,18 @@ node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
 	--lm $HOME/models/lm.binary \
 	--trie $HOME/models/trie \
 	--model $HOME/models/output_graph.pbmm \
-	--alphabet $HOME/models/alphabet.txt
 ```
 ```bash
 node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
 	--lm $HOME/models/lm.binary \
 	--trie $HOME/models/trie \
 	--model $HOME/models/output_graph.pbmm \
-	--alphabet $HOME/models/alphabet.txt
 ```
 ```bash
 node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
 	--lm $HOME/models/lm.binary \
 	--trie $HOME/models/trie \
 	--model $HOME/models/output_graph.pbmm \
-	--alphabet $HOME/models/alphabet.txt
 ```
 
 Real time streaming inference in combination with a RTMP server.
 ```bash
@@ -61,7 +56,6 @@ node ./index.js --audio rtmp://// \
 	--lm $HOME/models/lm.binary \
 	--trie $HOME/models/trie \
 	--model $HOME/models/output_graph.pbmm \
-	--alphabet $HOME/models/alphabet.txt
 ```
 
 ## Notes
diff --git a/examples/ffmpeg_vad_streaming/index.js b/examples/ffmpeg_vad_streaming/index.js
index d64cc9f8..05d5b49b 100644
--- a/examples/ffmpeg_vad_streaming/index.js
+++ b/examples/ffmpeg_vad_streaming/index.js
@@ -32,7 +32,6 @@ VersionAction.prototype.call = function(parser) {
 let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
-parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
 parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
 parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
@@ -45,7 +44,7 @@ function totalTime(hrtimeValue) {
 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-let model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
+let model = new Ds.Model(args['model'], BEAM_WIDTH);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
diff --git a/examples/ffmpeg_vad_streaming/test.sh b/examples/ffmpeg_vad_streaming/test.sh
index 13fefb7c..3966ca96 100755
--- a/examples/ffmpeg_vad_streaming/test.sh
+++ b/examples/ffmpeg_vad_streaming/test.sh
@@ -13,18 +13,15 @@ pushd ${THIS}
 	node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
 		--lm $HOME/DeepSpeech/models/lm.binary \
 		--trie $HOME/DeepSpeech/models/trie \
-		--model $HOME/DeepSpeech/models/output_graph.pbmm \
-		--alphabet $HOME/DeepSpeech/models/alphabet.txt
+		--model $HOME/DeepSpeech/models/output_graph.pbmm
 
 	node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \
 		--lm $HOME/DeepSpeech/models/lm.binary \
 		--trie $HOME/DeepSpeech/models/trie \
-		--model $HOME/DeepSpeech/models/output_graph.pbmm \
-		--alphabet $HOME/DeepSpeech/models/alphabet.txt
+		--model $HOME/DeepSpeech/models/output_graph.pbmm
 
 	node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \
 		--lm $HOME/DeepSpeech/models/lm.binary \
 		--trie $HOME/DeepSpeech/models/trie \
-		--model $HOME/DeepSpeech/models/output_graph.pbmm \
-		--alphabet $HOME/DeepSpeech/models/alphabet.txt
+		--model $HOME/DeepSpeech/models/output_graph.pbmm
 popd
diff --git a/examples/mic_vad_streaming/README.rst b/examples/mic_vad_streaming/README.rst
index 4f8b6aa0..4eace37d 100644
--- a/examples/mic_vad_streaming/README.rst
+++ b/examples/mic_vad_streaming/README.rst
@@ -29,7 +29,7 @@ Usage
 .. code-block::
 
    usage: mic_vad_streaming.py [-h] [-v VAD_AGGRESSIVENESS] [--nospinner]
-                               [-w SAVEWAV] -m MODEL [-a ALPHABET] [-l LM]
+                               [-w SAVEWAV] -m MODEL [-l LM]
                                [-t TRIE] [-nf N_FEATURES] [-nc N_CONTEXT]
                                [-la LM_ALPHA] [-lb LM_BETA] [-bw BEAM_WIDTH]
 
@@ -49,9 +49,6 @@ Usage
                         Path to the model (protocol buffer binary file, or
                         entire directory containing all standard-named files
                         for model)
-     -a ALPHABET, --alphabet ALPHABET
-                        Path to the configuration file specifying the alphabet
-                        used by the network. Default: alphabet.txt
     -l LM, --lm LM      Path to the language model binary file. Default:
                         lm.binary
     -t TRIE, --trie TRIE
                         Path to the language model trie file created with
diff --git a/examples/mic_vad_streaming/mic_vad_streaming.py b/examples/mic_vad_streaming/mic_vad_streaming.py
index 885ade83..6933c0dd 100755
--- a/examples/mic_vad_streaming/mic_vad_streaming.py
+++ b/examples/mic_vad_streaming/mic_vad_streaming.py
@@ -156,14 +156,12 @@ def main(ARGS):
     if os.path.isdir(ARGS.model):
         model_dir = ARGS.model
         ARGS.model = os.path.join(model_dir, 'output_graph.pb')
-        ARGS.alphabet = os.path.join(model_dir, ARGS.alphabet if ARGS.alphabet else 'alphabet.txt')
         ARGS.lm = os.path.join(model_dir, ARGS.lm)
         ARGS.trie = os.path.join(model_dir, ARGS.trie)
 
     print('Initializing model...')
     logging.info("ARGS.model: %s", ARGS.model)
-    logging.info("ARGS.alphabet: %s", ARGS.alphabet)
-    model = deepspeech.Model(ARGS.model, ARGS.alphabet, ARGS.beam_width)
+    model = deepspeech.Model(ARGS.model, ARGS.beam_width)
     if ARGS.lm and ARGS.trie:
         logging.info("ARGS.lm: %s", ARGS.lm)
         logging.info("ARGS.trie: %s", ARGS.trie)
@@ -219,8 +217,6 @@ if __name__ == '__main__':
     parser.add_argument('-m', '--model', required=True,
                         help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)")
-    parser.add_argument('-a', '--alphabet', default='alphabet.txt',
-                        help="Path to the configuration file specifying the alphabet used by the network. Default: alphabet.txt")
     parser.add_argument('-l', '--lm', default='lm.binary',
                         help="Path to the language model binary file. Default: lm.binary")
     parser.add_argument('-t', '--trie', default='trie',
diff --git a/examples/mic_vad_streaming/test.sh b/examples/mic_vad_streaming/test.sh
index e35c7e5f..5359d68e 100755
--- a/examples/mic_vad_streaming/test.sh
+++ b/examples/mic_vad_streaming/test.sh
@@ -14,7 +14,6 @@ pushd ${THIS}
 	python mic_vad_streaming.py \
 		--model $HOME/DeepSpeech/models/output_graph.pbmm \
-		--alphabet $HOME/DeepSpeech/models/alphabet.txt \
 		--lm $HOME/DeepSpeech/models/lm.binary \
 		--trie $HOME/DeepSpeech/models/trie \
 		--file $HOME/DeepSpeech/audio/2830-3980-0043.wav
diff --git a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
index 31b1f9d4..6667cdf8 100644
--- a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
+++ b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
@@ -77,7 +77,7 @@ namespace DeepSpeechWPF
         {
             try
             {
-                _sttClient.CreateModel("output_graph.pbmm", "alphabet.txt", BEAM_WIDTH);
+                _sttClient.CreateModel("output_graph.pbmm", BEAM_WIDTH);
                 Dispatcher.Invoke(() => { EnableControls(); });
             }
             catch (Exception ex)
diff --git a/examples/nodejs_wav/Readme.md b/examples/nodejs_wav/Readme.md
index 8dce9a5c..ddd63736 100644
--- a/examples/nodejs_wav/Readme.md
+++ b/examples/nodejs_wav/Readme.md
@@ -11,7 +11,6 @@ Edit references to models path if necessary:
 ```
 let modelPath = './models/output_graph.pbmm';
-let alphabetPath = './models/alphabet.txt';
 let lmPath = './models/lm.binary';
 let triePath = './models/trie';
 ```
diff --git a/examples/nodejs_wav/index.js b/examples/nodejs_wav/index.js
index 7883a010..b82063db 100644
--- a/examples/nodejs_wav/index.js
+++ b/examples/nodejs_wav/index.js
@@ -7,9 +7,8 @@ const Wav = require('node-wav');
 const BEAM_WIDTH = 1024;
 let modelPath = './models/output_graph.pbmm';
-let alphabetPath = './models/alphabet.txt';
 
-let model = new DeepSpeech.Model(modelPath, alphabetPath, BEAM_WIDTH);
+let model = new DeepSpeech.Model(modelPath, BEAM_WIDTH);
 
 let desiredSampleRate = model.sampleRate();
diff --git a/examples/vad_transcriber/audioTranscript_cmd.py b/examples/vad_transcriber/audioTranscript_cmd.py
index 36c71f68..d6eeb59f 100644
--- a/examples/vad_transcriber/audioTranscript_cmd.py
+++ b/examples/vad_transcriber/audioTranscript_cmd.py
@@ -18,7 +18,7 @@ def main(args):
     parser.add_argument('--audio', required=False,
                         help='Path to the audio file to run (WAV format)')
     parser.add_argument('--model', required=True,
-                        help='Path to directory that contains all model files (output_graph, lm, trie and alphabet)')
+                        help='Path to directory that contains all model files (output_graph, lm and trie)')
     parser.add_argument('--stream', required=False, action='store_true',
                         help='To use deepspeech streaming interface')
     args = parser.parse_args()
@@ -34,10 +34,10 @@ def main(args):
         dirName = os.path.expanduser(args.model)
 
     # Resolve all the paths of model files
-    output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
+    output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
 
     # Load output_graph, alpahbet, lm and trie
-    model_retval = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
+    model_retval = wavTranscriber.load_model(output_graph, lm, trie)
 
     if args.audio is not None:
         title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']
diff --git a/examples/vad_transcriber/audioTranscript_gui.py b/examples/vad_transcriber/audioTranscript_gui.py
index 30fbb0ad..a050b2a6 100644
--- a/examples/vad_transcriber/audioTranscript_gui.py
+++ b/examples/vad_transcriber/audioTranscript_gui.py
@@ -109,7 +109,7 @@ class App(QMainWindow):
         self.microphone = QRadioButton("Microphone")
         self.fileUpload = QRadioButton("File Upload")
         self.browseBox = QLineEdit(self, placeholderText="Wave File, Mono @ 16 kHz, 16bit Little-Endian")
-        self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, alphabet, lm & trie")
+        self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, lm & trie")
         self.textboxTranscript = QPlainTextEdit(self, placeholderText="Transcription")
         self.browseButton = QPushButton('Browse', self)
         self.browseButton.setToolTip('Select a wav file')
@@ -238,9 +238,9 @@ class App(QMainWindow):
     def modelResult(self, dirName):
         # Fetch and Resolve all the paths of model files
-        output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
+        output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
         # Load output_graph, alpahbet, lm and trie
-        self.model = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
+        self.model = wavTranscriber.load_model(output_graph, lm, trie)
 
     def modelFinish(self):
         # self.timer.stop()
diff --git a/examples/vad_transcriber/wavTranscriber.py b/examples/vad_transcriber/wavTranscriber.py
index 727dc5cf..dd1f407a 100644
--- a/examples/vad_transcriber/wavTranscriber.py
+++ b/examples/vad_transcriber/wavTranscriber.py
@@ -8,20 +8,19 @@ from timeit import default_timer as timer
 '''
 Load the pre-trained model into the memory
 @param models: Output Grapgh Protocol Buffer file
-@param alphabet: Alphabet.txt file
 @param lm: Language model file
 @param trie: Trie file
 
 @Retval
 Returns a list [DeepSpeech Object, Model Load Time, LM Load Time]
 '''
-def load_model(models, alphabet, lm, trie):
+def load_model(models, lm, trie):
     BEAM_WIDTH = 500
     LM_ALPHA = 0.75
     LM_BETA = 1.85
 
     model_load_start = timer()
-    ds = Model(models, alphabet, BEAM_WIDTH)
+    ds = Model(models, BEAM_WIDTH)
     model_load_end = timer() - model_load_start
     logging.debug("Loaded model in %0.3fs." % (model_load_end))
@@ -61,21 +60,18 @@ Resolve directory path for the models and fetch each of them.
 @param dirName: Path to the directory containing pre-trained models
 
 @Retval:
-Retunns a tuple containing each of the model files (pb, alphabet, lm and trie)
+Returns a tuple containing each of the model files (pb, lm and trie)
 '''
 def resolve_models(dirName):
     pb = glob.glob(dirName + "/*.pb")[0]
     logging.debug("Found Model: %s" % pb)
 
-    alphabet = glob.glob(dirName + "/alphabet.txt")[0]
-    logging.debug("Found Alphabet: %s" % alphabet)
-
     lm = glob.glob(dirName + "/lm.binary")[0]
     trie = glob.glob(dirName + "/trie")[0]
     logging.debug("Found Language Model: %s" % lm)
     logging.debug("Found Trie: %s" % trie)
 
-    return pb, alphabet, lm, trie
+    return pb, lm, trie
 
 '''
 Generate VAD segments. Filters out non-voiced audio frames.
diff --git a/native_client/args.h b/native_client/args.h
index 549f6419..6342763f 100644
--- a/native_client/args.h
+++ b/native_client/args.h
@@ -12,8 +12,6 @@
 
 char* model = NULL;
 
-char* alphabet = NULL;
-
 char* lm = NULL;
 
 char* trie = NULL;
@@ -41,12 +39,11 @@ int stream_size = 0;
 void PrintHelp(const char* bin)
 {
     std::cout <<
-    "Usage: " << bin << " --model MODEL --alphabet ALPHABET [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
+    "Usage: " << bin << " --model MODEL [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
     "\n"
     "Running DeepSpeech inference.\n"
    "\n"
    "  --model MODEL        Path to the model (protocol buffer binary file)\n"
-   "  --alphabet ALPHABET  Path to the configuration file specifying the alphabet used by the network\n"
    "  --lm LM              Path to the language model binary file\n"
    "  --trie TRIE          Path to the language model trie file created with native_client/generate_trie\n"
    "  --audio AUDIO        Path to the audio file to run (WAV format)\n"
@@ -68,7 +65,6 @@ bool ProcessArgs(int argc, char** argv)
     const char* const short_opts = "m:a:l:r:w:c:d:b:tehv";
     const option long_opts[] = {
             {"model", required_argument, nullptr, 'm'},
-            {"alphabet", required_argument, nullptr, 'a'},
             {"lm", required_argument, nullptr, 'l'},
             {"trie", required_argument, nullptr, 'r'},
             {"audio", required_argument, nullptr, 'w'},
@@ -98,10 +94,6 @@ bool ProcessArgs(int argc, char** argv)
                 model = optarg;
                 break;
 
-            case 'a':
-                alphabet = optarg;
-                break;
-
            case 'l':
                 lm = optarg;
                 break;
@@ -163,7 +155,7 @@ bool ProcessArgs(int argc, char** argv)
         return false;
     }
 
-    if (!model || !alphabet || !audio) {
+    if (!model || !audio) {
         PrintHelp(argv[0]);
         return false;
     }
diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
index 9bbf5e3c..5dbaf3fb 100644
--- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
@@ -29,36 +29,26 @@ namespace DeepSpeechClient
         /// Create an object providing an interface to a trained DeepSpeech model.
         ///
         /// The path to the frozen model graph.
-        /// The path to the configuration file specifying the alphabet used by the network.
        /// The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
        /// Thrown when the native binary failed to create the model.
         public unsafe void CreateModel(string aModelPath,
-            string aAlphabetConfigPath, uint aBeamWidth)
+            uint aBeamWidth)
         {
             string exceptionMessage = null;
             if (string.IsNullOrWhiteSpace(aModelPath))
             {
                 exceptionMessage = "Model path cannot be empty.";
             }
-            if (string.IsNullOrWhiteSpace(aAlphabetConfigPath))
-            {
-                exceptionMessage = "Alphabet path cannot be empty.";
-            }
             if (!File.Exists(aModelPath))
             {
                 exceptionMessage = $"Cannot find the model file: {aModelPath}";
             }
-            if (!File.Exists(aAlphabetConfigPath))
-            {
-                exceptionMessage = $"Cannot find the alphabet file: {aAlphabetConfigPath}";
-            }
             if (exceptionMessage != null)
             {
                 throw new FileNotFoundException(exceptionMessage);
             }
             var resultCode = NativeImp.DS_CreateModel(aModelPath,
-                            aAlphabetConfigPath,
                             aBeamWidth,
                             ref _modelStatePP);
             EvaluateResultCode(resultCode);
@@ -86,7 +76,7 @@ namespace DeepSpeechClient
                 case ErrorCodes.DS_ERR_NO_MODEL:
                     throw new ArgumentException("Missing model information.");
                 case ErrorCodes.DS_ERR_INVALID_ALPHABET:
-                    throw new ArgumentException("Invalid alphabet file or invalid alphabet size.");
+                    throw new ArgumentException("Invalid alphabet embedded in model. (Data corruption?)");
                 case ErrorCodes.DS_ERR_INVALID_SHAPE:
                     throw new ArgumentException("Invalid model shape.");
                 case ErrorCodes.DS_ERR_INVALID_LM:
diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
index f7bbee98..5f9e3a86 100644
--- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
@@ -17,11 +17,9 @@ namespace DeepSpeechClient.Interfaces
         /// Create an object providing an interface to a trained DeepSpeech model.
         ///
         /// The path to the frozen model graph.
-        /// The path to the configuration file specifying the alphabet used by the network.
         /// The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
         /// Thrown when the native binary failed to create the model.
         unsafe void CreateModel(string aModelPath,
-                                string aAlphabetConfigPath,
                                 uint aBeamWidth);
 
         ///
diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/DeepSpeechClient/NativeImp.cs
index 92cdb150..0ea331d8 100644
--- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs
+++ b/native_client/dotnet/DeepSpeechClient/NativeImp.cs
@@ -16,7 +16,6 @@ namespace DeepSpeechClient
         [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
         internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
-                                                                string aAlphabetConfigPath,
                                                                 uint aBeamWidth,
                                                                 ref IntPtr** pint);
diff --git a/native_client/dotnet/DeepSpeechConsole/Program.cs b/native_client/dotnet/DeepSpeechConsole/Program.cs
index 5085fd21..364cab71 100644
--- a/native_client/dotnet/DeepSpeechConsole/Program.cs
+++ b/native_client/dotnet/DeepSpeechConsole/Program.cs
@@ -35,7 +35,6 @@ namespace CSharpExamples
         static void Main(string[] args)
         {
             string model = null;
-            string alphabet = null;
             string lm = null;
             string trie = null;
             string audio = null;
@@ -43,7 +42,6 @@ namespace CSharpExamples
             if (args.Length > 0)
             {
                 model = GetArgument(args, "--model");
-                alphabet = GetArgument(args, "--alphabet");
                 lm = GetArgument(args, "--lm");
                 trie = GetArgument(args, "--trie");
                 audio = GetArgument(args, "--audio");
@@ -64,7 +62,6 @@ namespace CSharpExamples
                     stopwatch.Start();
                     sttClient.CreateModel(
                         model ?? "output_graph.pbmm",
-                        alphabet ?? "alphabet.txt",
                         BEAM_WIDTH);
                     stopwatch.Stop();
diff --git a/native_client/java/README.rst b/native_client/java/README.rst
index 4ab6cf96..c345c094 100644
--- a/native_client/java/README.rst
+++ b/native_client/java/README.rst
@@ -51,7 +51,6 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:
 
 * ``output_graph.tflite`` which is the TF Lite model
-* ``alphabet.txt``
 * ``lm.binary`` and ``trie`` files, if you want to use the language model ; please
   be aware that too big language model will make the device run out of memory
 
diff --git a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java b/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
index b44fdfab..a1065d4e 100644
--- a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
+++ b/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
@@ -23,7 +23,6 @@ public class DeepSpeechActivity extends AppCompatActivity {
     DeepSpeechModel _m = null;
 
     EditText _tfliteModel;
-    EditText _alphabet;
     EditText _audioFile;
 
     TextView _decodedString;
@@ -49,10 +48,10 @@ public class DeepSpeechActivity extends AppCompatActivity {
         return (int)((b1 & 0xFF) | (b2 & 0xFF) << 8 | (b3 & 0xFF) << 16 | (b4 & 0xFF) << 24);
     }
 
-    private void newModel(String tfliteModel, String alphabet) {
+    private void newModel(String tfliteModel) {
         this._tfliteStatus.setText("Creating model");
         if (this._m == null) {
-            this._m = new DeepSpeechModel(tfliteModel, alphabet, BEAM_WIDTH);
+            this._m = new DeepSpeechModel(tfliteModel, BEAM_WIDTH);
         }
     }
 
@@ -61,7 +60,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
 
         this._startInference.setEnabled(false);
 
-        this.newModel(this._tfliteModel.getText().toString(), this._alphabet.getText().toString());
+        this.newModel(this._tfliteModel.getText().toString());
 
         this._tfliteStatus.setText("Extracting audio features ...");
 
@@ -128,13 +127,11 @@ public class DeepSpeechActivity extends AppCompatActivity {
         this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus);
 
         this._tfliteModel = (EditText) findViewById(R.id.tfliteModel);
-        this._alphabet = (EditText) findViewById(R.id.alphabet);
         this._audioFile = (EditText) findViewById(R.id.audioFile);
 
         this._tfliteModel.setText("/sdcard/deepspeech/output_graph.tflite");
         this._tfliteStatus.setText("Ready, waiting ...");
 
-        this._alphabet.setText("/sdcard/deepspeech/alphabet.txt");
         this._audioFile.setText("/sdcard/deepspeech/audio.wav");
 
         this._startInference = (Button) findViewById(R.id.btnStartInference);
diff --git a/native_client/java/app/src/main/res/layout/activity_deep_speech.xml b/native_client/java/app/src/main/res/layout/activity_deep_speech.xml
index 82fb4fe3..02c383d4 100644
--- a/native_client/java/app/src/main/res/layout/activity_deep_speech.xml
+++ b/native_client/java/app/src/main/res/layout/activity_deep_speech.xml
@@ -97,25 +97,6 @@
         android:inputType="text" />
- - - - - - - ${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
   set +e
-  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --alphabet ${DATA_TMP_DIR}/alphabet.txt --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 }
@@ -322,22 +322,22 @@ run_tflite_basic_inference_tests()
 run_netframework_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 
   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 
   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 
   set +e
-  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
 }
@@ -345,22 +345,22 @@ run_netframework_inference_tests()
 run_electronjs_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 
   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
 }
@@ -368,25 +368,25 @@ run_electronjs_inference_tests()
 run_basic_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"
 
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"
 
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"
 
   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status"
@@ -397,24 +397,24 @@ run_all_inference_tests()
   run_basic_inference_tests
 
   set +e
-  phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" "$status"
 
   set +e
-  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_stereo_44k}" "$status"
 
   set +e
-  phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}"
 
   set +e
-  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
@@ -424,7 +424,6 @@ run_prod_concurrent_stream_tests()
   set +e
   output=$(python ${TASKCLUSTER_TMP_DIR}/test_sources/concurrent_streams.py \
              --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
-             --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
              --lm ${TASKCLUSTER_TMP_DIR}/lm.binary \
              --trie ${TASKCLUSTER_TMP_DIR}/trie \
              --audio1 ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
@@ -442,25 +441,25 @@ run_prod_inference_tests()
 {
   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"
 
   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"
 
   set +e
-  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status"
 
   set +e
-  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
@@ -468,13 +467,13 @@ run_prod_inference_tests()
 run_multi_inference_tests()
 {
   set +e -o pipefail
-  multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
+  multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
   status=$?
   set -e +o pipefail
   assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_nolm}" "$status"
 
   set +e -o pipefail
-  multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
+  multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
   status=$?
   set -e +o pipefail
   assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status"
@@ -483,7 +482,7 @@ run_multi_inference_tests()
 run_cpp_only_inference_tests()
 {
   set +e
-  phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
+  phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status"
@@ -566,7 +565,6 @@ download_data()
   ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"
   ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
-  cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.pruned.lm ${TASKCLUSTER_TMP_DIR}/lm.binary
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.trie ${TASKCLUSTER_TMP_DIR}/trie
   cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
@@ -579,7 +577,7 @@ download_material()
   download_native_client_files "${target_dir}"
   download_data
 
-  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
+  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav
 }
 
 download_benchmark_model()
@@ -1595,7 +1593,6 @@ android_setup_ndk_data()
   adb push \
     ${TASKCLUSTER_TMP_DIR}/${model_name} \
     ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
-    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
     ${ANDROID_TMP_DIR}/ds/
 }
@@ -1606,7 +1603,6 @@ android_setup_apk_data()
   adb push \
     ${TASKCLUSTER_TMP_DIR}/${model_name} \
     ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
-    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
     ${TASKCLUSTER_TMP_DIR}/lm.binary \
     ${TASKCLUSTER_TMP_DIR}/trie \
     ${ANDROID_TMP_DIR}/test/
diff --git a/util/flags.py b/util/flags.py
index 0241d9bf..6a8caf3b 100644
--- a/util/flags.py
+++ b/util/flags.py
@@ -133,7 +133,6 @@ def create_flags():
     # Decoder
     f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.')
-    f.DEFINE_alias('alphabet', 'alphabet_config_path')
     f.DEFINE_string('lm_binary_path', 'data/lm/lm.binary', 'path to the language model binary file created with KenLM')
     f.DEFINE_alias('lm', 'lm_binary_path')
     f.DEFINE_string('lm_trie_path', 'data/lm/trie', 'path to the language model trie file created with native_client/generate_trie')
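As a quick local sanity check mirroring the taskcluster commands above, the CLI can be scripted with and without the language model and the transcript checked for non-empty output. A minimal sketch only, assuming a `deepspeech` binary on PATH; the model, LM, trie and WAV paths are placeholders, not repo fixtures:

```python
# Hedged local smoke test: invoke the deepspeech CLI (which no longer takes
# --alphabet) and assert that a transcript comes back in both decoding modes.
import subprocess

MODEL = 'models/output_graph.pbmm'   # placeholder paths
LM = 'models/lm.binary'
TRIE = 'models/trie'
WAV = 'audio/LDC93S1.wav'

def transcribe(extra_args=()):
    # Only --model and --audio are required; --lm/--trie remain optional.
    cmd = ['deepspeech', '--model', MODEL, '--audio', WAV, *extra_args]
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    return result.stdout.strip()

assert transcribe(), 'acoustic-model-only decoding produced no transcript'
assert transcribe(['--lm', LM, '--trie', TRIE]), 'LM decoding produced no transcript'
```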