Remove alphabet param usage
commit 3fdc7d422d (parent 8c82081779)
@@ -7,7 +7,7 @@ extension-pkg-whitelist=
 
 # Add files or directories to the blacklist. They should be base names, not
 # paths.
-ignore=CVS
+ignore=examples
 
 # Add files or directories matching the regex patterns to the blacklist. The
 # regex matches against base names, not paths.
@@ -34,7 +34,7 @@ To install and use deepspeech all you have to do is:
 tar xvf audio-0.5.1.tar.gz
 
 # Transcribe an audio file
-deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
+deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
 
 A pre-trained English model is available for use and can be downloaded using `the instructions below <USING.rst#using-a-pre-trained-model>`_. Currently, only 16-bit, 16 kHz, mono-channel WAVE audio files are supported in the Python client. A package with some example audio files is available for download in our `release notes <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
 
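Note: with the `--alphabet` flag gone, the Python API call behind this command also shrinks to two constructor arguments. A minimal sketch, assuming the deepspeech 0.6-alpha Python package (where the alphabet is embedded in the model file) and the example paths from the hunk above:

```python
from deepspeech import Model

BEAM_WIDTH = 500  # assumed; the examples in this repo use 500

# The constructor no longer takes an alphabet path.
ds = Model('deepspeech-0.5.1-models/output_graph.pbmm', BEAM_WIDTH)
ds.enableDecoderWithLM('deepspeech-0.5.1-models/lm.binary',
                       'deepspeech-0.5.1-models/trie',
                       0.75, 1.85)  # LM_ALPHA / LM_BETA values used elsewhere in this diff
```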
@@ -50,7 +50,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
 pip3 install deepspeech-gpu
 
 # Transcribe an audio file.
-deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
+deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
 
 Please ensure you have the required `CUDA dependencies <USING.rst#cuda-dependency>`_.
 
@@ -105,7 +105,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+   deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
 
 The arguments ``--lm`` and ``--trie`` are optional, and represent a language model.
 
@@ -159,7 +159,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   ./deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio audio_input.wav
+   ./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav
 
 See the help output with ``./deepspeech -h`` and the `native client README <native_client/README.rst>`_ for more details.
 
@@ -239,7 +239,7 @@ def delete_tree(dir):
     except IOError:
         print('No remote directory: %s' % dir)
 
-def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
+def setup_tempdir(dir, models, wav, lm_binary, trie, binaries):
     r'''
     Copy models, libs and binary to a directory (new one if dir is None)
     '''
@@ -268,7 +268,7 @@ def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
             print('Copying %s to %s' % (f, dir))
             shutil.copy2(f, dir)
 
-    for extra_file in [ wav, alphabet, lm_binary, trie ]:
+    for extra_file in [ wav, lm_binary, trie ]:
         if extra_file and not os.path.isfile(os.path.join(dir, os.path.basename(extra_file))):
             print('Copying %s to %s' % (extra_file, dir))
             shutil.copy2(extra_file, dir)
@@ -375,10 +375,10 @@ def establish_ssh(target=None, auto_trust=False, allow_agent=True, look_keys=Tru
 
     return ssh_conn
 
-def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1):
+def run_benchmarks(dir, models, wav, lm_binary=None, trie=None, iters=-1):
     r'''
     Core of the running of the benchmarks. We will run on all of models, against
-    the WAV file provided as wav, and the provided alphabet.
+    the WAV file provided as wav.
     '''
 
     assert_valid_dir(dir)
@@ -396,9 +396,9 @@ def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=
         }
 
         if lm_binary and trie:
-            cmdline = './deepspeech --model "%s" --alphabet "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, alphabet, lm_binary, trie, wav)
+            cmdline = './deepspeech --model "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, lm_binary, trie, wav)
         else:
-            cmdline = './deepspeech --model "%s" --alphabet "%s" --audio "%s" -t' % (model_filename, alphabet, wav)
+            cmdline = './deepspeech --model "%s" --audio "%s" -t' % (model_filename, wav)
 
         for it in range(iters):
             sys.stdout.write('\rRunning %s: %d/%d' % (os.path.basename(model), (it+1), iters))
@@ -453,8 +453,6 @@ def handle_args():
                         help='List of files (protocolbuffer) to work on. Might be a zip file.')
     parser.add_argument('--wav', required=False,
                         help='WAV file to pass to native_client. Supply again in plotting mode to draw realine line.')
-    parser.add_argument('--alphabet', required=False,
-                        help='Text file to pass to native_client for the alphabet.')
     parser.add_argument('--lm_binary', required=False,
                         help='Path to the LM binary file used by the decoder.')
     parser.add_argument('--trie', required=False,
@@ -472,8 +470,8 @@ def handle_args():
 def do_main():
     cli_args = handle_args()
 
-    if not cli_args.models or not cli_args.wav or not cli_args.alphabet:
-        raise AssertionError('Missing arguments (models, wav or alphabet)')
+    if not cli_args.models or not cli_args.wav:
+        raise AssertionError('Missing arguments (models or wav)')
 
     if cli_args.dir is not None and not os.path.isdir(cli_args.dir):
         raise AssertionError('Inexistent temp directory')
@@ -484,18 +482,17 @@ def do_main():
         global ssh_conn
         ssh_conn = establish_ssh(target=cli_args.target, auto_trust=cli_args.autotrust, allow_agent=cli_args.allowagent, look_keys=cli_args.lookforkeys)
 
-    tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, alphabet=cli_args.alphabet, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)
+    tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)
 
     dest_sorted_models = list(map(lambda x: os.path.join(tempdir, os.path.basename(x)), sorted_models))
     dest_wav = os.path.join(tempdir, os.path.basename(cli_args.wav))
-    dest_alphabet = os.path.join(tempdir, os.path.basename(cli_args.alphabet))
 
     if cli_args.lm_binary and cli_args.trie:
         dest_lm_binary = os.path.join(tempdir, os.path.basename(cli_args.lm_binary))
         dest_trie = os.path.join(tempdir, os.path.basename(cli_args.trie))
-        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
+        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
     else:
-        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters)
+        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, iters=cli_args.iters)
 
     if cli_args.csv:
         produce_csv(input=inference_times, output=cli_args.csv)
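Note: callers of the benchmark helpers drop the alphabet plumbing entirely. A hypothetical invocation using the new signatures from this hunk (all file names are placeholders, not values from the repo):

```python
# setup_tempdir() and run_benchmarks() as defined above in this diff.
tempdir, sorted_models = setup_tempdir(dir=None,
                                       models=['output_graph.pb'],
                                       wav='LDC93S1.wav',
                                       lm_binary='lm.binary',
                                       trie='trie',
                                       binaries=None)

inference_times = run_benchmarks(dir=tempdir,
                                 models=sorted_models,
                                 wav='LDC93S1.wav',
                                 lm_binary='lm.binary',
                                 trie='trie',
                                 iters=5)
```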
@@ -23,16 +23,16 @@ This module should be self-contained:
 - pip install native_client/python/dist/deepspeech*.whl
 - pip install -r requirements_eval_tflite.txt
 
-Then run with a TF Lite model, alphabet, LM/trie and a CSV test file
+Then run with a TF Lite model, LM/trie and a CSV test file
 '''
 
 BEAM_WIDTH = 500
 LM_ALPHA = 0.75
 LM_BETA = 1.85
 
-def tflite_worker(model, alphabet, lm, trie, queue_in, queue_out, gpu_mask):
+def tflite_worker(model, lm, trie, queue_in, queue_out, gpu_mask):
     os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_mask)
-    ds = Model(model, alphabet, BEAM_WIDTH)
+    ds = Model(model, BEAM_WIDTH)
     ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)
 
     while True:
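Note: each TF Lite worker now builds its model from just the graph path and the beam width; the LM and trie are still wired in through enableDecoderWithLM(). A sketch of the per-worker setup, assuming the deepspeech package and the constants defined in the hunk above:

```python
from deepspeech import Model

BEAM_WIDTH = 500
LM_ALPHA = 0.75
LM_BETA = 1.85

ds = Model('output_graph.tflite', BEAM_WIDTH)  # no alphabet argument anymore
ds.enableDecoderWithLM('lm.binary', 'trie', LM_ALPHA, LM_BETA)
```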
@@ -58,8 +58,6 @@ def main():
     parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
     parser.add_argument('--model', required=True,
                         help='Path to the model (protocol buffer binary file)')
-    parser.add_argument('--alphabet', required=True,
-                        help='Path to the configuration file specifying the alphabet used by the network')
     parser.add_argument('--lm', required=True,
                         help='Path to the language model binary file')
     parser.add_argument('--trie', required=True,
@@ -78,7 +76,7 @@ def main():
 
     processes = []
     for i in range(args.proc):
-        worker_process = Process(target=tflite_worker, args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
+        worker_process = Process(target=tflite_worker, args=(args.model, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
         worker_process.start() # Launch reader() as a separate python process
         processes.append(worker_process)
 
@@ -22,14 +22,12 @@ Here is an example for a local audio file:
 ```bash
 node ./index.js --audio <AUDIO_FILE> \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 
 Here is an example for a remote RTMP-Stream:
 ```bash
 node ./index.js --audio rtmp://<IP>:1935/live/teststream \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 
 ## Examples
@@ -39,21 +37,18 @@ node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 ```bash
 node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 ```bash
 node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 Real time streaming inference in combination with a RTMP server.
 ```bash
@@ -61,7 +56,6 @@ node ./index.js --audio rtmp://<HOST>/<APP>/<KEY> \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 
 ## Notes
@@ -32,7 +32,6 @@ VersionAction.prototype.call = function(parser) {
 
 let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
-parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
 parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
 parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
@@ -45,7 +44,7 @@ function totalTime(hrtimeValue) {
 
 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-let model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
+let model = new Ds.Model(args['model'], BEAM_WIDTH);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
 
@@ -13,18 +13,15 @@ pushd ${THIS}
 node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
 --lm $HOME/DeepSpeech/models/lm.binary \
 --trie $HOME/DeepSpeech/models/trie \
---model $HOME/DeepSpeech/models/output_graph.pbmm \
---alphabet $HOME/DeepSpeech/models/alphabet.txt
+--model $HOME/DeepSpeech/models/output_graph.pbmm
 
 node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \
 --lm $HOME/DeepSpeech/models/lm.binary \
 --trie $HOME/DeepSpeech/models/trie \
---model $HOME/DeepSpeech/models/output_graph.pbmm \
---alphabet $HOME/DeepSpeech/models/alphabet.txt
+--model $HOME/DeepSpeech/models/output_graph.pbmm
 
 node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \
 --lm $HOME/DeepSpeech/models/lm.binary \
 --trie $HOME/DeepSpeech/models/trie \
---model $HOME/DeepSpeech/models/output_graph.pbmm \
---alphabet $HOME/DeepSpeech/models/alphabet.txt
+--model $HOME/DeepSpeech/models/output_graph.pbmm
 
 popd
@@ -29,7 +29,7 @@ Usage
 .. code-block::
 
    usage: mic_vad_streaming.py [-h] [-v VAD_AGGRESSIVENESS] [--nospinner]
-                               [-w SAVEWAV] -m MODEL [-a ALPHABET] [-l LM]
+                               [-w SAVEWAV] -m MODEL [-l LM]
                                [-t TRIE] [-nf N_FEATURES] [-nc N_CONTEXT]
                                [-la LM_ALPHA] [-lb LM_BETA]
                                [-bw BEAM_WIDTH]
@@ -49,9 +49,6 @@ Usage
                         Path to the model (protocol buffer binary file, or
                         entire directory containing all standard-named files
                         for model)
-  -a ALPHABET, --alphabet ALPHABET
-                        Path to the configuration file specifying the alphabet
-                        used by the network. Default: alphabet.txt
   -l LM, --lm LM        Path to the language model binary file. Default:
                         lm.binary
   -t TRIE, --trie TRIE  Path to the language model trie file created with
@@ -156,14 +156,12 @@ def main(ARGS):
     if os.path.isdir(ARGS.model):
         model_dir = ARGS.model
         ARGS.model = os.path.join(model_dir, 'output_graph.pb')
-        ARGS.alphabet = os.path.join(model_dir, ARGS.alphabet if ARGS.alphabet else 'alphabet.txt')
         ARGS.lm = os.path.join(model_dir, ARGS.lm)
         ARGS.trie = os.path.join(model_dir, ARGS.trie)
 
     print('Initializing model...')
     logging.info("ARGS.model: %s", ARGS.model)
-    logging.info("ARGS.alphabet: %s", ARGS.alphabet)
-    model = deepspeech.Model(ARGS.model, ARGS.alphabet, ARGS.beam_width)
+    model = deepspeech.Model(ARGS.model, ARGS.beam_width)
     if ARGS.lm and ARGS.trie:
         logging.info("ARGS.lm: %s", ARGS.lm)
         logging.info("ARGS.trie: %s", ARGS.trie)
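Note: the streaming example now initializes its model with only the graph path and beam width; the LM and trie remain optional. A minimal sketch of the updated flow, assuming the deepspeech package (paths are placeholders):

```python
import deepspeech

model = deepspeech.Model('models/output_graph.pb', 500)  # beam width only

lm_path, trie_path = 'models/lm.binary', 'models/trie'
if lm_path and trie_path:
    model.enableDecoderWithLM(lm_path, trie_path, 0.75, 1.85)
```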
@@ -219,8 +217,6 @@ if __name__ == '__main__':
 
     parser.add_argument('-m', '--model', required=True,
                         help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)")
-    parser.add_argument('-a', '--alphabet', default='alphabet.txt',
-                        help="Path to the configuration file specifying the alphabet used by the network. Default: alphabet.txt")
     parser.add_argument('-l', '--lm', default='lm.binary',
                         help="Path to the language model binary file. Default: lm.binary")
     parser.add_argument('-t', '--trie', default='trie',
|
@ -14,7 +14,6 @@ pushd ${THIS}
|
|||||||
|
|
||||||
python mic_vad_streaming.py \
|
python mic_vad_streaming.py \
|
||||||
--model $HOME/DeepSpeech/models/output_graph.pbmm \
|
--model $HOME/DeepSpeech/models/output_graph.pbmm \
|
||||||
--alphabet $HOME/DeepSpeech/models/alphabet.txt \
|
|
||||||
--lm $HOME/DeepSpeech/models/lm.binary \
|
--lm $HOME/DeepSpeech/models/lm.binary \
|
||||||
--trie $HOME/DeepSpeech/models/trie \
|
--trie $HOME/DeepSpeech/models/trie \
|
||||||
--file $HOME/DeepSpeech/audio/2830-3980-0043.wav
|
--file $HOME/DeepSpeech/audio/2830-3980-0043.wav
|
||||||
|
@@ -77,7 +77,7 @@ namespace DeepSpeechWPF
             {
                 try
                 {
-                    _sttClient.CreateModel("output_graph.pbmm", "alphabet.txt", BEAM_WIDTH);
+                    _sttClient.CreateModel("output_graph.pbmm", BEAM_WIDTH);
                     Dispatcher.Invoke(() => { EnableControls(); });
                 }
                 catch (Exception ex)
@@ -11,7 +11,6 @@ Edit references to models path if necessary:
 
 ```
 let modelPath = './models/output_graph.pbmm';
-let alphabetPath = './models/alphabet.txt';
 let lmPath = './models/lm.binary';
 let triePath = './models/trie';
 ```
@@ -7,9 +7,8 @@ const Wav = require('node-wav');
 
 const BEAM_WIDTH = 1024;
 let modelPath = './models/output_graph.pbmm';
-let alphabetPath = './models/alphabet.txt';
 
-let model = new DeepSpeech.Model(modelPath, alphabetPath, BEAM_WIDTH);
+let model = new DeepSpeech.Model(modelPath, BEAM_WIDTH);
 
 let desiredSampleRate = model.sampleRate();
 
@@ -18,7 +18,7 @@ def main(args):
     parser.add_argument('--audio', required=False,
                         help='Path to the audio file to run (WAV format)')
     parser.add_argument('--model', required=True,
-                        help='Path to directory that contains all model files (output_graph, lm, trie and alphabet)')
+                        help='Path to directory that contains all model files (output_graph, lm and trie)')
     parser.add_argument('--stream', required=False, action='store_true',
                         help='To use deepspeech streaming interface')
     args = parser.parse_args()
@@ -34,10 +34,10 @@ def main(args):
     dirName = os.path.expanduser(args.model)
 
     # Resolve all the paths of model files
-    output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
+    output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
 
     # Load output_graph, alpahbet, lm and trie
-    model_retval = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
+    model_retval = wavTranscriber.load_model(output_graph, lm, trie)
 
     if args.audio is not None:
         title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']
@@ -109,7 +109,7 @@ class App(QMainWindow):
         self.microphone = QRadioButton("Microphone")
         self.fileUpload = QRadioButton("File Upload")
         self.browseBox = QLineEdit(self, placeholderText="Wave File, Mono @ 16 kHz, 16bit Little-Endian")
-        self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, alphabet, lm & trie")
+        self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, lm & trie")
         self.textboxTranscript = QPlainTextEdit(self, placeholderText="Transcription")
         self.browseButton = QPushButton('Browse', self)
         self.browseButton.setToolTip('Select a wav file')
@@ -238,9 +238,9 @@ class App(QMainWindow):
 
     def modelResult(self, dirName):
        # Fetch and Resolve all the paths of model files
-        output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
+        output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
        # Load output_graph, alpahbet, lm and trie
-        self.model = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
+        self.model = wavTranscriber.load_model(output_graph, lm, trie)
 
     def modelFinish(self):
        # self.timer.stop()
@@ -8,20 +8,19 @@ from timeit import default_timer as timer
 '''
 Load the pre-trained model into the memory
 @param models: Output Grapgh Protocol Buffer file
-@param alphabet: Alphabet.txt file
 @param lm: Language model file
 @param trie: Trie file
 
 @Retval
 Returns a list [DeepSpeech Object, Model Load Time, LM Load Time]
 '''
-def load_model(models, alphabet, lm, trie):
+def load_model(models, lm, trie):
     BEAM_WIDTH = 500
     LM_ALPHA = 0.75
     LM_BETA = 1.85
 
     model_load_start = timer()
-    ds = Model(models, alphabet, BEAM_WIDTH)
+    ds = Model(models, BEAM_WIDTH)
     model_load_end = timer() - model_load_start
     logging.debug("Loaded model in %0.3fs." % (model_load_end))
 
@@ -61,21 +60,18 @@ Resolve directory path for the models and fetch each of them.
 @param dirName: Path to the directory containing pre-trained models
 
 @Retval:
-Retunns a tuple containing each of the model files (pb, alphabet, lm and trie)
+Retunns a tuple containing each of the model files (pb, lm and trie)
 '''
 def resolve_models(dirName):
     pb = glob.glob(dirName + "/*.pb")[0]
     logging.debug("Found Model: %s" % pb)
 
-    alphabet = glob.glob(dirName + "/alphabet.txt")[0]
-    logging.debug("Found Alphabet: %s" % alphabet)
-
     lm = glob.glob(dirName + "/lm.binary")[0]
     trie = glob.glob(dirName + "/trie")[0]
     logging.debug("Found Language Model: %s" % lm)
     logging.debug("Found Trie: %s" % trie)
 
-    return pb, alphabet, lm, trie
+    return pb, lm, trie
 
 '''
 Generate VAD segments. Filters out non-voiced audio frames.
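Note: resolve_models() now hands back a 3-tuple and load_model() takes three paths; per the docstring above, load_model() still returns [DeepSpeech object, model load time, LM load time]. A sketch of the slimmed-down call chain (module name as in this example directory):

```python
import wavTranscriber

# No alphabet.txt lookup anymore: only graph, LM and trie are resolved.
output_graph, lm, trie = wavTranscriber.resolve_models('models/')
ds, model_load_time, lm_load_time = wavTranscriber.load_model(output_graph, lm, trie)
```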
@@ -12,8 +12,6 @@
 
 char* model = NULL;
 
-char* alphabet = NULL;
-
 char* lm = NULL;
 
 char* trie = NULL;
@@ -41,12 +39,11 @@ int stream_size = 0;
 void PrintHelp(const char* bin)
 {
     std::cout <<
-    "Usage: " << bin << " --model MODEL --alphabet ALPHABET [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
+    "Usage: " << bin << " --model MODEL [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
     "\n"
    "Running DeepSpeech inference.\n"
     "\n"
    " --model MODEL Path to the model (protocol buffer binary file)\n"
-    " --alphabet ALPHABET Path to the configuration file specifying the alphabet used by the network\n"
    " --lm LM Path to the language model binary file\n"
    " --trie TRIE Path to the language model trie file created with native_client/generate_trie\n"
    " --audio AUDIO Path to the audio file to run (WAV format)\n"
@@ -68,7 +65,6 @@ bool ProcessArgs(int argc, char** argv)
     const char* const short_opts = "m:a:l:r:w:c:d:b:tehv";
     const option long_opts[] = {
         {"model", required_argument, nullptr, 'm'},
-        {"alphabet", required_argument, nullptr, 'a'},
         {"lm", required_argument, nullptr, 'l'},
         {"trie", required_argument, nullptr, 'r'},
         {"audio", required_argument, nullptr, 'w'},
@@ -98,10 +94,6 @@ bool ProcessArgs(int argc, char** argv)
             model = optarg;
             break;
 
-        case 'a':
-            alphabet = optarg;
-            break;
-
         case 'l':
             lm = optarg;
             break;
@@ -163,7 +155,7 @@ bool ProcessArgs(int argc, char** argv)
         return false;
     }
 
-    if (!model || !alphabet || !audio) {
+    if (!model || !audio) {
         PrintHelp(argv[0]);
         return false;
     }
@@ -29,36 +29,26 @@ namespace DeepSpeechClient
         /// Create an object providing an interface to a trained DeepSpeech model.
         /// </summary>
         /// <param name="aModelPath">The path to the frozen model graph.</param>
-        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
         /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
         public unsafe void CreateModel(string aModelPath,
-            string aAlphabetConfigPath, uint aBeamWidth)
+            uint aBeamWidth)
         {
             string exceptionMessage = null;
             if (string.IsNullOrWhiteSpace(aModelPath))
             {
                 exceptionMessage = "Model path cannot be empty.";
             }
-            if (string.IsNullOrWhiteSpace(aAlphabetConfigPath))
-            {
-                exceptionMessage = "Alphabet path cannot be empty.";
-            }
             if (!File.Exists(aModelPath))
             {
                 exceptionMessage = $"Cannot find the model file: {aModelPath}";
             }
-            if (!File.Exists(aAlphabetConfigPath))
-            {
-                exceptionMessage = $"Cannot find the alphabet file: {aAlphabetConfigPath}";
-            }
 
             if (exceptionMessage != null)
             {
                 throw new FileNotFoundException(exceptionMessage);
             }
             var resultCode = NativeImp.DS_CreateModel(aModelPath,
-                            aAlphabetConfigPath,
                             aBeamWidth,
                             ref _modelStatePP);
             EvaluateResultCode(resultCode);
@@ -86,7 +76,7 @@ namespace DeepSpeechClient
                 case ErrorCodes.DS_ERR_NO_MODEL:
                     throw new ArgumentException("Missing model information.");
                 case ErrorCodes.DS_ERR_INVALID_ALPHABET:
-                    throw new ArgumentException("Invalid alphabet file or invalid alphabet size.");
+                    throw new ArgumentException("Invalid alphabet embedded in model. (Data corruption?)");
                 case ErrorCodes.DS_ERR_INVALID_SHAPE:
                     throw new ArgumentException("Invalid model shape.");
                 case ErrorCodes.DS_ERR_INVALID_LM:
@@ -17,11 +17,9 @@ namespace DeepSpeechClient.Interfaces
         /// Create an object providing an interface to a trained DeepSpeech model.
         /// </summary>
         /// <param name="aModelPath">The path to the frozen model graph.</param>
-        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
         /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
         unsafe void CreateModel(string aModelPath,
-            string aAlphabetConfigPath,
             uint aBeamWidth);
 
         /// <summary>
@@ -16,7 +16,6 @@ namespace DeepSpeechClient
 
         [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
         internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
-            string aAlphabetConfigPath,
             uint aBeamWidth,
             ref IntPtr** pint);
 
@@ -35,7 +35,6 @@ namespace CSharpExamples
         static void Main(string[] args)
         {
             string model = null;
-            string alphabet = null;
             string lm = null;
             string trie = null;
             string audio = null;
@@ -43,7 +42,6 @@ namespace CSharpExamples
             if (args.Length > 0)
             {
                 model = GetArgument(args, "--model");
-                alphabet = GetArgument(args, "--alphabet");
                 lm = GetArgument(args, "--lm");
                 trie = GetArgument(args, "--trie");
                 audio = GetArgument(args, "--audio");
@@ -64,7 +62,6 @@ namespace CSharpExamples
             stopwatch.Start();
             sttClient.CreateModel(
                 model ?? "output_graph.pbmm",
-                alphabet ?? "alphabet.txt",
                 BEAM_WIDTH);
             stopwatch.Stop();
 
@@ -51,7 +51,6 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:
 
 
 * ``output_graph.tflite`` which is the TF Lite model
-* ``alphabet.txt``
 * ``lm.binary`` and ``trie`` files, if you want to use the language model ; please
   be aware that too big language model will make the device run out of memory
 
@@ -23,7 +23,6 @@ public class DeepSpeechActivity extends AppCompatActivity {
     DeepSpeechModel _m = null;
 
     EditText _tfliteModel;
-    EditText _alphabet;
     EditText _audioFile;
 
     TextView _decodedString;
@@ -49,10 +48,10 @@ public class DeepSpeechActivity extends AppCompatActivity {
         return (int)((b1 & 0xFF) | (b2 & 0xFF) << 8 | (b3 & 0xFF) << 16 | (b4 & 0xFF) << 24);
     }
 
-    private void newModel(String tfliteModel, String alphabet) {
+    private void newModel(String tfliteModel) {
         this._tfliteStatus.setText("Creating model");
         if (this._m == null) {
-            this._m = new DeepSpeechModel(tfliteModel, alphabet, BEAM_WIDTH);
+            this._m = new DeepSpeechModel(tfliteModel, BEAM_WIDTH);
         }
     }
 
@@ -61,7 +60,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
 
         this._startInference.setEnabled(false);
 
-        this.newModel(this._tfliteModel.getText().toString(), this._alphabet.getText().toString());
+        this.newModel(this._tfliteModel.getText().toString());
 
         this._tfliteStatus.setText("Extracting audio features ...");
 
|
|||||||
this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus);
|
this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus);
|
||||||
|
|
||||||
this._tfliteModel = (EditText) findViewById(R.id.tfliteModel);
|
this._tfliteModel = (EditText) findViewById(R.id.tfliteModel);
|
||||||
this._alphabet = (EditText) findViewById(R.id.alphabet);
|
|
||||||
this._audioFile = (EditText) findViewById(R.id.audioFile);
|
this._audioFile = (EditText) findViewById(R.id.audioFile);
|
||||||
|
|
||||||
this._tfliteModel.setText("/sdcard/deepspeech/output_graph.tflite");
|
this._tfliteModel.setText("/sdcard/deepspeech/output_graph.tflite");
|
||||||
this._tfliteStatus.setText("Ready, waiting ...");
|
this._tfliteStatus.setText("Ready, waiting ...");
|
||||||
|
|
||||||
this._alphabet.setText("/sdcard/deepspeech/alphabet.txt");
|
|
||||||
this._audioFile.setText("/sdcard/deepspeech/audio.wav");
|
this._audioFile.setText("/sdcard/deepspeech/audio.wav");
|
||||||
|
|
||||||
this._startInference = (Button) findViewById(R.id.btnStartInference);
|
this._startInference = (Button) findViewById(R.id.btnStartInference);
|
||||||
|
@@ -97,25 +97,6 @@
             android:inputType="text" />
     </LinearLayout>
 
-    <LinearLayout
-        android:layout_width="match_parent"
-        android:layout_height="wrap_content"
-        android:orientation="horizontal">
-
-        <TextView
-            android:id="@+id/lblAlphabet"
-            android:layout_width="263dp"
-            android:layout_height="wrap_content"
-            android:layout_weight="1"
-            android:text="Alphabet" />
-
-        <EditText
-            android:id="@+id/alphabet"
-            android:layout_width="wrap_content"
-            android:layout_height="wrap_content"
-            android:inputType="text" />
-    </LinearLayout>
-
     <LinearLayout
         android:layout_width="match_parent"
         android:layout_height="wrap_content"
@@ -30,7 +30,6 @@ import java.nio.ByteBuffer;
 public class BasicTest {
 
     public static final String modelFile = "/data/local/tmp/test/output_graph.tflite";
-    public static final String alphabetFile = "/data/local/tmp/test/alphabet.txt";
     public static final String lmFile = "/data/local/tmp/test/lm.binary";
     public static final String trieFile = "/data/local/tmp/test/trie";
     public static final String wavFile = "/data/local/tmp/test/LDC93S1.wav";
@@ -64,7 +63,7 @@ public class BasicTest {
 
     @Test
     public void loadDeepSpeech_basic() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
         m.freeModel();
     }
 
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void loadDeepSpeech_stt_noLM() {
|
public void loadDeepSpeech_stt_noLM() {
|
||||||
DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
|
DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
|
||||||
|
|
||||||
String decoded = doSTT(m, false);
|
String decoded = doSTT(m, false);
|
||||||
assertEquals("she had your dark suit in greasy wash water all year", decoded);
|
assertEquals("she had your dark suit in greasy wash water all year", decoded);
|
||||||
@ -130,7 +129,7 @@ public class BasicTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void loadDeepSpeech_stt_withLM() {
|
public void loadDeepSpeech_stt_withLM() {
|
||||||
DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
|
DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
|
||||||
m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);
|
m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);
|
||||||
|
|
||||||
String decoded = doSTT(m, false);
|
String decoded = doSTT(m, false);
|
||||||
@@ -140,7 +139,7 @@ public class BasicTest {
 
     @Test
     public void loadDeepSpeech_sttWithMetadata_noLM() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
 
         String decoded = doSTT(m, true);
         assertEquals("she had your dark suit in greasy wash water all year", decoded);
@@ -149,7 +148,7 @@ public class BasicTest {
 
     @Test
     public void loadDeepSpeech_sttWithMetadata_withLM() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
         m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);
 
         String decoded = doSTT(m, true);
@@ -20,15 +20,13 @@ public class DeepSpeechModel {
     * @constructor
     *
     * @param modelPath The path to the frozen model graph.
-    * @param alphabetPath The path to the configuration file specifying
-    *                     the alphabet used by the network. See alphabet.h.
     * @param beam_width The beam width used by the decoder. A larger beam
     *                   width generates better results at the cost of decoding
     *                   time.
     */
-    public DeepSpeechModel(String modelPath, String alphabetPath, int beam_width) {
+    public DeepSpeechModel(String modelPath, int beam_width) {
         this._mspp = impl.new_modelstatep();
-        impl.CreateModel(modelPath, alphabetPath, beam_width, this._mspp);
+        impl.CreateModel(modelPath, beam_width, this._mspp);
         this._msp = impl.modelstatep_value(this._mspp);
     }
 
@@ -17,7 +17,7 @@ Once everything is installed, you can then use the `deepspeech` binary to do spe
 
 pip3 install deepspeech
 
-deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
 
 ```
 
@@ -27,7 +27,7 @@ Alternatively, quicker inference can be performed using a supported NVIDIA GPU o
 
 pip3 install deepspeech-gpu
 
-deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
 
 ```
 
@@ -223,7 +223,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 ```bash
 
-deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
 
 ```
 
@@ -290,7 +290,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 ```bash
 
-./deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio audio_input.wav
+./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav
 
 ```
 
@@ -29,7 +29,6 @@ VersionAction.prototype.call = function(parser) {
 
 var parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
-parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
 parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
 parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
@@ -55,7 +54,7 @@ function metadataToString(metadata) {
 
 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-var model = new Ds.Model(args['model'], args['alphabet'], args['beam_width']);
+var model = new Ds.Model(args['model'], args['beam_width']);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
 
@@ -25,7 +25,6 @@ if (process.platform === 'win32') {
  * An object providing an interface to a trained DeepSpeech model.
  *
  * @param {string} aModelPath The path to the frozen model graph.
- * @param {string} aAlphabetConfigPath The path to the configuration file specifying the alphabet used by the network. See alphabet.h.
  * @param {number} aBeamWidth The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
  *
  * @throws on error
@@ -30,9 +30,6 @@ class Model(object):
     :param aModelPath: Path to model file to load
     :type aModelPath: str
 
-    :param aAlphabetConfigPath: Path to alphabet file to load
-    :type aAlphabetConfigPath: str
-
     :param aBeamWidth: Decoder beam width
     :type aBeamWidth: int
     """
@@ -46,8 +46,6 @@ def main():
 parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
 parser.add_argument('--model', required=True,
                     help='Path to the model (protocol buffer binary file)')
-parser.add_argument('--alphabet', required=True,
-                    help='Path to the configuration file specifying the alphabet used by the network')
 parser.add_argument('--lm', nargs='?',
                     help='Path to the language model binary file')
 parser.add_argument('--trie', nargs='?',
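With ``--alphabet`` removed, the Python client's command line is reduced to the model, the optional LM files, and the audio path. A minimal sketch of the post-change parser, exercised with a hypothetical argv (illustration only, not the client itself):

.. code-block:: python

    import argparse

    parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
    parser.add_argument('--model', required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument('--lm', nargs='?',
                        help='Path to the language model binary file')
    parser.add_argument('--trie', nargs='?',
                        help='Path to the language model trie file')
    parser.add_argument('--audio', required=True,
                        help='Path to the audio file to run (WAV format)')

    args = parser.parse_args(['--model', 'models/output_graph.pbmm',
                              '--audio', 'audio/2830-3980-0043.wav'])
    assert args.lm is None  # the LM stays optional; --alphabet no longer exists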
@@ -68,7 +66,7 @@ def main():

 print('Loading model from file {}'.format(args.model), file=sys.stderr)
 model_load_start = timer()
-ds = Model(args.model, args.alphabet, args.beam_width)
+ds = Model(args.model, args.beam_width)
 model_load_end = timer() - model_load_start
 print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)

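The ``Model`` constructor now takes only the model path and the beam width. A sketch of the surrounding load-and-transcribe flow under that two-argument form, with hypothetical paths and assuming the 0.5-era ``stt(audio_buffer, sample_rate)`` signature, which this commit does not appear to touch:

.. code-block:: python

    import sys
    import wave
    from timeit import default_timer as timer

    import numpy as np
    from deepspeech import Model

    BEAM_WIDTH = 500  # assumption: a typical decoder beam width

    model_load_start = timer()
    ds = Model('models/output_graph.pbmm', BEAM_WIDTH)  # no alphabet argument
    print('Loaded model in {:.3}s.'.format(timer() - model_load_start), file=sys.stderr)

    with wave.open('audio/2830-3980-0043.wav', 'rb') as fin:
        sample_rate = fin.getframerate()
        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    print(ds.stt(audio, sample_rate))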
@@ -25,8 +25,6 @@ def main():
 parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
 parser.add_argument('--model', required=True,
                     help='Path to the model (protocol buffer binary file)')
-parser.add_argument('--alphabet', required=True,
-                    help='Path to the configuration file specifying the alphabet used by the network')
 parser.add_argument('--lm', nargs='?',
                     help='Path to the language model binary file')
 parser.add_argument('--trie', nargs='?',
@@ -37,7 +35,7 @@ def main():
                     help='Second audio file to use in interleaved streams')
 args = parser.parse_args()

-ds = Model(args.model, args.alphabet, BEAM_WIDTH)
+ds = Model(args.model, BEAM_WIDTH)

 if args.lm and args.trie:
     ds.enableDecoderWithLM(args.lm, args.trie, LM_ALPHA, LM_BETA)
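Enabling the language model is untouched and remains optional; only the constructor loses its alphabet argument, and the LM and trie are still required as a pair. A sketch of the same pattern, with hypothetical paths and hyperparameter values assumed to mirror the test script's constants:

.. code-block:: python

    from deepspeech import Model

    BEAM_WIDTH = 500   # assumption: constants matching the test script
    LM_ALPHA = 0.75
    LM_BETA = 1.85

    ds = Model('models/output_graph.pbmm', BEAM_WIDTH)

    lm_path, trie_path = 'lm.binary', 'trie'  # hypothetical paths
    if lm_path and trie_path:
        # Both files are needed before the decoder's LM can be enabled.
        ds.enableDecoderWithLM(lm_path, trie_path, LM_ALPHA, LM_BETA)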
@@ -30,7 +30,7 @@ then:
   image: ${build.docker_image}

   env:
-    DEEPSPEECH_MODEL: "https://github.com/lissyx/DeepSpeech/releases/download/test-model-0.6.0a10/models.tar.gz"
+    DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.11/models.tar.gz"
     DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz"
     PIP_DEFAULT_TIMEOUT: "60"

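The CI switches to a v0.6.0-alpha.11 model archive. A sketch of fetching and unpacking such an archive with only the standard library, reusing the URL above:

.. code-block:: python

    import tarfile
    import urllib.request

    MODEL_URL = ('https://github.com/reuben/DeepSpeech/releases/download/'
                 'v0.6.0-alpha.11/models.tar.gz')

    # Download and extract into the working directory, as the CI env would.
    archive, _ = urllib.request.urlretrieve(MODEL_URL, 'models.tar.gz')
    with tarfile.open(archive, 'r:gz') as tar:
        tar.extractall('.')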
@@ -21,7 +21,6 @@ exec_benchmark()
     --dir /tmp/bench-ds/ \
     --models ${model_file} \
     --wav /tmp/LDC93S1.wav \
-    --alphabet /tmp/alphabet.txt \
     --lm_binary /tmp/lm.binary \
     --trie /tmp/trie \
     --csv ${csv}
@@ -30,7 +29,6 @@ exec_benchmark()
     --dir /tmp/bench-ds-nolm/ \
     --models ${model_file} \
     --wav /tmp/LDC93S1.wav \
-    --alphabet /tmp/alphabet.txt \
     --csv ${csv_nolm}

 python ${DS_ROOT_TASK}/DeepSpeech/ds/bin/benchmark_plotter.py \
@@ -309,12 +309,12 @@ check_runtime_electronjs()
 run_tflite_basic_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --alphabet ${DATA_TMP_DIR}/alphabet.txt --audio ${DATA_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --alphabet ${DATA_TMP_DIR}/alphabet.txt --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 }
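Each smoke test drops the same flag: the binary now runs with just ``--model`` and ``--audio`` (plus ``--lm``/``--trie`` where a language model is wanted). A sketch of driving the CLI the same way from Python, with hypothetical paths:

.. code-block:: python

    import subprocess

    # Post-change invocation: no --alphabet flag anywhere on the command line.
    result = subprocess.run(
        ['deepspeech',
         '--model', 'models/output_graph.pbmm',
         '--audio', 'LDC93S1.wav'],
        capture_output=True, text=True, check=True)

    transcript = result.stdout.strip()
    print(transcript)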
@@ -322,22 +322,22 @@ run_tflite_basic_inference_tests()
 run_netframework_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
 }
@@ -345,22 +345,22 @@ run_netframework_inference_tests()
 run_electronjs_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
 }
@@ -368,25 +368,25 @@ run_electronjs_inference_tests()
 run_basic_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status"
@@ -397,24 +397,24 @@ run_all_inference_tests()
   run_basic_inference_tests

   set +e
-  phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" "$status"

   set +e
-  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_stereo_44k}" "$status"

   set +e
-  phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}"

   set +e
-  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
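The 8 kHz cases capture stderr (note the ``2>&1 1>/dev/null``) because the upsampling warning, not the transcript, is what gets asserted. A sketch of the same check from Python; the warning's exact wording is not shown in this diff, so the sketch only surfaces stderr for inspection:

.. code-block:: python

    import subprocess

    # 8 kHz mono input: the client is expected to warn about upsampling to 16 kHz.
    result = subprocess.run(
        ['deepspeech',
         '--model', 'models/output_graph.pbmm',
         '--audio', 'LDC93S1_pcms16le_1_8000.wav'],
        capture_output=True, text=True)

    # Transcript (if any) on stdout; the upsampling warning lands on stderr.
    print('stdout:', result.stdout.strip())
    print('stderr:', result.stderr.strip())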
@@ -424,7 +424,6 @@ run_prod_concurrent_stream_tests()
   set +e
   output=$(python ${TASKCLUSTER_TMP_DIR}/test_sources/concurrent_streams.py \
              --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
-             --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
              --lm ${TASKCLUSTER_TMP_DIR}/lm.binary \
              --trie ${TASKCLUSTER_TMP_DIR}/trie \
              --audio1 ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
@@ -442,25 +441,25 @@ run_prod_concurrent_stream_tests()
 run_prod_inference_tests()
 {
   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"

   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"

   set +e
-  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status"

   set +e
-  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
@@ -468,13 +467,13 @@ run_prod_inference_tests()
 run_multi_inference_tests()
 {
   set +e -o pipefail
-  multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
+  multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
   status=$?
   set -e +o pipefail
   assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_nolm}" "$status"

   set +e -o pipefail
-  multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
+  multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
   status=$?
   set -e +o pipefail
   assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status"
@@ -483,7 +482,7 @@ run_multi_inference_tests()
 run_cpp_only_inference_tests()
 {
   set +e
-  phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
+  phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status"
@@ -566,7 +565,6 @@ download_data()
   ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"
   ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
-  cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.pruned.lm ${TASKCLUSTER_TMP_DIR}/lm.binary
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.trie ${TASKCLUSTER_TMP_DIR}/trie
   cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
@@ -579,7 +577,7 @@ download_material()
   download_native_client_files "${target_dir}"
   download_data

-  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
+  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav
 }

 download_benchmark_model()
@@ -1595,7 +1593,6 @@ android_setup_ndk_data()
   adb push \
     ${TASKCLUSTER_TMP_DIR}/${model_name} \
     ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
-    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
     ${ANDROID_TMP_DIR}/ds/
 }

@@ -1606,7 +1603,6 @@ android_setup_apk_data()
   adb push \
     ${TASKCLUSTER_TMP_DIR}/${model_name} \
     ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
-    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
     ${TASKCLUSTER_TMP_DIR}/lm.binary \
     ${TASKCLUSTER_TMP_DIR}/trie \
     ${ANDROID_TMP_DIR}/test/
@@ -133,7 +133,6 @@ def create_flags():
     # Decoder

     f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.')
-    f.DEFINE_alias('alphabet', 'alphabet_config_path')
     f.DEFINE_string('lm_binary_path', 'data/lm/lm.binary', 'path to the language model binary file created with KenLM')
     f.DEFINE_alias('lm', 'lm_binary_path')
     f.DEFINE_string('lm_trie_path', 'data/lm/trie', 'path to the language model trie file created with native_client/generate_trie')
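Training keeps ``--alphabet_config_path``; only the short ``--alphabet`` alias goes away, so the flag namespace no longer suggests an inference-time alphabet. A sketch of the surviving pattern, assuming the ``f`` in this file is ``absl.flags``:

.. code-block:: python

    from absl import app, flags

    f = flags  # assumption: the diff's f refers to absl.flags

    f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt',
                    'path to the configuration file specifying the alphabet')
    f.DEFINE_string('lm_binary_path', 'data/lm/lm.binary',
                    'path to the language model binary file created with KenLM')
    f.DEFINE_alias('lm', 'lm_binary_path')  # kept; the 'alphabet' alias is gone

    def main(_):
        # --lm still resolves to lm_binary_path; --alphabet would now be an error.
        print(flags.FLAGS.alphabet_config_path, flags.FLAGS.lm_binary_path)

    if __name__ == '__main__':
        app.run(main)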