Remove alphabet param usage
Commit 3fdc7d422d (parent 8c82081779)
@@ -7,7 +7,7 @@ extension-pkg-whitelist=
 # Add files or directories to the blacklist. They should be base names, not
 # paths.
-ignore=CVS
+ignore=examples

 # Add files or directories matching the regex patterns to the blacklist. The
 # regex matches against base names, not paths.
@@ -34,7 +34,7 @@ To install and use deepspeech all you have to do is:
 tar xvf audio-0.5.1.tar.gz

 # Transcribe an audio file
-deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
+deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav

 A pre-trained English model is available for use and can be downloaded using `the instructions below <USING.rst#using-a-pre-trained-model>`_. Currently, only 16-bit, 16 kHz, mono-channel WAVE audio files are supported in the Python client. A package with some example audio files is available for download in our `release notes <https://github.com/mozilla/DeepSpeech/releases/latest>`_.

@@ -50,7 +50,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
 pip3 install deepspeech-gpu

 # Transcribe an audio file.
-deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
+deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav

 Please ensure you have the required `CUDA dependencies <USING.rst#cuda-dependency>`_.

@@ -105,7 +105,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett

 .. code-block:: bash

-deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav

 The arguments ``--lm`` and ``--trie`` are optional, and represent a language model.

@@ -159,7 +159,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett

 .. code-block:: bash

-./deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio audio_input.wav
+./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav

 See the help output with ``./deepspeech -h`` and the `native client README <native_client/README.rst>`_ for more details.
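For readers following this change from the Python package rather than the command line, a minimal sketch of the corresponding call sequence is shown below. This is a hedged illustration, not part of the commit: the file paths are placeholders, the beam width and LM weights are the constants used elsewhere in this diff, and the exact `stt()` signature is assumed to be the 0.6-era one that takes only the audio buffer.

```python
import wave
import numpy as np
from deepspeech import Model

BEAM_WIDTH = 500
LM_ALPHA = 0.75
LM_BETA = 1.85

# The alphabet is now embedded in the model file, so only the model path
# and beam width are passed to the constructor.
ds = Model('models/output_graph.pbmm', BEAM_WIDTH)

# The language model stays optional, mirroring the optional --lm/--trie flags.
ds.enableDecoderWithLM('models/lm.binary', 'models/trie', LM_ALPHA, LM_BETA)

with wave.open('my_audio_file.wav', 'rb') as fin:
    audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

print(ds.stt(audio))  # assumed 0.6-era signature taking only the buffer
```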
@@ -239,7 +239,7 @@ def delete_tree(dir):
     except IOError:
         print('No remote directory: %s' % dir)

-def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
+def setup_tempdir(dir, models, wav, lm_binary, trie, binaries):
     r'''
     Copy models, libs and binary to a directory (new one if dir is None)
     '''
@@ -268,7 +268,7 @@ def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
             print('Copying %s to %s' % (f, dir))
             shutil.copy2(f, dir)

-    for extra_file in [ wav, alphabet, lm_binary, trie ]:
+    for extra_file in [ wav, lm_binary, trie ]:
         if extra_file and not os.path.isfile(os.path.join(dir, os.path.basename(extra_file))):
             print('Copying %s to %s' % (extra_file, dir))
             shutil.copy2(extra_file, dir)
@@ -375,10 +375,10 @@ def establish_ssh(target=None, auto_trust=False, allow_agent=True, look_keys=Tru

     return ssh_conn

-def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1):
+def run_benchmarks(dir, models, wav, lm_binary=None, trie=None, iters=-1):
     r'''
     Core of the running of the benchmarks. We will run on all of models, against
-    the WAV file provided as wav, and the provided alphabet.
+    the WAV file provided as wav.
     '''

     assert_valid_dir(dir)
@@ -396,9 +396,9 @@ def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=
         }

         if lm_binary and trie:
-            cmdline = './deepspeech --model "%s" --alphabet "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, alphabet, lm_binary, trie, wav)
+            cmdline = './deepspeech --model "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, lm_binary, trie, wav)
         else:
-            cmdline = './deepspeech --model "%s" --alphabet "%s" --audio "%s" -t' % (model_filename, alphabet, wav)
+            cmdline = './deepspeech --model "%s" --audio "%s" -t' % (model_filename, wav)

         for it in range(iters):
             sys.stdout.write('\rRunning %s: %d/%d' % (os.path.basename(model), (it+1), iters))
@@ -453,8 +453,6 @@ def handle_args():
                         help='List of files (protocolbuffer) to work on. Might be a zip file.')
     parser.add_argument('--wav', required=False,
                         help='WAV file to pass to native_client. Supply again in plotting mode to draw realine line.')
-    parser.add_argument('--alphabet', required=False,
-                        help='Text file to pass to native_client for the alphabet.')
     parser.add_argument('--lm_binary', required=False,
                         help='Path to the LM binary file used by the decoder.')
     parser.add_argument('--trie', required=False,
@@ -472,8 +470,8 @@ def handle_args():
 def do_main():
     cli_args = handle_args()

-    if not cli_args.models or not cli_args.wav or not cli_args.alphabet:
-        raise AssertionError('Missing arguments (models, wav or alphabet)')
+    if not cli_args.models or not cli_args.wav:
+        raise AssertionError('Missing arguments (models or wav)')

     if cli_args.dir is not None and not os.path.isdir(cli_args.dir):
         raise AssertionError('Inexistent temp directory')
@@ -484,18 +482,17 @@ def do_main():
     global ssh_conn
     ssh_conn = establish_ssh(target=cli_args.target, auto_trust=cli_args.autotrust, allow_agent=cli_args.allowagent, look_keys=cli_args.lookforkeys)

-    tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, alphabet=cli_args.alphabet, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)
+    tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)

     dest_sorted_models = list(map(lambda x: os.path.join(tempdir, os.path.basename(x)), sorted_models))
     dest_wav = os.path.join(tempdir, os.path.basename(cli_args.wav))
-    dest_alphabet = os.path.join(tempdir, os.path.basename(cli_args.alphabet))

     if cli_args.lm_binary and cli_args.trie:
         dest_lm_binary = os.path.join(tempdir, os.path.basename(cli_args.lm_binary))
         dest_trie = os.path.join(tempdir, os.path.basename(cli_args.trie))
-        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
+        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
     else:
-        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters)
+        inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, iters=cli_args.iters)

     if cli_args.csv:
         produce_csv(input=inference_times, output=cli_args.csv)
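The benchmark call sites above reduce to the following shape. This is a hypothetical driver call, not part of the commit; paths, the model list, and the iteration count are placeholders used only to show the new signature.

```python
# Hypothetical use of run_benchmarks() under the new signature (no alphabet).
inference_times = run_benchmarks(dir='/tmp/bench-ds/',
                                 models=['/tmp/output_graph.pbmm'],
                                 wav='/tmp/LDC93S1.wav',
                                 lm_binary='/tmp/lm.binary',
                                 trie='/tmp/trie',
                                 iters=5)
```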
@@ -23,16 +23,16 @@ This module should be self-contained:
 - pip install native_client/python/dist/deepspeech*.whl
 - pip install -r requirements_eval_tflite.txt

-Then run with a TF Lite model, alphabet, LM/trie and a CSV test file
+Then run with a TF Lite model, LM/trie and a CSV test file
 '''

 BEAM_WIDTH = 500
 LM_ALPHA = 0.75
 LM_BETA = 1.85

-def tflite_worker(model, alphabet, lm, trie, queue_in, queue_out, gpu_mask):
+def tflite_worker(model, lm, trie, queue_in, queue_out, gpu_mask):
     os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_mask)
-    ds = Model(model, alphabet, BEAM_WIDTH)
+    ds = Model(model, BEAM_WIDTH)
     ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)

     while True:
@@ -58,8 +58,6 @@ def main():
     parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
     parser.add_argument('--model', required=True,
                         help='Path to the model (protocol buffer binary file)')
-    parser.add_argument('--alphabet', required=True,
-                        help='Path to the configuration file specifying the alphabet used by the network')
     parser.add_argument('--lm', required=True,
                         help='Path to the language model binary file')
     parser.add_argument('--trie', required=True,
@@ -78,7 +76,7 @@ def main():

     processes = []
     for i in range(args.proc):
-        worker_process = Process(target=tflite_worker, args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
+        worker_process = Process(target=tflite_worker, args=(args.model, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
         worker_process.start() # Launch reader() as a separate python process
         processes.append(worker_process)
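To make the new worker signature concrete, here is a hedged sketch of spawning a single worker process the way `main()` does above. The queue setup and the file names are assumptions made purely for illustration.

```python
from multiprocessing import Process, Queue

work_todo = Queue()   # samples to transcribe (filled by the caller)
work_done = Queue()   # transcriptions collected from the worker

# New argument tuple: model, lm, trie, queues, GPU mask (no alphabet).
worker = Process(target=tflite_worker,
                 args=('output_graph.tflite', 'lm.binary', 'trie',
                       work_todo, work_done, 0),
                 daemon=True, name='tflite_process_0')
worker.start()
```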
@@ -22,14 +22,12 @@ Here is an example for a local audio file:
 ```bash
 node ./index.js --audio <AUDIO_FILE> \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```

 Here is an example for a remote RTMP-Stream:
 ```bash
 node ./index.js --audio rtmp://<IP>:1935/live/teststream \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```

 ## Examples
@@ -39,21 +37,18 @@ node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 ```bash
 node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 ```bash
 node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```
 Real time streaming inference in combination with a RTMP server.
 ```bash
@@ -61,7 +56,6 @@ node ./index.js --audio rtmp://<HOST>/<APP>/<KEY> \
 --lm $HOME/models/lm.binary \
 --trie $HOME/models/trie \
 --model $HOME/models/output_graph.pbmm \
---alphabet $HOME/models/alphabet.txt
 ```

 ## Notes
@@ -32,7 +32,6 @@ VersionAction.prototype.call = function(parser) {

 let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
-parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
 parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
 parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
@@ -45,7 +44,7 @@ function totalTime(hrtimeValue) {

 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-let model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
+let model = new Ds.Model(args['model'], BEAM_WIDTH);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
@@ -13,18 +13,15 @@ pushd ${THIS}
 node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
 --lm $HOME/DeepSpeech/models/lm.binary \
 --trie $HOME/DeepSpeech/models/trie \
---model $HOME/DeepSpeech/models/output_graph.pbmm \
---alphabet $HOME/DeepSpeech/models/alphabet.txt
+--model $HOME/DeepSpeech/models/output_graph.pbmm

 node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \
 --lm $HOME/DeepSpeech/models/lm.binary \
 --trie $HOME/DeepSpeech/models/trie \
---model $HOME/DeepSpeech/models/output_graph.pbmm \
---alphabet $HOME/DeepSpeech/models/alphabet.txt
+--model $HOME/DeepSpeech/models/output_graph.pbmm

 node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \
 --lm $HOME/DeepSpeech/models/lm.binary \
 --trie $HOME/DeepSpeech/models/trie \
---model $HOME/DeepSpeech/models/output_graph.pbmm \
---alphabet $HOME/DeepSpeech/models/alphabet.txt
+--model $HOME/DeepSpeech/models/output_graph.pbmm

 popd
@@ -29,7 +29,7 @@ Usage
 .. code-block::

     usage: mic_vad_streaming.py [-h] [-v VAD_AGGRESSIVENESS] [--nospinner]
-                                [-w SAVEWAV] -m MODEL [-a ALPHABET] [-l LM]
+                                [-w SAVEWAV] -m MODEL [-l LM]
                                 [-t TRIE] [-nf N_FEATURES] [-nc N_CONTEXT]
                                 [-la LM_ALPHA] [-lb LM_BETA]
                                 [-bw BEAM_WIDTH]
@@ -49,9 +49,6 @@ Usage
                         Path to the model (protocol buffer binary file, or
                         entire directory containing all standard-named files
                         for model)
-  -a ALPHABET, --alphabet ALPHABET
-                        Path to the configuration file specifying the alphabet
-                        used by the network. Default: alphabet.txt
   -l LM, --lm LM        Path to the language model binary file. Default:
                         lm.binary
   -t TRIE, --trie TRIE  Path to the language model trie file created with
@@ -156,14 +156,12 @@ def main(ARGS):
     if os.path.isdir(ARGS.model):
         model_dir = ARGS.model
         ARGS.model = os.path.join(model_dir, 'output_graph.pb')
-        ARGS.alphabet = os.path.join(model_dir, ARGS.alphabet if ARGS.alphabet else 'alphabet.txt')
         ARGS.lm = os.path.join(model_dir, ARGS.lm)
         ARGS.trie = os.path.join(model_dir, ARGS.trie)

     print('Initializing model...')
     logging.info("ARGS.model: %s", ARGS.model)
-    logging.info("ARGS.alphabet: %s", ARGS.alphabet)
-    model = deepspeech.Model(ARGS.model, ARGS.alphabet, ARGS.beam_width)
+    model = deepspeech.Model(ARGS.model, ARGS.beam_width)
     if ARGS.lm and ARGS.trie:
         logging.info("ARGS.lm: %s", ARGS.lm)
         logging.info("ARGS.trie: %s", ARGS.trie)
@@ -219,8 +217,6 @@ if __name__ == '__main__':

     parser.add_argument('-m', '--model', required=True,
                         help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)")
-    parser.add_argument('-a', '--alphabet', default='alphabet.txt',
-                        help="Path to the configuration file specifying the alphabet used by the network. Default: alphabet.txt")
     parser.add_argument('-l', '--lm', default='lm.binary',
                         help="Path to the language model binary file. Default: lm.binary")
     parser.add_argument('-t', '--trie', default='trie',
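The directory-resolution logic shown above boils down to the following standalone sketch. It is a hedged illustration only: the paths, beam width, and LM weights are placeholders taken from the constants elsewhere in this diff, and the helper name is hypothetical.

```python
import os
import deepspeech

def resolve(model_arg, lm='lm.binary', trie='trie'):
    # When a directory is passed, pick up the standard-named files;
    # no alphabet entry is needed any more.
    if os.path.isdir(model_arg):
        return (os.path.join(model_arg, 'output_graph.pb'),
                os.path.join(model_arg, lm),
                os.path.join(model_arg, trie))
    return model_arg, lm, trie

model_path, lm_path, trie_path = resolve(os.path.expanduser('~/DeepSpeech/models'))
model = deepspeech.Model(model_path, 500)                 # beam width placeholder
model.enableDecoderWithLM(lm_path, trie_path, 0.75, 1.85)  # optional language model
```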
@@ -14,7 +14,6 @@ pushd ${THIS}

 python mic_vad_streaming.py \
 --model $HOME/DeepSpeech/models/output_graph.pbmm \
---alphabet $HOME/DeepSpeech/models/alphabet.txt \
 --lm $HOME/DeepSpeech/models/lm.binary \
 --trie $HOME/DeepSpeech/models/trie \
 --file $HOME/DeepSpeech/audio/2830-3980-0043.wav
@@ -77,7 +77,7 @@ namespace DeepSpeechWPF
         {
             try
             {
-                _sttClient.CreateModel("output_graph.pbmm", "alphabet.txt", BEAM_WIDTH);
+                _sttClient.CreateModel("output_graph.pbmm", BEAM_WIDTH);
                 Dispatcher.Invoke(() => { EnableControls(); });
             }
             catch (Exception ex)
@@ -11,7 +11,6 @@ Edit references to models path if necessary:

 ```
 let modelPath = './models/output_graph.pbmm';
-let alphabetPath = './models/alphabet.txt';
 let lmPath = './models/lm.binary';
 let triePath = './models/trie';
 ```
@@ -7,9 +7,8 @@ const Wav = require('node-wav');

 const BEAM_WIDTH = 1024;
 let modelPath = './models/output_graph.pbmm';
-let alphabetPath = './models/alphabet.txt';

-let model = new DeepSpeech.Model(modelPath, alphabetPath, BEAM_WIDTH);
+let model = new DeepSpeech.Model(modelPath, BEAM_WIDTH);

 let desiredSampleRate = model.sampleRate();
@@ -18,7 +18,7 @@ def main(args):
     parser.add_argument('--audio', required=False,
                         help='Path to the audio file to run (WAV format)')
     parser.add_argument('--model', required=True,
-                        help='Path to directory that contains all model files (output_graph, lm, trie and alphabet)')
+                        help='Path to directory that contains all model files (output_graph, lm and trie)')
     parser.add_argument('--stream', required=False, action='store_true',
                         help='To use deepspeech streaming interface')
     args = parser.parse_args()
@@ -34,10 +34,10 @@ def main(args):
     dirName = os.path.expanduser(args.model)

     # Resolve all the paths of model files
-    output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
+    output_graph, lm, trie = wavTranscriber.resolve_models(dirName)

     # Load output_graph, alpahbet, lm and trie
-    model_retval = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
+    model_retval = wavTranscriber.load_model(output_graph, lm, trie)

     if args.audio is not None:
         title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']
@@ -109,7 +109,7 @@ class App(QMainWindow):
         self.microphone = QRadioButton("Microphone")
         self.fileUpload = QRadioButton("File Upload")
         self.browseBox = QLineEdit(self, placeholderText="Wave File, Mono @ 16 kHz, 16bit Little-Endian")
-        self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, alphabet, lm & trie")
+        self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, lm & trie")
         self.textboxTranscript = QPlainTextEdit(self, placeholderText="Transcription")
         self.browseButton = QPushButton('Browse', self)
         self.browseButton.setToolTip('Select a wav file')
@@ -238,9 +238,9 @@ class App(QMainWindow):

     def modelResult(self, dirName):
         # Fetch and Resolve all the paths of model files
-        output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
+        output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
         # Load output_graph, alpahbet, lm and trie
-        self.model = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
+        self.model = wavTranscriber.load_model(output_graph, lm, trie)

     def modelFinish(self):
         # self.timer.stop()
@@ -8,20 +8,19 @@ from timeit import default_timer as timer
 '''
 Load the pre-trained model into the memory
 @param models: Output Grapgh Protocol Buffer file
-@param alphabet: Alphabet.txt file
 @param lm: Language model file
 @param trie: Trie file

 @Retval
 Returns a list [DeepSpeech Object, Model Load Time, LM Load Time]
 '''
-def load_model(models, alphabet, lm, trie):
+def load_model(models, lm, trie):
     BEAM_WIDTH = 500
     LM_ALPHA = 0.75
     LM_BETA = 1.85

     model_load_start = timer()
-    ds = Model(models, alphabet, BEAM_WIDTH)
+    ds = Model(models, BEAM_WIDTH)
     model_load_end = timer() - model_load_start
     logging.debug("Loaded model in %0.3fs." % (model_load_end))
@@ -61,21 +60,18 @@ Resolve directory path for the models and fetch each of them.
 @param dirName: Path to the directory containing pre-trained models

 @Retval:
-Retunns a tuple containing each of the model files (pb, alphabet, lm and trie)
+Retunns a tuple containing each of the model files (pb, lm and trie)
 '''
 def resolve_models(dirName):
     pb = glob.glob(dirName + "/*.pb")[0]
     logging.debug("Found Model: %s" % pb)

-    alphabet = glob.glob(dirName + "/alphabet.txt")[0]
-    logging.debug("Found Alphabet: %s" % alphabet)
-
     lm = glob.glob(dirName + "/lm.binary")[0]
     trie = glob.glob(dirName + "/trie")[0]
     logging.debug("Found Language Model: %s" % lm)
     logging.debug("Found Trie: %s" % trie)

-    return pb, alphabet, lm, trie
+    return pb, lm, trie

 '''
 Generate VAD segments. Filters out non-voiced audio frames.
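Putting the two helpers together, a hedged usage sketch with the new three-element return values; the directory path is a placeholder and the unpacking follows the docstrings above.

```python
import wavTranscriber

# resolve_models() now returns (pb, lm, trie); load_model() returns
# [DeepSpeech object, model load time, LM load time] per its docstring.
output_graph, lm, trie = wavTranscriber.resolve_models('/home/user/models')
ds, model_load_time, lm_load_time = wavTranscriber.load_model(output_graph, lm, trie)
print('Model loaded in %0.3fs, LM in %0.3fs' % (model_load_time, lm_load_time))
```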
@@ -12,8 +12,6 @@

 char* model = NULL;

-char* alphabet = NULL;
-
 char* lm = NULL;

 char* trie = NULL;
@@ -41,12 +39,11 @@ int stream_size = 0;
 void PrintHelp(const char* bin)
 {
     std::cout <<
-    "Usage: " << bin << " --model MODEL --alphabet ALPHABET [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
+    "Usage: " << bin << " --model MODEL [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
     "\n"
     "Running DeepSpeech inference.\n"
     "\n"
     "    --model MODEL        Path to the model (protocol buffer binary file)\n"
-    "    --alphabet ALPHABET  Path to the configuration file specifying the alphabet used by the network\n"
     "    --lm LM              Path to the language model binary file\n"
     "    --trie TRIE          Path to the language model trie file created with native_client/generate_trie\n"
     "    --audio AUDIO        Path to the audio file to run (WAV format)\n"
@@ -68,7 +65,6 @@ bool ProcessArgs(int argc, char** argv)
     const char* const short_opts = "m:a:l:r:w:c:d:b:tehv";
     const option long_opts[] = {
             {"model", required_argument, nullptr, 'm'},
-            {"alphabet", required_argument, nullptr, 'a'},
             {"lm", required_argument, nullptr, 'l'},
             {"trie", required_argument, nullptr, 'r'},
             {"audio", required_argument, nullptr, 'w'},
@@ -98,10 +94,6 @@ bool ProcessArgs(int argc, char** argv)
             model = optarg;
             break;

-        case 'a':
-            alphabet = optarg;
-            break;
-
         case 'l':
             lm = optarg;
             break;
@@ -163,7 +155,7 @@ bool ProcessArgs(int argc, char** argv)
         return false;
     }

-    if (!model || !alphabet || !audio) {
+    if (!model || !audio) {
         PrintHelp(argv[0]);
         return false;
     }
@@ -29,36 +29,26 @@ namespace DeepSpeechClient
         /// Create an object providing an interface to a trained DeepSpeech model.
         /// </summary>
         /// <param name="aModelPath">The path to the frozen model graph.</param>
-        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
         /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
         public unsafe void CreateModel(string aModelPath,
-            string aAlphabetConfigPath, uint aBeamWidth)
+            uint aBeamWidth)
         {
             string exceptionMessage = null;
             if (string.IsNullOrWhiteSpace(aModelPath))
             {
                 exceptionMessage = "Model path cannot be empty.";
             }
-            if (string.IsNullOrWhiteSpace(aAlphabetConfigPath))
-            {
-                exceptionMessage = "Alphabet path cannot be empty.";
-            }
             if (!File.Exists(aModelPath))
             {
                 exceptionMessage = $"Cannot find the model file: {aModelPath}";
             }
-            if (!File.Exists(aAlphabetConfigPath))
-            {
-                exceptionMessage = $"Cannot find the alphabet file: {aAlphabetConfigPath}";
-            }

             if (exceptionMessage != null)
             {
                 throw new FileNotFoundException(exceptionMessage);
             }
             var resultCode = NativeImp.DS_CreateModel(aModelPath,
-                            aAlphabetConfigPath,
                             aBeamWidth,
                             ref _modelStatePP);
             EvaluateResultCode(resultCode);
@@ -86,7 +76,7 @@ namespace DeepSpeechClient
                 case ErrorCodes.DS_ERR_NO_MODEL:
                     throw new ArgumentException("Missing model information.");
                 case ErrorCodes.DS_ERR_INVALID_ALPHABET:
-                    throw new ArgumentException("Invalid alphabet file or invalid alphabet size.");
+                    throw new ArgumentException("Invalid alphabet embedded in model. (Data corruption?)");
                 case ErrorCodes.DS_ERR_INVALID_SHAPE:
                     throw new ArgumentException("Invalid model shape.");
                 case ErrorCodes.DS_ERR_INVALID_LM:
@@ -17,11 +17,9 @@ namespace DeepSpeechClient.Interfaces
         /// Create an object providing an interface to a trained DeepSpeech model.
         /// </summary>
         /// <param name="aModelPath">The path to the frozen model graph.</param>
-        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
         /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
         unsafe void CreateModel(string aModelPath,
-                                string aAlphabetConfigPath,
                                 uint aBeamWidth);

         /// <summary>
@@ -16,7 +16,6 @@ namespace DeepSpeechClient

         [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
         internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
-                                                                string aAlphabetConfigPath,
                                                                 uint aBeamWidth,
                                                                 ref IntPtr** pint);
@@ -35,7 +35,6 @@ namespace CSharpExamples
         static void Main(string[] args)
         {
             string model = null;
-            string alphabet = null;
             string lm = null;
             string trie = null;
             string audio = null;
@@ -43,7 +42,6 @@ namespace CSharpExamples
             if (args.Length > 0)
             {
                 model = GetArgument(args, "--model");
-                alphabet = GetArgument(args, "--alphabet");
                 lm = GetArgument(args, "--lm");
                 trie = GetArgument(args, "--trie");
                 audio = GetArgument(args, "--audio");
@@ -64,7 +62,6 @@ namespace CSharpExamples
             stopwatch.Start();
             sttClient.CreateModel(
                 model ?? "output_graph.pbmm",
-                alphabet ?? "alphabet.txt",
                 BEAM_WIDTH);
             stopwatch.Stop();
@@ -51,7 +51,6 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:


 * ``output_graph.tflite`` which is the TF Lite model
-* ``alphabet.txt``
 * ``lm.binary`` and ``trie`` files, if you want to use the language model ; please
   be aware that too big language model will make the device run out of memory
@@ -23,7 +23,6 @@ public class DeepSpeechActivity extends AppCompatActivity {
     DeepSpeechModel _m = null;

     EditText _tfliteModel;
-    EditText _alphabet;
     EditText _audioFile;

     TextView _decodedString;
@@ -49,10 +48,10 @@ public class DeepSpeechActivity extends AppCompatActivity {
         return (int)((b1 & 0xFF) | (b2 & 0xFF) << 8 | (b3 & 0xFF) << 16 | (b4 & 0xFF) << 24);
     }

-    private void newModel(String tfliteModel, String alphabet) {
+    private void newModel(String tfliteModel) {
         this._tfliteStatus.setText("Creating model");
         if (this._m == null) {
-            this._m = new DeepSpeechModel(tfliteModel, alphabet, BEAM_WIDTH);
+            this._m = new DeepSpeechModel(tfliteModel, BEAM_WIDTH);
         }
     }
@@ -61,7 +60,7 @@ public class DeepSpeechActivity extends AppCompatActivity {

         this._startInference.setEnabled(false);

-        this.newModel(this._tfliteModel.getText().toString(), this._alphabet.getText().toString());
+        this.newModel(this._tfliteModel.getText().toString());

         this._tfliteStatus.setText("Extracting audio features ...");
@@ -128,13 +127,11 @@ public class DeepSpeechActivity extends AppCompatActivity {
         this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus);

         this._tfliteModel = (EditText) findViewById(R.id.tfliteModel);
-        this._alphabet = (EditText) findViewById(R.id.alphabet);
         this._audioFile = (EditText) findViewById(R.id.audioFile);

         this._tfliteModel.setText("/sdcard/deepspeech/output_graph.tflite");
         this._tfliteStatus.setText("Ready, waiting ...");

-        this._alphabet.setText("/sdcard/deepspeech/alphabet.txt");
         this._audioFile.setText("/sdcard/deepspeech/audio.wav");

         this._startInference = (Button) findViewById(R.id.btnStartInference);
@@ -97,25 +97,6 @@
             android:inputType="text" />
     </LinearLayout>

-    <LinearLayout
-        android:layout_width="match_parent"
-        android:layout_height="wrap_content"
-        android:orientation="horizontal">
-
-        <TextView
-            android:id="@+id/lblAlphabet"
-            android:layout_width="263dp"
-            android:layout_height="wrap_content"
-            android:layout_weight="1"
-            android:text="Alphabet" />
-
-        <EditText
-            android:id="@+id/alphabet"
-            android:layout_width="wrap_content"
-            android:layout_height="wrap_content"
-            android:inputType="text" />
-    </LinearLayout>
-
     <LinearLayout
         android:layout_width="match_parent"
         android:layout_height="wrap_content"
@@ -30,7 +30,6 @@ import java.nio.ByteBuffer;
 public class BasicTest {

     public static final String modelFile = "/data/local/tmp/test/output_graph.tflite";
-    public static final String alphabetFile = "/data/local/tmp/test/alphabet.txt";
     public static final String lmFile = "/data/local/tmp/test/lm.binary";
     public static final String trieFile = "/data/local/tmp/test/trie";
     public static final String wavFile = "/data/local/tmp/test/LDC93S1.wav";
@@ -64,7 +63,7 @@ public class BasicTest {

     @Test
     public void loadDeepSpeech_basic() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
         m.freeModel();
     }

@@ -121,7 +120,7 @@ public class BasicTest {

     @Test
     public void loadDeepSpeech_stt_noLM() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);

         String decoded = doSTT(m, false);
         assertEquals("she had your dark suit in greasy wash water all year", decoded);
@@ -130,7 +129,7 @@ public class BasicTest {

     @Test
     public void loadDeepSpeech_stt_withLM() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
         m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);

         String decoded = doSTT(m, false);
@@ -140,7 +139,7 @@ public class BasicTest {

     @Test
     public void loadDeepSpeech_sttWithMetadata_noLM() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);

         String decoded = doSTT(m, true);
         assertEquals("she had your dark suit in greasy wash water all year", decoded);
@@ -149,7 +148,7 @@ public class BasicTest {

     @Test
     public void loadDeepSpeech_sttWithMetadata_withLM() {
-        DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+        DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
         m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);

         String decoded = doSTT(m, true);
@@ -20,15 +20,13 @@ public class DeepSpeechModel {
     * @constructor
     *
     * @param modelPath The path to the frozen model graph.
-    * @param alphabetPath The path to the configuration file specifying
-    *                     the alphabet used by the network. See alphabet.h.
     * @param beam_width The beam width used by the decoder. A larger beam
     *                   width generates better results at the cost of decoding
     *                   time.
     */
-   public DeepSpeechModel(String modelPath, String alphabetPath, int beam_width) {
+   public DeepSpeechModel(String modelPath, int beam_width) {
        this._mspp = impl.new_modelstatep();
-       impl.CreateModel(modelPath, alphabetPath, beam_width, this._mspp);
+       impl.CreateModel(modelPath, beam_width, this._mspp);
        this._msp = impl.modelstatep_value(this._mspp);
    }
@@ -17,7 +17,7 @@ Once everything is installed, you can then use the `deepspeech` binary to do spe
 pip3 install deepspeech

-deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav

 ```
@@ -27,7 +27,7 @@ Alternatively, quicker inference can be performed using a supported NVIDIA GPU o
 pip3 install deepspeech-gpu

-deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav

 ```
@@ -223,7 +223,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett

 ```bash

-deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
+deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav

 ```
@@ -290,7 +290,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett

 ```bash

-./deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio audio_input.wav
+./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav

 ```
@@ -29,7 +29,6 @@ VersionAction.prototype.call = function(parser) {

 var parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
-parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
 parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
 parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
@@ -55,7 +54,7 @@ function metadataToString(metadata) {

 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-var model = new Ds.Model(args['model'], args['alphabet'], args['beam_width']);
+var model = new Ds.Model(args['model'], args['beam_width']);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
@@ -25,7 +25,6 @@ if (process.platform === 'win32') {
 * An object providing an interface to a trained DeepSpeech model.
 *
 * @param {string} aModelPath The path to the frozen model graph.
-* @param {string} aAlphabetConfigPath The path to the configuration file specifying the alphabet used by the network. See alphabet.h.
 * @param {number} aBeamWidth The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
 *
 * @throws on error
@@ -30,9 +30,6 @@ class Model(object):
     :param aModelPath: Path to model file to load
     :type aModelPath: str

-    :param aAlphabetConfigPath: Path to alphabet file to load
-    :type aAlphabetConfigPath: str
-
     :param aBeamWidth: Decoder beam width
     :type aBeamWidth: int
     """
@@ -46,8 +46,6 @@ def main():
     parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
     parser.add_argument('--model', required=True,
                         help='Path to the model (protocol buffer binary file)')
-    parser.add_argument('--alphabet', required=True,
-                        help='Path to the configuration file specifying the alphabet used by the network')
     parser.add_argument('--lm', nargs='?',
                         help='Path to the language model binary file')
     parser.add_argument('--trie', nargs='?',
@@ -68,7 +66,7 @@ def main():

     print('Loading model from file {}'.format(args.model), file=sys.stderr)
     model_load_start = timer()
-    ds = Model(args.model, args.alphabet, args.beam_width)
+    ds = Model(args.model, args.beam_width)
     model_load_end = timer() - model_load_start
     print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)
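The same two-argument constructor used by the Python client above, in isolation; a hedged sketch with a placeholder path and beam width, timing the load the way the client does.

```python
import sys
from timeit import default_timer as timer
from deepspeech import Model

model_load_start = timer()
ds = Model('models/output_graph.pbmm', 500)  # alphabet argument removed
model_load_end = timer() - model_load_start
print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)
```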
@@ -25,8 +25,6 @@ def main():
     parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
     parser.add_argument('--model', required=True,
                         help='Path to the model (protocol buffer binary file)')
-    parser.add_argument('--alphabet', required=True,
-                        help='Path to the configuration file specifying the alphabet used by the network')
     parser.add_argument('--lm', nargs='?',
                         help='Path to the language model binary file')
     parser.add_argument('--trie', nargs='?',
@@ -37,7 +35,7 @@ def main():
                         help='Second audio file to use in interleaved streams')
     args = parser.parse_args()

-    ds = Model(args.model, args.alphabet, BEAM_WIDTH)
+    ds = Model(args.model, BEAM_WIDTH)

     if args.lm and args.trie:
         ds.enableDecoderWithLM(args.lm, args.trie, LM_ALPHA, LM_BETA)
@@ -30,7 +30,7 @@ then:
       image: ${build.docker_image}

       env:
-        DEEPSPEECH_MODEL: "https://github.com/lissyx/DeepSpeech/releases/download/test-model-0.6.0a10/models.tar.gz"
+        DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.11/models.tar.gz"
         DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz"
         PIP_DEFAULT_TIMEOUT: "60"
@@ -21,7 +21,6 @@ exec_benchmark()
         --dir /tmp/bench-ds/ \
         --models ${model_file} \
         --wav /tmp/LDC93S1.wav \
-        --alphabet /tmp/alphabet.txt \
         --lm_binary /tmp/lm.binary \
         --trie /tmp/trie \
         --csv ${csv}
@@ -30,7 +29,6 @@ exec_benchmark()
         --dir /tmp/bench-ds-nolm/ \
         --models ${model_file} \
         --wav /tmp/LDC93S1.wav \
-        --alphabet /tmp/alphabet.txt \
         --csv ${csv_nolm}

     python ${DS_ROOT_TASK}/DeepSpeech/ds/bin/benchmark_plotter.py \
@@ -309,12 +309,12 @@ check_runtime_electronjs()
 run_tflite_basic_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --alphabet ${DATA_TMP_DIR}/alphabet.txt --audio ${DATA_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --alphabet ${DATA_TMP_DIR}/alphabet.txt --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
 }
@@ -322,22 +322,22 @@ run_tflite_basic_inference_tests()
 run_netframework_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
 }
@@ -345,22 +345,22 @@ run_netframework_inference_tests()
 run_electronjs_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"

   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   set -e
   assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
 }
@@ -368,25 +368,25 @@ run_electronjs_inference_tests()
 run_basic_inference_tests()
 {
   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

   set +e
-  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status"
@@ -397,24 +397,24 @@ run_all_inference_tests()
   run_basic_inference_tests

   set +e
-  phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" "$status"

   set +e
-  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_stereo_44k}" "$status"

   set +e
-  phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}"

   set +e
-  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
@@ -424,7 +424,6 @@ run_prod_concurrent_stream_tests()
   set +e
   output=$(python ${TASKCLUSTER_TMP_DIR}/test_sources/concurrent_streams.py \
              --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
-             --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
              --lm ${TASKCLUSTER_TMP_DIR}/lm.binary \
              --trie ${TASKCLUSTER_TMP_DIR}/trie \
              --audio1 ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
@@ -442,25 +441,25 @@ run_prod_concurrent_stream_tests()
 run_prod_inference_tests()
 {
   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"

   set +e
-  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"

   set +e
-  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
   status=$?
   set -e
   assert_correct_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status"

   set +e
-  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
   set -e
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
@@ -468,13 +467,13 @@ run_prod_inference_tests()
 run_multi_inference_tests()
 {
   set +e -o pipefail
-  multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
+  multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
   status=$?
   set -e +o pipefail
   assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_nolm}" "$status"

   set +e -o pipefail
-  multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
+  multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
   status=$?
   set -e +o pipefail
   assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status"
@@ -483,7 +482,7 @@ run_multi_inference_tests()
 run_cpp_only_inference_tests()
 {
   set +e
-  phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
+  phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
   status=$?
   set -e
   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status"
@@ -566,7 +565,6 @@ download_data()
   ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"
   ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
-  cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.pruned.lm ${TASKCLUSTER_TMP_DIR}/lm.binary
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.trie ${TASKCLUSTER_TMP_DIR}/trie
   cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
@@ -579,7 +577,7 @@ download_material()
   download_native_client_files "${target_dir}"
   download_data

-  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
+  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav
 }

 download_benchmark_model()
@@ -1595,7 +1593,6 @@ android_setup_ndk_data()
   adb push \
     ${TASKCLUSTER_TMP_DIR}/${model_name} \
     ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
-    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
     ${ANDROID_TMP_DIR}/ds/
 }

@@ -1606,7 +1603,6 @@ android_setup_apk_data()
   adb push \
     ${TASKCLUSTER_TMP_DIR}/${model_name} \
     ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
-    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
     ${TASKCLUSTER_TMP_DIR}/lm.binary \
     ${TASKCLUSTER_TMP_DIR}/trie \
     ${ANDROID_TMP_DIR}/test/
@@ -133,7 +133,6 @@ def create_flags():
     # Decoder

     f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.')
-    f.DEFINE_alias('alphabet', 'alphabet_config_path')
     f.DEFINE_string('lm_binary_path', 'data/lm/lm.binary', 'path to the language model binary file created with KenLM')
     f.DEFINE_alias('lm', 'lm_binary_path')
     f.DEFINE_string('lm_trie_path', 'data/lm/trie', 'path to the language model trie file created with native_client/generate_trie')