Remove alphabet param usage

This commit is contained in:
Reuben Morais 2019-11-01 13:12:35 +01:00
parent 8c82081779
commit 3fdc7d422d
37 changed files with 86 additions and 181 deletions

View File

@ -7,7 +7,7 @@ extension-pkg-whitelist=
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS
ignore=examples
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.

View File

@ -34,7 +34,7 @@ To install and use deepspeech all you have to do is:
tar xvf audio-0.5.1.tar.gz
# Transcribe an audio file
deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
A pre-trained English model is available for use and can be downloaded using `the instructions below <USING.rst#using-a-pre-trained-model>`_. Currently, only 16-bit, 16 kHz, mono-channel WAVE audio files are supported in the Python client. A package with some example audio files is available for download in our `release notes <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
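Since only 16-bit, 16 kHz, mono WAVE input is currently supported by the Python client, it can help to sanity-check a recording before transcribing it. A minimal sketch using the standard-library ``wave`` module (the path follows the example above; substitute your own recording):

.. code-block:: python

   import wave

   # Check that the recording matches what the client expects.
   with wave.open('audio/2830-3980-0043.wav', 'rb') as wav:
       assert wav.getnchannels() == 1, 'expected mono audio'
       assert wav.getsampwidth() == 2, 'expected 16-bit samples'
       assert wav.getframerate() == 16000, 'expected a 16 kHz sample rate'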
@ -50,7 +50,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
pip3 install deepspeech-gpu
# Transcribe an audio file.
deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --alphabet deepspeech-0.5.1-models/alphabet.txt --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
deepspeech --model deepspeech-0.5.1-models/output_graph.pbmm --lm deepspeech-0.5.1-models/lm.binary --trie deepspeech-0.5.1-models/trie --audio audio/2830-3980-0043.wav
Please ensure you have the required `CUDA dependencies <USING.rst#cuda-dependency>`_.

View File

@ -105,7 +105,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
.. code-block:: bash
deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
The arguments ``--lm`` and ``--trie`` are optional, and represent a language model.
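With the alphabet now embedded in the model file, the Python API mirrors the command line: only the model path and beam width are needed to construct a model, and the language model stays optional. A minimal sketch of the updated usage (paths and weights follow the values used elsewhere in this repository):

.. code-block:: python

   from deepspeech import Model

   BEAM_WIDTH = 500
   LM_ALPHA = 0.75
   LM_BETA = 1.85

   ds = Model('models/output_graph.pbmm', BEAM_WIDTH)

   # Optional, equivalent to passing --lm and --trie on the command line.
   ds.enableDecoderWithLM('models/lm.binary', 'models/trie', LM_ALPHA, LM_BETA)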
@ -159,7 +159,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
.. code-block:: bash
./deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio audio_input.wav
./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav
See the help output with ``./deepspeech -h`` and the `native client README <native_client/README.rst>`_ for more details.

View File

@ -239,7 +239,7 @@ def delete_tree(dir):
except IOError:
print('No remote directory: %s' % dir)
def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
def setup_tempdir(dir, models, wav, lm_binary, trie, binaries):
r'''
Copy models, libs and binary to a directory (new one if dir is None)
'''
@ -268,7 +268,7 @@ def setup_tempdir(dir, models, wav, alphabet, lm_binary, trie, binaries):
print('Copying %s to %s' % (f, dir))
shutil.copy2(f, dir)
for extra_file in [ wav, alphabet, lm_binary, trie ]:
for extra_file in [ wav, lm_binary, trie ]:
if extra_file and not os.path.isfile(os.path.join(dir, os.path.basename(extra_file))):
print('Copying %s to %s' % (extra_file, dir))
shutil.copy2(extra_file, dir)
@ -375,10 +375,10 @@ def establish_ssh(target=None, auto_trust=False, allow_agent=True, look_keys=Tru
return ssh_conn
def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1):
def run_benchmarks(dir, models, wav, lm_binary=None, trie=None, iters=-1):
r'''
Core of the running of the benchmarks. We will run all of the models against
the WAV file provided as wav, and the provided alphabet.
the WAV file provided as wav.
'''
assert_valid_dir(dir)
@ -396,9 +396,9 @@ def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=
}
if lm_binary and trie:
cmdline = './deepspeech --model "%s" --alphabet "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, alphabet, lm_binary, trie, wav)
cmdline = './deepspeech --model "%s" --lm "%s" --trie "%s" --audio "%s" -t' % (model_filename, lm_binary, trie, wav)
else:
cmdline = './deepspeech --model "%s" --alphabet "%s" --audio "%s" -t' % (model_filename, alphabet, wav)
cmdline = './deepspeech --model "%s" --audio "%s" -t' % (model_filename, wav)
for it in range(iters):
sys.stdout.write('\rRunning %s: %d/%d' % (os.path.basename(model), (it+1), iters))
@ -453,8 +453,6 @@ def handle_args():
help='List of files (protocolbuffer) to work on. Might be a zip file.')
parser.add_argument('--wav', required=False,
help='WAV file to pass to native_client. Supply again in plotting mode to draw the real-time line.')
parser.add_argument('--alphabet', required=False,
help='Text file to pass to native_client for the alphabet.')
parser.add_argument('--lm_binary', required=False,
help='Path to the LM binary file used by the decoder.')
parser.add_argument('--trie', required=False,
@ -472,8 +470,8 @@ def handle_args():
def do_main():
cli_args = handle_args()
if not cli_args.models or not cli_args.wav or not cli_args.alphabet:
raise AssertionError('Missing arguments (models, wav or alphabet)')
if not cli_args.models or not cli_args.wav:
raise AssertionError('Missing arguments (models or wav)')
if cli_args.dir is not None and not os.path.isdir(cli_args.dir):
raise AssertionError('Inexistent temp directory')
@ -484,18 +482,17 @@ def do_main():
global ssh_conn
ssh_conn = establish_ssh(target=cli_args.target, auto_trust=cli_args.autotrust, allow_agent=cli_args.allowagent, look_keys=cli_args.lookforkeys)
tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, alphabet=cli_args.alphabet, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)
tempdir, sorted_models = setup_tempdir(dir=cli_args.dir, models=cli_args.models, wav=cli_args.wav, lm_binary=cli_args.lm_binary, trie=cli_args.trie, binaries=cli_args.binaries)
dest_sorted_models = list(map(lambda x: os.path.join(tempdir, os.path.basename(x)), sorted_models))
dest_wav = os.path.join(tempdir, os.path.basename(cli_args.wav))
dest_alphabet = os.path.join(tempdir, os.path.basename(cli_args.alphabet))
if cli_args.lm_binary and cli_args.trie:
dest_lm_binary = os.path.join(tempdir, os.path.basename(cli_args.lm_binary))
dest_trie = os.path.join(tempdir, os.path.basename(cli_args.trie))
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
else:
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters)
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, iters=cli_args.iters)
if cli_args.csv:
produce_csv(input=inference_times, output=cli_args.csv)

View File

@ -23,16 +23,16 @@ This module should be self-contained:
- pip install native_client/python/dist/deepspeech*.whl
- pip install -r requirements_eval_tflite.txt
Then run with a TF Lite model, alphabet, LM/trie and a CSV test file
Then run with a TF Lite model, LM/trie and a CSV test file
'''
BEAM_WIDTH = 500
LM_ALPHA = 0.75
LM_BETA = 1.85
def tflite_worker(model, alphabet, lm, trie, queue_in, queue_out, gpu_mask):
def tflite_worker(model, lm, trie, queue_in, queue_out, gpu_mask):
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_mask)
ds = Model(model, alphabet, BEAM_WIDTH)
ds = Model(model, BEAM_WIDTH)
ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)
while True:
@ -58,8 +58,6 @@ def main():
parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
parser.add_argument('--model', required=True,
help='Path to the model (protocol buffer binary file)')
parser.add_argument('--alphabet', required=True,
help='Path to the configuration file specifying the alphabet used by the network')
parser.add_argument('--lm', required=True,
help='Path to the language model binary file')
parser.add_argument('--trie', required=True,
@ -78,7 +76,7 @@ def main():
processes = []
for i in range(args.proc):
worker_process = Process(target=tflite_worker, args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
worker_process = Process(target=tflite_worker, args=(args.model, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
worker_process.start() # Launch reader() as a separate python process
processes.append(worker_process)

View File

@ -22,14 +22,12 @@ Here is an example for a local audio file:
```bash
node ./index.js --audio <AUDIO_FILE> \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
Here is an example for a remote RTMP-Stream:
```bash
node ./index.js --audio rtmp://<IP>:1935/live/teststream \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
## Examples
@ -39,21 +37,18 @@ node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
```bash
node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
```bash
node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
Real-time streaming inference in combination with an RTMP server.
```bash
@ -61,7 +56,6 @@ node ./index.js --audio rtmp://<HOST>/<APP>/<KEY> \
--lm $HOME/models/lm.binary \
--trie $HOME/models/trie \
--model $HOME/models/output_graph.pbmm \
--alphabet $HOME/models/alphabet.txt
```
## Notes

View File

@ -32,7 +32,6 @@ VersionAction.prototype.call = function(parser) {
let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
@ -45,7 +44,7 @@ function totalTime(hrtimeValue) {
console.error('Loading model from file %s', args['model']);
const model_load_start = process.hrtime();
let model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
let model = new Ds.Model(args['model'], BEAM_WIDTH);
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));

View File

@ -13,18 +13,15 @@ pushd ${THIS}
node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--model $HOME/DeepSpeech/models/output_graph.pbmm \
--alphabet $HOME/DeepSpeech/models/alphabet.txt
--model $HOME/DeepSpeech/models/output_graph.pbmm
node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--model $HOME/DeepSpeech/models/output_graph.pbmm \
--alphabet $HOME/DeepSpeech/models/alphabet.txt
--model $HOME/DeepSpeech/models/output_graph.pbmm
node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--model $HOME/DeepSpeech/models/output_graph.pbmm \
--alphabet $HOME/DeepSpeech/models/alphabet.txt
--model $HOME/DeepSpeech/models/output_graph.pbmm
popd

View File

@ -29,7 +29,7 @@ Usage
.. code-block::
usage: mic_vad_streaming.py [-h] [-v VAD_AGGRESSIVENESS] [--nospinner]
[-w SAVEWAV] -m MODEL [-a ALPHABET] [-l LM]
[-w SAVEWAV] -m MODEL [-l LM]
[-t TRIE] [-nf N_FEATURES] [-nc N_CONTEXT]
[-la LM_ALPHA] [-lb LM_BETA]
[-bw BEAM_WIDTH]
@ -49,9 +49,6 @@ Usage
Path to the model (protocol buffer binary file, or
entire directory containing all standard-named files
for model)
-a ALPHABET, --alphabet ALPHABET
Path to the configuration file specifying the alphabet
used by the network. Default: alphabet.txt
-l LM, --lm LM Path to the language model binary file. Default:
lm.binary
-t TRIE, --trie TRIE Path to the language model trie file created with

View File

@ -156,14 +156,12 @@ def main(ARGS):
if os.path.isdir(ARGS.model):
model_dir = ARGS.model
ARGS.model = os.path.join(model_dir, 'output_graph.pb')
ARGS.alphabet = os.path.join(model_dir, ARGS.alphabet if ARGS.alphabet else 'alphabet.txt')
ARGS.lm = os.path.join(model_dir, ARGS.lm)
ARGS.trie = os.path.join(model_dir, ARGS.trie)
print('Initializing model...')
logging.info("ARGS.model: %s", ARGS.model)
logging.info("ARGS.alphabet: %s", ARGS.alphabet)
model = deepspeech.Model(ARGS.model, ARGS.alphabet, ARGS.beam_width)
model = deepspeech.Model(ARGS.model, ARGS.beam_width)
if ARGS.lm and ARGS.trie:
logging.info("ARGS.lm: %s", ARGS.lm)
logging.info("ARGS.trie: %s", ARGS.trie)
@ -219,8 +217,6 @@ if __name__ == '__main__':
parser.add_argument('-m', '--model', required=True,
help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)")
parser.add_argument('-a', '--alphabet', default='alphabet.txt',
help="Path to the configuration file specifying the alphabet used by the network. Default: alphabet.txt")
parser.add_argument('-l', '--lm', default='lm.binary',
help="Path to the language model binary file. Default: lm.binary")
parser.add_argument('-t', '--trie', default='trie',

View File

@ -14,7 +14,6 @@ pushd ${THIS}
python mic_vad_streaming.py \
--model $HOME/DeepSpeech/models/output_graph.pbmm \
--alphabet $HOME/DeepSpeech/models/alphabet.txt \
--lm $HOME/DeepSpeech/models/lm.binary \
--trie $HOME/DeepSpeech/models/trie \
--file $HOME/DeepSpeech/audio/2830-3980-0043.wav

View File

@ -77,7 +77,7 @@ namespace DeepSpeechWPF
{
try
{
_sttClient.CreateModel("output_graph.pbmm", "alphabet.txt", BEAM_WIDTH);
_sttClient.CreateModel("output_graph.pbmm", BEAM_WIDTH);
Dispatcher.Invoke(() => { EnableControls(); });
}
catch (Exception ex)

View File

@ -11,7 +11,6 @@ Edit references to models path if necessary:
```
let modelPath = './models/output_graph.pbmm';
let alphabetPath = './models/alphabet.txt';
let lmPath = './models/lm.binary';
let triePath = './models/trie';
```

View File

@ -7,9 +7,8 @@ const Wav = require('node-wav');
const BEAM_WIDTH = 1024;
let modelPath = './models/output_graph.pbmm';
let alphabetPath = './models/alphabet.txt';
let model = new DeepSpeech.Model(modelPath, alphabetPath, BEAM_WIDTH);
let model = new DeepSpeech.Model(modelPath, BEAM_WIDTH);
let desiredSampleRate = model.sampleRate();

View File

@ -18,7 +18,7 @@ def main(args):
parser.add_argument('--audio', required=False,
help='Path to the audio file to run (WAV format)')
parser.add_argument('--model', required=True,
help='Path to directory that contains all model files (output_graph, lm, trie and alphabet)')
help='Path to directory that contains all model files (output_graph, lm and trie)')
parser.add_argument('--stream', required=False, action='store_true',
help='To use deepspeech streaming interface')
args = parser.parse_args()
@ -34,10 +34,10 @@ def main(args):
dirName = os.path.expanduser(args.model)
# Resolve all the paths of model files
output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
# Load output_graph, lm and trie
model_retval = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
model_retval = wavTranscriber.load_model(output_graph, lm, trie)
if args.audio is not None:
title_names = ['Filename', 'Duration(s)', 'Inference Time(s)', 'Model Load Time(s)', 'LM Load Time(s)']

View File

@ -109,7 +109,7 @@ class App(QMainWindow):
self.microphone = QRadioButton("Microphone")
self.fileUpload = QRadioButton("File Upload")
self.browseBox = QLineEdit(self, placeholderText="Wave File, Mono @ 16 kHz, 16bit Little-Endian")
self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, alphabet, lm & trie")
self.modelsBox = QLineEdit(self, placeholderText="Directory path for output_graph, lm & trie")
self.textboxTranscript = QPlainTextEdit(self, placeholderText="Transcription")
self.browseButton = QPushButton('Browse', self)
self.browseButton.setToolTip('Select a wav file')
@ -238,9 +238,9 @@ class App(QMainWindow):
def modelResult(self, dirName):
# Fetch and Resolve all the paths of model files
output_graph, alphabet, lm, trie = wavTranscriber.resolve_models(dirName)
output_graph, lm, trie = wavTranscriber.resolve_models(dirName)
# Load output_graph, lm and trie
self.model = wavTranscriber.load_model(output_graph, alphabet, lm, trie)
self.model = wavTranscriber.load_model(output_graph, lm, trie)
def modelFinish(self):
# self.timer.stop()

View File

@ -8,20 +8,19 @@ from timeit import default_timer as timer
'''
Load the pre-trained model into the memory
@param models: Output Graph Protocol Buffer file
@param alphabet: Alphabet.txt file
@param lm: Language model file
@param trie: Trie file
@Retval
Returns a list [DeepSpeech Object, Model Load Time, LM Load Time]
'''
def load_model(models, alphabet, lm, trie):
def load_model(models, lm, trie):
BEAM_WIDTH = 500
LM_ALPHA = 0.75
LM_BETA = 1.85
model_load_start = timer()
ds = Model(models, alphabet, BEAM_WIDTH)
ds = Model(models, BEAM_WIDTH)
model_load_end = timer() - model_load_start
logging.debug("Loaded model in %0.3fs." % (model_load_end))
@ -61,21 +60,18 @@ Resolve directory path for the models and fetch each of them.
@param dirName: Path to the directory containing pre-trained models
@Retval:
Returns a tuple containing each of the model files (pb, alphabet, lm and trie)
Returns a tuple containing each of the model files (pb, lm and trie)
'''
def resolve_models(dirName):
pb = glob.glob(dirName + "/*.pb")[0]
logging.debug("Found Model: %s" % pb)
alphabet = glob.glob(dirName + "/alphabet.txt")[0]
logging.debug("Found Alphabet: %s" % alphabet)
lm = glob.glob(dirName + "/lm.binary")[0]
trie = glob.glob(dirName + "/trie")[0]
logging.debug("Found Language Model: %s" % lm)
logging.debug("Found Trie: %s" % trie)
return pb, alphabet, lm, trie
return pb, lm, trie
'''
Generate VAD segments. Filters out non-voiced audio frames.

View File

@ -12,8 +12,6 @@
char* model = NULL;
char* alphabet = NULL;
char* lm = NULL;
char* trie = NULL;
@ -41,12 +39,11 @@ int stream_size = 0;
void PrintHelp(const char* bin)
{
std::cout <<
"Usage: " << bin << " --model MODEL --alphabet ALPHABET [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
"Usage: " << bin << " --model MODEL [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n"
"\n"
"Running DeepSpeech inference.\n"
"\n"
" --model MODEL Path to the model (protocol buffer binary file)\n"
" --alphabet ALPHABET Path to the configuration file specifying the alphabet used by the network\n"
" --lm LM Path to the language model binary file\n"
" --trie TRIE Path to the language model trie file created with native_client/generate_trie\n"
" --audio AUDIO Path to the audio file to run (WAV format)\n"
@ -68,7 +65,6 @@ bool ProcessArgs(int argc, char** argv)
const char* const short_opts = "m:a:l:r:w:c:d:b:tehv";
const option long_opts[] = {
{"model", required_argument, nullptr, 'm'},
{"alphabet", required_argument, nullptr, 'a'},
{"lm", required_argument, nullptr, 'l'},
{"trie", required_argument, nullptr, 'r'},
{"audio", required_argument, nullptr, 'w'},
@ -98,10 +94,6 @@ bool ProcessArgs(int argc, char** argv)
model = optarg;
break;
case 'a':
alphabet = optarg;
break;
case 'l':
lm = optarg;
break;
@ -163,7 +155,7 @@ bool ProcessArgs(int argc, char** argv)
return false;
}
if (!model || !alphabet || !audio) {
if (!model || !audio) {
PrintHelp(argv[0]);
return false;
}

View File

@ -29,36 +29,26 @@ namespace DeepSpeechClient
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
public unsafe void CreateModel(string aModelPath,
string aAlphabetConfigPath, uint aBeamWidth)
uint aBeamWidth)
{
string exceptionMessage = null;
if (string.IsNullOrWhiteSpace(aModelPath))
{
exceptionMessage = "Model path cannot be empty.";
}
if (string.IsNullOrWhiteSpace(aAlphabetConfigPath))
{
exceptionMessage = "Alphabet path cannot be empty.";
}
if (!File.Exists(aModelPath))
{
exceptionMessage = $"Cannot find the model file: {aModelPath}";
}
if (!File.Exists(aAlphabetConfigPath))
{
exceptionMessage = $"Cannot find the alphabet file: {aAlphabetConfigPath}";
}
if (exceptionMessage != null)
{
throw new FileNotFoundException(exceptionMessage);
}
var resultCode = NativeImp.DS_CreateModel(aModelPath,
aAlphabetConfigPath,
aBeamWidth,
ref _modelStatePP);
EvaluateResultCode(resultCode);
@ -86,7 +76,7 @@ namespace DeepSpeechClient
case ErrorCodes.DS_ERR_NO_MODEL:
throw new ArgumentException("Missing model information.");
case ErrorCodes.DS_ERR_INVALID_ALPHABET:
throw new ArgumentException("Invalid alphabet file or invalid alphabet size.");
throw new ArgumentException("Invalid alphabet embedded in model. (Data corruption?)");
case ErrorCodes.DS_ERR_INVALID_SHAPE:
throw new ArgumentException("Invalid model shape.");
case ErrorCodes.DS_ERR_INVALID_LM:

View File

@ -17,11 +17,9 @@ namespace DeepSpeechClient.Interfaces
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
unsafe void CreateModel(string aModelPath,
string aAlphabetConfigPath,
uint aBeamWidth);
/// <summary>

View File

@ -16,7 +16,6 @@ namespace DeepSpeechClient
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
string aAlphabetConfigPath,
uint aBeamWidth,
ref IntPtr** pint);

View File

@ -35,7 +35,6 @@ namespace CSharpExamples
static void Main(string[] args)
{
string model = null;
string alphabet = null;
string lm = null;
string trie = null;
string audio = null;
@ -43,7 +42,6 @@ namespace CSharpExamples
if (args.Length > 0)
{
model = GetArgument(args, "--model");
alphabet = GetArgument(args, "--alphabet");
lm = GetArgument(args, "--lm");
trie = GetArgument(args, "--trie");
audio = GetArgument(args, "--audio");
@ -64,7 +62,6 @@ namespace CSharpExamples
stopwatch.Start();
sttClient.CreateModel(
model ?? "output_graph.pbmm",
alphabet ?? "alphabet.txt",
BEAM_WIDTH);
stopwatch.Stop();

View File

@ -51,7 +51,6 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:
* ``output_graph.tflite`` which is the TF Lite model
* ``alphabet.txt``
* ``lm.binary`` and ``trie`` files, if you want to use the language model; please be aware that a language model that is too big can make the device run out of memory (a scripted push is sketched below)
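If you push these files often, the copies can be scripted. A minimal sketch using Python's ``subprocess`` module, assuming ``adb`` is on your ``PATH`` and a device is connected; adjust the file list to what your setup actually needs:

.. code-block:: python

   import subprocess

   # Language model files are only needed if you use a language model.
   files = ['output_graph.tflite', 'lm.binary', 'trie']

   for name in files:
       subprocess.run(['adb', 'push', name, '/sdcard/deepspeech/'], check=True)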

View File

@ -23,7 +23,6 @@ public class DeepSpeechActivity extends AppCompatActivity {
DeepSpeechModel _m = null;
EditText _tfliteModel;
EditText _alphabet;
EditText _audioFile;
TextView _decodedString;
@ -49,10 +48,10 @@ public class DeepSpeechActivity extends AppCompatActivity {
return (int)((b1 & 0xFF) | (b2 & 0xFF) << 8 | (b3 & 0xFF) << 16 | (b4 & 0xFF) << 24);
}
private void newModel(String tfliteModel, String alphabet) {
private void newModel(String tfliteModel) {
this._tfliteStatus.setText("Creating model");
if (this._m == null) {
this._m = new DeepSpeechModel(tfliteModel, alphabet, BEAM_WIDTH);
this._m = new DeepSpeechModel(tfliteModel, BEAM_WIDTH);
}
}
@ -61,7 +60,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
this._startInference.setEnabled(false);
this.newModel(this._tfliteModel.getText().toString(), this._alphabet.getText().toString());
this.newModel(this._tfliteModel.getText().toString());
this._tfliteStatus.setText("Extracting audio features ...");
@ -128,13 +127,11 @@ public class DeepSpeechActivity extends AppCompatActivity {
this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus);
this._tfliteModel = (EditText) findViewById(R.id.tfliteModel);
this._alphabet = (EditText) findViewById(R.id.alphabet);
this._audioFile = (EditText) findViewById(R.id.audioFile);
this._tfliteModel.setText("/sdcard/deepspeech/output_graph.tflite");
this._tfliteStatus.setText("Ready, waiting ...");
this._alphabet.setText("/sdcard/deepspeech/alphabet.txt");
this._audioFile.setText("/sdcard/deepspeech/audio.wav");
this._startInference = (Button) findViewById(R.id.btnStartInference);

View File

@ -97,25 +97,6 @@
android:inputType="text" />
</LinearLayout>
<LinearLayout
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="horizontal">
<TextView
android:id="@+id/lblAlphabet"
android:layout_width="263dp"
android:layout_height="wrap_content"
android:layout_weight="1"
android:text="Alphabet" />
<EditText
android:id="@+id/alphabet"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:inputType="text" />
</LinearLayout>
<LinearLayout
android:layout_width="match_parent"
android:layout_height="wrap_content"

View File

@ -30,7 +30,6 @@ import java.nio.ByteBuffer;
public class BasicTest {
public static final String modelFile = "/data/local/tmp/test/output_graph.tflite";
public static final String alphabetFile = "/data/local/tmp/test/alphabet.txt";
public static final String lmFile = "/data/local/tmp/test/lm.binary";
public static final String trieFile = "/data/local/tmp/test/trie";
public static final String wavFile = "/data/local/tmp/test/LDC93S1.wav";
@ -64,7 +63,7 @@ public class BasicTest {
@Test
public void loadDeepSpeech_basic() {
DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
m.freeModel();
}
@ -121,7 +120,7 @@ public class BasicTest {
@Test
public void loadDeepSpeech_stt_noLM() {
DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
String decoded = doSTT(m, false);
assertEquals("she had your dark suit in greasy wash water all year", decoded);
@ -130,7 +129,7 @@ public class BasicTest {
@Test
public void loadDeepSpeech_stt_withLM() {
DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);
String decoded = doSTT(m, false);
@ -140,7 +139,7 @@ public class BasicTest {
@Test
public void loadDeepSpeech_sttWithMetadata_noLM() {
DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
String decoded = doSTT(m, true);
assertEquals("she had your dark suit in greasy wash water all year", decoded);
@ -149,7 +148,7 @@ public class BasicTest {
@Test
public void loadDeepSpeech_sttWithMetadata_withLM() {
DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH);
m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);
String decoded = doSTT(m, true);

View File

@ -20,15 +20,13 @@ public class DeepSpeechModel {
* @constructor
*
* @param modelPath The path to the frozen model graph.
* @param alphabetPath The path to the configuration file specifying
* the alphabet used by the network. See alphabet.h.
* @param beam_width The beam width used by the decoder. A larger beam
* width generates better results at the cost of decoding
* time.
*/
public DeepSpeechModel(String modelPath, String alphabetPath, int beam_width) {
public DeepSpeechModel(String modelPath, int beam_width) {
this._mspp = impl.new_modelstatep();
impl.CreateModel(modelPath, alphabetPath, beam_width, this._mspp);
impl.CreateModel(modelPath, beam_width, this._mspp);
this._msp = impl.modelstatep_value(this._mspp);
}

View File

@ -17,7 +17,7 @@ Once everything is installed, you can then use the `deepspeech` binary to do spe
pip3 install deepspeech
deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
```
@ -27,7 +27,7 @@ Alternatively, quicker inference can be performed using a supported NVIDIA GPU o
pip3 install deepspeech-gpu
deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
```
@ -223,7 +223,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
```bash
deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
```
@ -290,7 +290,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
```bash
./deepspeech --model models/output_graph.pbmm --alphabet models/alphabet.txt --lm models/lm.binary --trie models/trie --audio audio_input.wav
./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav
```

View File

@ -29,7 +29,6 @@ VersionAction.prototype.call = function(parser) {
var parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
@ -55,7 +54,7 @@ function metadataToString(metadata) {
console.error('Loading model from file %s', args['model']);
const model_load_start = process.hrtime();
var model = new Ds.Model(args['model'], args['alphabet'], args['beam_width']);
var model = new Ds.Model(args['model'], args['beam_width']);
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));

View File

@ -25,7 +25,6 @@ if (process.platform === 'win32') {
* An object providing an interface to a trained DeepSpeech model.
*
* @param {string} aModelPath The path to the frozen model graph.
* @param {string} aAlphabetConfigPath The path to the configuration file specifying the alphabet used by the network. See alphabet.h.
* @param {number} aBeamWidth The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
*
* @throws on error

View File

@ -30,9 +30,6 @@ class Model(object):
:param aModelPath: Path to model file to load
:type aModelPath: str
:param aAlphabetConfigPath: Path to alphabet file to load
:type aAlphabetConfigPath: str
:param aBeamWidth: Decoder beam width
:type aBeamWidth: int
"""

View File

@ -46,8 +46,6 @@ def main():
parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
parser.add_argument('--model', required=True,
help='Path to the model (protocol buffer binary file)')
parser.add_argument('--alphabet', required=True,
help='Path to the configuration file specifying the alphabet used by the network')
parser.add_argument('--lm', nargs='?',
help='Path to the language model binary file')
parser.add_argument('--trie', nargs='?',
@ -68,7 +66,7 @@ def main():
print('Loading model from file {}'.format(args.model), file=sys.stderr)
model_load_start = timer()
ds = Model(args.model, args.alphabet, args.beam_width)
ds = Model(args.model, args.beam_width)
model_load_end = timer() - model_load_start
print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)

View File

@ -25,8 +25,6 @@ def main():
parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
parser.add_argument('--model', required=True,
help='Path to the model (protocol buffer binary file)')
parser.add_argument('--alphabet', required=True,
help='Path to the configuration file specifying the alphabet used by the network')
parser.add_argument('--lm', nargs='?',
help='Path to the language model binary file')
parser.add_argument('--trie', nargs='?',
@ -37,7 +35,7 @@ def main():
help='Second audio file to use in interleaved streams')
args = parser.parse_args()
ds = Model(args.model, args.alphabet, BEAM_WIDTH)
ds = Model(args.model, BEAM_WIDTH)
if args.lm and args.trie:
ds.enableDecoderWithLM(args.lm, args.trie, LM_ALPHA, LM_BETA)

View File

@ -30,7 +30,7 @@ then:
image: ${build.docker_image}
env:
DEEPSPEECH_MODEL: "https://github.com/lissyx/DeepSpeech/releases/download/test-model-0.6.0a10/models.tar.gz"
DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.11/models.tar.gz"
DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz"
PIP_DEFAULT_TIMEOUT: "60"

View File

@ -21,7 +21,6 @@ exec_benchmark()
--dir /tmp/bench-ds/ \
--models ${model_file} \
--wav /tmp/LDC93S1.wav \
--alphabet /tmp/alphabet.txt \
--lm_binary /tmp/lm.binary \
--trie /tmp/trie \
--csv ${csv}
@ -30,7 +29,6 @@ exec_benchmark()
--dir /tmp/bench-ds-nolm/ \
--models ${model_file} \
--wav /tmp/LDC93S1.wav \
--alphabet /tmp/alphabet.txt \
--csv ${csv_nolm}
python ${DS_ROOT_TASK}/DeepSpeech/ds/bin/benchmark_plotter.py \

View File

@ -309,12 +309,12 @@ check_runtime_electronjs()
run_tflite_basic_inference_tests()
{
set +e
phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --alphabet ${DATA_TMP_DIR}/alphabet.txt --audio ${DATA_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
set +e
phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --alphabet ${DATA_TMP_DIR}/alphabet.txt --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
}
@ -322,22 +322,22 @@ run_tflite_basic_inference_tests()
run_netframework_inference_tests()
{
set +e
phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
set +e
phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended yes 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
set +e
phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
set +e
phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
}
@ -345,22 +345,22 @@ run_netframework_inference_tests()
run_electronjs_inference_tests()
{
set +e
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
set +e
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
set +e
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?"
set +e
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
set -e
assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?"
}
@ -368,25 +368,25 @@ run_electronjs_inference_tests()
run_basic_inference_tests()
{
set +e
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"
set +e
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --extended 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"
set +e
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status"
set +e
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status"
@ -397,24 +397,24 @@ run_all_inference_tests()
run_basic_inference_tests
set +e
phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_nolm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" "$status"
set +e
phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_stereo_44k}" "$status"
set +e
phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
set -e
assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}"
set +e
phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
set -e
assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
}
@ -424,7 +424,6 @@ run_prod_concurrent_stream_tests()
set +e
output=$(python ${TASKCLUSTER_TMP_DIR}/test_sources/concurrent_streams.py \
--model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
--alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
--lm ${TASKCLUSTER_TMP_DIR}/lm.binary \
--trie ${TASKCLUSTER_TMP_DIR}/trie \
--audio1 ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
@ -442,25 +441,25 @@ run_prod_concurrent_stream_tests()
run_prod_inference_tests()
{
set +e
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"
set +e
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status"
set +e
phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?
set -e
assert_correct_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status"
set +e
phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
set -e
assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
}
@ -468,13 +467,13 @@ run_prod_inference_tests()
run_multi_inference_tests()
{
set +e -o pipefail
multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
multi_phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
status=$?
set -e +o pipefail
assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_nolm}" "$status"
set +e -o pipefail
multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%')
status=$?
set -e +o pipefail
assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status"
@ -483,7 +482,7 @@ run_multi_inference_tests()
run_cpp_only_inference_tests()
{
set +e
phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
status=$?
set -e
assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status"
@ -566,7 +565,6 @@ download_data()
${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"
${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.pruned.lm ${TASKCLUSTER_TMP_DIR}/lm.binary
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.trie ${TASKCLUSTER_TMP_DIR}/trie
cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
@ -579,7 +577,7 @@ download_material()
download_native_client_files "${target_dir}"
download_data
ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav
}
download_benchmark_model()
@ -1595,7 +1593,6 @@ android_setup_ndk_data()
adb push \
${TASKCLUSTER_TMP_DIR}/${model_name} \
${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
${TASKCLUSTER_TMP_DIR}/alphabet.txt \
${ANDROID_TMP_DIR}/ds/
}
@ -1606,7 +1603,6 @@ android_setup_apk_data()
adb push \
${TASKCLUSTER_TMP_DIR}/${model_name} \
${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
${TASKCLUSTER_TMP_DIR}/alphabet.txt \
${TASKCLUSTER_TMP_DIR}/lm.binary \
${TASKCLUSTER_TMP_DIR}/trie \
${ANDROID_TMP_DIR}/test/

View File

@ -133,7 +133,6 @@ def create_flags():
# Decoder
f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.')
f.DEFINE_alias('alphabet', 'alphabet_config_path')
f.DEFINE_string('lm_binary_path', 'data/lm/lm.binary', 'path to the language model binary file created with KenLM')
f.DEFINE_alias('lm', 'lm_binary_path')
f.DEFINE_string('lm_trie_path', 'data/lm/trie', 'path to the language model trie file created with native_client/generate_trie')