diff --git a/data/smoke_test/LDC93S1_pcms16le_1_8000.wav b/data/smoke_test/LDC93S1_pcms16le_1_8000.wav new file mode 100644 index 00000000..86cf5b49 Binary files /dev/null and b/data/smoke_test/LDC93S1_pcms16le_1_8000.wav differ diff --git a/data/smoke_test/LDC93S1_pcms16le_2_44100.wav b/data/smoke_test/LDC93S1_pcms16le_2_44100.wav new file mode 100644 index 00000000..1774af44 Binary files /dev/null and b/data/smoke_test/LDC93S1_pcms16le_2_44100.wav differ diff --git a/native_client/client.cc b/native_client/client.cc index 2eac52f0..1b079cc1 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -89,17 +89,17 @@ main(int argc, char **argv) sox_format_t* input = sox_open_read(argv[2], NULL, NULL, NULL); assert(input); - int sampleRate = (int)input->signal.rate; - // Resample/reformat the audio so we can pass it through the MFCC functions sox_signalinfo_t target_signal = { - SOX_UNSPEC, // Rate + 16000, // Rate 1, // Channels 16, // Precision SOX_UNSPEC, // Length NULL // Effects headroom multiplier }; + sox_signalinfo_t interm_signal; + sox_encodinginfo_t target_encoding = { SOX_ENCODING_SIGN2, // Sample format 16, // Bits per sample @@ -129,28 +129,42 @@ main(int argc, char **argv) assert(output); + int sampleRate = (int)output->signal.rate; + + if ((int)input->signal.rate < 16000) { + fprintf(stderr, "Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.\n", (int)input->signal.rate); + } + // Setup the effects chain to decode/resample char* sox_args[10]; sox_effects_chain_t* chain = sox_create_effects_chain(&input->encoding, &output->encoding); + interm_signal = input->signal; + sox_effect_t* e = sox_create_effect(sox_find_effect("input")); sox_args[0] = (char*)input; assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS); - assert(sox_add_effect(chain, e, &input->signal, &input->signal) == + assert(sox_add_effect(chain, e, &interm_signal, &input->signal) == + SOX_SUCCESS); + free(e); + + e = sox_create_effect(sox_find_effect("rate")); + assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS); + assert(sox_add_effect(chain, e, &interm_signal, &output->signal) == SOX_SUCCESS); free(e); e = sox_create_effect(sox_find_effect("channels")); assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS); - assert(sox_add_effect(chain, e, &input->signal, &output->signal) == + assert(sox_add_effect(chain, e, &interm_signal, &output->signal) == SOX_SUCCESS); free(e); e = sox_create_effect(sox_find_effect("output")); sox_args[0] = (char*)output; assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS); - assert(sox_add_effect(chain, e, &input->signal, &output->signal) == + assert(sox_add_effect(chain, e, &interm_signal, &output->signal) == SOX_SUCCESS); free(e); diff --git a/native_client/javascript/client.js b/native_client/javascript/client.js index 57525c15..4b2cc667 100644 --- a/native_client/javascript/client.js +++ b/native_client/javascript/client.js @@ -5,6 +5,8 @@ const Sox = require('sox-stream'); const Ds = require('./index.js'); const ArgumentParser = require('argparse').ArgumentParser; const MemoryStream = require('memory-stream'); +const Wav = require('node-wav'); +const Duplex = require('stream').Duplex; // These constants control the beam search decoder @@ -44,10 +46,25 @@ function totalTime(hrtimeValue) { return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4); } +const buffer = Fs.readFileSync(args['audio']); +const result = Wav.decode(buffer); + +if (result.sampleRate < 16000) { + console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.'); +} + +function bufferToStream(buffer) { + var stream = new Duplex(); + stream.push(buffer); + stream.push(null); + return stream; +} + var audioStream = new MemoryStream(); -Fs.createReadStream(args['audio']). +bufferToStream(buffer). pipe(Sox({ output: { bits: 16, rate: 16000, channels: 1, type: 'raw' } })). pipe(audioStream); + audioStream.on('finish', () => { audioBuffer = audioStream.toBuffer(); diff --git a/native_client/javascript/package.json.in b/native_client/javascript/package.json.in index e322e471..8ed9a32f 100644 --- a/native_client/javascript/package.json.in +++ b/native_client/javascript/package.json.in @@ -22,7 +22,8 @@ "node-pre-gyp": "0.6.x", "argparse": "1.0.x", "sox-stream": "2.0.x", - "memory-stream": "0.0.3" + "memory-stream": "0.0.3", + "node-wav": "0.0.2" }, "bundledDependencies":["node-pre-gyp"], "devDependencies": { diff --git a/native_client/python/client.py b/native_client/python/client.py index a6ba5a6c..4a5739cf 100644 --- a/native_client/python/client.py +++ b/native_client/python/client.py @@ -37,20 +37,19 @@ N_FEATURES = 26 N_CONTEXT = 9 def convert_samplerate(audio_path): - sox_cmd = 'sox --norm {} -b 16 -t wav - channels 1 rate 16000'.format(audio_path) + sox_cmd = 'sox {} --type raw --bits 16 --channels 1 --rate 16000 - '.format(audio_path) try: p = subprocess.Popen(sox_cmd.split(), stderr=subprocess.PIPE, stdout=subprocess.PIPE) output, err = p.communicate() if p.returncode: - raise RuntimeError('SoX returned non-zero status') + raise RuntimeError('SoX returned non-zero status: {}'.format(err)) except OSError as e: - raise OSError('SoX not found, use 16kHz files or install it') + raise OSError('SoX not found, use 16kHz files or install it: ', e) - # we already know the header information, get only the data from output - audio = np.fromstring(output.split('data')[1], dtype=np.int16) + audio = np.fromstring(output, dtype=np.int16) return 16000, audio def main(): @@ -83,6 +82,8 @@ def main(): fs, audio = wav.read(args.audio) if fs != 16000: + if fs < 16000: + print('Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.' % (fs), file=sys.stderr) fs, audio = convert_samplerate(args.audio) audio_length = len(audio) * ( 1 / 16000) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 38eac318..1f42ffbb 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -1,6 +1,9 @@ python: packages: apt: 'make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev' + brew: + setup: 'install_local_homebrew "python-ds-test" && install_pkg_local_homebrew "sox"' + env: 'export EXTRA_ENV="PATH=$TASKCLUSTER_TASK_DIR/python-ds-test.brew/bin/:$PATH"' nodejs: packages: apt: 'nodejs sox' diff --git a/taskcluster/test-linux-opt-base.tyml b/taskcluster/test-linux-opt-base.tyml index 3ffe7c7a..9e527460 100644 --- a/taskcluster/test-linux-opt-base.tyml +++ b/taskcluster/test-linux-opt-base.tyml @@ -53,7 +53,7 @@ then: extraSystemSetup: { $eval: strip(str(build.system_setup)) } installGitlfs: { $eval: strip(str(build.git_lfs.linux)) } in: > - apt-get -qq update && apt-get -qq -y install git pixz libsox2 wget && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install git pixz sox wget && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && echo -e "#!/bin/bash\nset -xe\n ${installGitlfs} && env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && diff --git a/taskcluster/test-python_27-darwin-amd64-opt.yml b/taskcluster/test-python_27-darwin-amd64-opt.yml index e6552acf..41f5402c 100644 --- a/taskcluster/test-python_27-darwin-amd64-opt.yml +++ b/taskcluster/test-python_27-darwin-amd64-opt.yml @@ -3,6 +3,9 @@ build: dependencies: - "darwin-amd64-cpu-opt" - "test-training_upstream-linux-amd64-py27-opt" + system_setup: + > + ${python.brew.setup} && ${python.brew.env} args: tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 2.7.13" metadata: diff --git a/taskcluster/test-python_34-darwin-amd64-opt.yml b/taskcluster/test-python_34-darwin-amd64-opt.yml index f767149d..8d17cfe3 100644 --- a/taskcluster/test-python_34-darwin-amd64-opt.yml +++ b/taskcluster/test-python_34-darwin-amd64-opt.yml @@ -3,6 +3,9 @@ build: dependencies: - "darwin-amd64-cpu-opt" - "test-training_upstream-linux-amd64-py27-opt" + system_setup: + > + ${python.brew.setup} && ${python.brew.env} args: tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.4.6" metadata: diff --git a/taskcluster/test-python_35-darwin-amd64-opt.yml b/taskcluster/test-python_35-darwin-amd64-opt.yml index 108c71ad..61caa452 100644 --- a/taskcluster/test-python_35-darwin-amd64-opt.yml +++ b/taskcluster/test-python_35-darwin-amd64-opt.yml @@ -3,6 +3,9 @@ build: dependencies: - "darwin-amd64-cpu-opt" - "test-training_upstream-linux-amd64-py27-opt" + system_setup: + > + ${python.brew.setup} && ${python.brew.env} args: tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.5.3" metadata: diff --git a/taskcluster/test-python_36-darwin-amd64-opt.yml b/taskcluster/test-python_36-darwin-amd64-opt.yml index b3c10bf5..cd909f05 100644 --- a/taskcluster/test-python_36-darwin-amd64-opt.yml +++ b/taskcluster/test-python_36-darwin-amd64-opt.yml @@ -3,6 +3,9 @@ build: dependencies: - "darwin-amd64-cpu-opt" - "test-training_upstream-linux-amd64-py27-opt" + system_setup: + > + ${python.brew.setup} && ${python.brew.env} args: tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.6.2" metadata: diff --git a/tc-cpp-ds-tests-prod.sh b/tc-cpp-ds-tests-prod.sh index 4418903d..aa237b94 100644 --- a/tc-cpp-ds-tests-prod.sh +++ b/tc-cpp-ds-tests-prod.sh @@ -9,5 +9,6 @@ model_name=$(basename "${model_source}") download_material "${TASKCLUSTER_TMP_DIR}/ds" -phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) -assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" +export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH + +run_prod_inference_tests diff --git a/tc-cpp-ds-tests.sh b/tc-cpp-ds-tests.sh index 8fbbeaf1..1d63b5b9 100644 --- a/tc-cpp-ds-tests.sh +++ b/tc-cpp-ds-tests.sh @@ -8,15 +8,6 @@ aot_model=$1 download_material "${TASKCLUSTER_TMP_DIR}/ds" "${aot_model}" -phrase_pbmodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) -assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" +export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH -phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) -assert_correct_ldc93s1 "${phrase_pbmodel_withlm}" - -if [ "${aot_model}" = "--aot" ]; then - phrase_somodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) - phrase_somodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) - - assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}" -fi; +run_all_inference_tests diff --git a/tc-node-tests-prod.sh b/tc-node-tests-prod.sh index c6f6effb..05c760b4 100644 --- a/tc-node-tests-prod.sh +++ b/tc-node-tests-prod.sh @@ -22,5 +22,4 @@ npm install ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz export PATH=$HOME/node_modules/.bin/:$PATH -phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) -assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" +run_prod_inference_tests diff --git a/tc-node-tests.sh b/tc-node-tests.sh index a85160ca..ae241bb5 100644 --- a/tc-node-tests.sh +++ b/tc-node-tests.sh @@ -27,15 +27,4 @@ else npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz fi -phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) -assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" - -phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) -assert_correct_ldc93s1 "${phrase_pbmodel_withlm}" - -if [ "${aot_model}" = "--aot" ]; then - phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) - phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) - - assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}" -fi +run_all_inference_tests diff --git a/tc-python-tests-prod.sh b/tc-python-tests-prod.sh index 180bbda4..e9a48696 100644 --- a/tc-python-tests-prod.sh +++ b/tc-python-tests-prod.sh @@ -46,8 +46,7 @@ deepspeech_pkg="deepspeech-0.1.1-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type} pip install --upgrade ${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg} | cat -phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) -assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" +run_prod_inference_tests deactivate pyenv uninstall --force ${PYENV_NAME} diff --git a/tc-python-tests.sh b/tc-python-tests.sh index c49f84af..8ffb082e 100644 --- a/tc-python-tests.sh +++ b/tc-python-tests.sh @@ -49,18 +49,7 @@ else fi pip install --upgrade ${deepspeech_pkg_url} | cat -phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) -assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" - -phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) -assert_correct_ldc93s1 "${phrase_pbmodel_withlm}" - -if [ "${aot_model}" = "--aot" ]; then - phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) - phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) - - assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}" -fi +run_all_inference_tests deactivate pyenv uninstall --force ${PYENV_NAME} diff --git a/tc-tests-utils.sh b/tc-tests-utils.sh index 1821ad09..23ff5ddf 100755 --- a/tc-tests-utils.sh +++ b/tc-tests-utils.sh @@ -48,6 +48,7 @@ SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-2.7.13 3.4.6 3.5.3 3.6.2} # > ../deepspeech_wrap.cxx:966:23: error: 'WeakCallbackData' in namespace 'v8' does not name a type SUPPORTED_NODEJS_VERSIONS=${SUPPORTED_NODEJS_VERSIONS:-4.8.6 5.12.0 6.12.0 7.10.1 8.9.1 9.2.0} +# This verify exact inference result assert_correct_inference() { phrase=$1 @@ -75,6 +76,69 @@ assert_correct_inference() fi; } +# This verify that ${expected} is contained within ${phrase} +assert_working_inference() +{ + phrase=$1 + expected=$2 + + if [ -z "${phrase}" -o -z "${expected}" ]; then + echo "One or more empty strings:" + echo "phrase: <${phrase}>" + echo "expected: <${expected}>" + return 1 + fi; + + case "${phrase}" in + *${expected}*) + echo "Proper output has been produced:" + echo "${phrase}" + return 0 + ;; + + *) + echo "!! Non matching output !!" + echo "got: <${phrase}>" + echo "xxd:"; echo "${phrase}" | xxd + echo "-------------------" + echo "expected: <${expected}>" + echo "xxd:"; echo "${expected}" | xxd + return 1 + ;; + esac +} + +assert_shows_warning() +{ + stderr=$1 + expected=$2 + + if [ -z "${stderr}" -o -z "${expected}" ]; then + echo "One or more empty strings:" + echo "stderr: <${stderr}>" + echo "expected: <${expected}>" + return 1 + fi; + + case "${stderr}" in + *${expected}*) + echo "Proper output has been produced:" + echo "${stderr}" + return 0 + ;; + + *) + echo "!! Non matching output !!" + echo "got: <${stderr}>" + echo "xxd:"; echo "${stderr}" | xxd + echo "-------------------" + echo "expected: <${expected}>" + echo "xxd:"; echo "${expected}" | xxd + return 1 + ;; + esac +} + assert_correct_ldc93s1() { assert_correct_inference "$1" "she had your dark suit in greasy wash water all year" @@ -85,6 +149,11 @@ assert_correct_ldc93s1_prodmodel() assert_correct_inference "$1" "she had the duck so ingrecywachworallyear" } +assert_working_ldc93s1_prodmodel() +{ + assert_working_inference "$1" "she had the duck so" +} + assert_correct_ldc93s1_somodel() { somodel_nolm=$1 @@ -116,6 +185,61 @@ assert_correct_ldc93s1_somodel() fi } +assert_correct_warning_upsampling() +{ + assert_shows_warning "$1" "is lower than 16kHz. Up-sampling might produce erratic speech recognition" +} + +run_all_inference_tests() +{ + phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) + assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" + + phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) + assert_correct_ldc93s1 "${phrase_pbmodel_withlm}" + + phrase_pbmodel_nolm_stereo_44k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) + assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" + + phrase_pbmodel_withlm_stereo_44k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) + assert_correct_ldc93s1 "${phrase_pbmodel_withlm_stereo_44k}" + + phrase_pbmodel_nolm_mono_8k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt 2>&1 1>/dev/null) + assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}" + + phrase_pbmodel_withlm_mono_8k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null) + assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" + + if [ "${aot_model}" = "--aot" ]; then + phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) + phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) + + assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}" + + phrase_somodel_nolm_stereo_44k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) + phrase_somodel_withlm_stere_44k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) + + assert_correct_ldc93s1_somodel "${phrase_somodel_nolm_stereo_44k}" "${phrase_somodel_withlm_stereo_44k}" + + phrase_somodel_nolm_mono_8k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt 2>&1 1>/dev/null) + phrase_somodel_withlm_stere_44k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null) + + assert_correct_warning_upsampling "${phrase_somodel_nolm_mono_8k}" "${phrase_somodel_withlm_mono_8k}" + fi; +} + +run_prod_inference_tests() +{ + phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) + assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" + + phrase_pbmodel_withlm_stereo_44k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) + assert_working_ldc93s1_prodmodel "${phrase_pbmodel_withlm_stereo_44k}" + + phrase_pbmodel_withlm_mono_8k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null) + assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" +} + generic_download_tarxz() { target_dir=$1 @@ -151,7 +275,7 @@ download_ctc_kenlm() download_data() { wget ${model_source} -O ${TASKCLUSTER_TMP_DIR}/${model_name} - wget https://catalog.ldc.upenn.edu/desc/addenda/LDC93S1.wav -O ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav + cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/ cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/lm.binary ${TASKCLUSTER_TMP_DIR}/lm.binary cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/trie ${TASKCLUSTER_TMP_DIR}/trie @@ -170,7 +294,7 @@ download_material() download_data - ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt + ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt } install_pyenv()