Add different samplerate samples

Fixes #1022
This commit is contained in:
Alexandre Lissy 2018-02-05 16:26:25 +01:00
parent 7b78894220
commit 72298b8f6d
19 changed files with 197 additions and 57 deletions

Binary file not shown.

Binary file not shown.

View File

@ -89,17 +89,17 @@ main(int argc, char **argv)
sox_format_t* input = sox_open_read(argv[2], NULL, NULL, NULL); sox_format_t* input = sox_open_read(argv[2], NULL, NULL, NULL);
assert(input); assert(input);
int sampleRate = (int)input->signal.rate;
// Resample/reformat the audio so we can pass it through the MFCC functions // Resample/reformat the audio so we can pass it through the MFCC functions
sox_signalinfo_t target_signal = { sox_signalinfo_t target_signal = {
SOX_UNSPEC, // Rate 16000, // Rate
1, // Channels 1, // Channels
16, // Precision 16, // Precision
SOX_UNSPEC, // Length SOX_UNSPEC, // Length
NULL // Effects headroom multiplier NULL // Effects headroom multiplier
}; };
sox_signalinfo_t interm_signal;
sox_encodinginfo_t target_encoding = { sox_encodinginfo_t target_encoding = {
SOX_ENCODING_SIGN2, // Sample format SOX_ENCODING_SIGN2, // Sample format
16, // Bits per sample 16, // Bits per sample
@ -129,28 +129,42 @@ main(int argc, char **argv)
assert(output); assert(output);
int sampleRate = (int)output->signal.rate;
if ((int)input->signal.rate < 16000) {
fprintf(stderr, "Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.\n", (int)input->signal.rate);
}
// Setup the effects chain to decode/resample // Setup the effects chain to decode/resample
char* sox_args[10]; char* sox_args[10];
sox_effects_chain_t* chain = sox_effects_chain_t* chain =
sox_create_effects_chain(&input->encoding, &output->encoding); sox_create_effects_chain(&input->encoding, &output->encoding);
interm_signal = input->signal;
sox_effect_t* e = sox_create_effect(sox_find_effect("input")); sox_effect_t* e = sox_create_effect(sox_find_effect("input"));
sox_args[0] = (char*)input; sox_args[0] = (char*)input;
assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS); assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS);
assert(sox_add_effect(chain, e, &input->signal, &input->signal) == assert(sox_add_effect(chain, e, &interm_signal, &input->signal) ==
SOX_SUCCESS);
free(e);
e = sox_create_effect(sox_find_effect("rate"));
assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
SOX_SUCCESS); SOX_SUCCESS);
free(e); free(e);
e = sox_create_effect(sox_find_effect("channels")); e = sox_create_effect(sox_find_effect("channels"));
assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS); assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
assert(sox_add_effect(chain, e, &input->signal, &output->signal) == assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
SOX_SUCCESS); SOX_SUCCESS);
free(e); free(e);
e = sox_create_effect(sox_find_effect("output")); e = sox_create_effect(sox_find_effect("output"));
sox_args[0] = (char*)output; sox_args[0] = (char*)output;
assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS); assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS);
assert(sox_add_effect(chain, e, &input->signal, &output->signal) == assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
SOX_SUCCESS); SOX_SUCCESS);
free(e); free(e);

View File

@ -5,6 +5,8 @@ const Sox = require('sox-stream');
const Ds = require('./index.js'); const Ds = require('./index.js');
const ArgumentParser = require('argparse').ArgumentParser; const ArgumentParser = require('argparse').ArgumentParser;
const MemoryStream = require('memory-stream'); const MemoryStream = require('memory-stream');
const Wav = require('node-wav');
const Duplex = require('stream').Duplex;
// These constants control the beam search decoder // These constants control the beam search decoder
@ -44,10 +46,25 @@ function totalTime(hrtimeValue) {
return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4); return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
} }
const buffer = Fs.readFileSync(args['audio']);
const result = Wav.decode(buffer);
if (result.sampleRate < 16000) {
console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
}
/**
 * Expose an in-memory buffer as a stream so it can be piped onward.
 *
 * @param {Buffer} buffer - Data to emit from the returned stream.
 * @returns {Duplex} Stream that yields `buffer` and then ends.
 */
function bufferToStream(buffer) {
  const source = new Duplex();
  // Queue the whole payload, then push null to mark the end of the stream.
  source.push(buffer);
  source.push(null);
  return source;
}
var audioStream = new MemoryStream(); var audioStream = new MemoryStream();
Fs.createReadStream(args['audio']). bufferToStream(buffer).
pipe(Sox({ output: { bits: 16, rate: 16000, channels: 1, type: 'raw' } })). pipe(Sox({ output: { bits: 16, rate: 16000, channels: 1, type: 'raw' } })).
pipe(audioStream); pipe(audioStream);
audioStream.on('finish', () => { audioStream.on('finish', () => {
audioBuffer = audioStream.toBuffer(); audioBuffer = audioStream.toBuffer();

View File

@ -22,7 +22,8 @@
"node-pre-gyp": "0.6.x", "node-pre-gyp": "0.6.x",
"argparse": "1.0.x", "argparse": "1.0.x",
"sox-stream": "2.0.x", "sox-stream": "2.0.x",
"memory-stream": "0.0.3" "memory-stream": "0.0.3",
"node-wav": "0.0.2"
}, },
"bundledDependencies":["node-pre-gyp"], "bundledDependencies":["node-pre-gyp"],
"devDependencies": { "devDependencies": {

View File

@ -37,20 +37,19 @@ N_FEATURES = 26
N_CONTEXT = 9 N_CONTEXT = 9
def convert_samplerate(audio_path): def convert_samplerate(audio_path):
sox_cmd = 'sox --norm {} -b 16 -t wav - channels 1 rate 16000'.format(audio_path) sox_cmd = 'sox {} --type raw --bits 16 --channels 1 --rate 16000 - '.format(audio_path)
try: try:
p = subprocess.Popen(sox_cmd.split(), p = subprocess.Popen(sox_cmd.split(),
stderr=subprocess.PIPE, stdout=subprocess.PIPE) stderr=subprocess.PIPE, stdout=subprocess.PIPE)
output, err = p.communicate() output, err = p.communicate()
if p.returncode: if p.returncode:
raise RuntimeError('SoX returned non-zero status') raise RuntimeError('SoX returned non-zero status: {}'.format(err))
except OSError as e: except OSError as e:
raise OSError('SoX not found, use 16kHz files or install it') raise OSError('SoX not found, use 16kHz files or install it: ', e)
# we already know the header information, get only the data from output audio = np.fromstring(output, dtype=np.int16)
audio = np.fromstring(output.split('data')[1], dtype=np.int16)
return 16000, audio return 16000, audio
def main(): def main():
@ -83,6 +82,8 @@ def main():
fs, audio = wav.read(args.audio) fs, audio = wav.read(args.audio)
if fs != 16000: if fs != 16000:
if fs < 16000:
print('Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.' % (fs), file=sys.stderr)
fs, audio = convert_samplerate(args.audio) fs, audio = convert_samplerate(args.audio)
audio_length = len(audio) * ( 1 / 16000) audio_length = len(audio) * ( 1 / 16000)

View File

@ -1,6 +1,9 @@
python: python:
packages: packages:
apt: 'make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev' apt: 'make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev'
brew:
setup: 'install_local_homebrew "python-ds-test" && install_pkg_local_homebrew "sox"'
env: 'export EXTRA_ENV="PATH=$TASKCLUSTER_TASK_DIR/python-ds-test.brew/bin/:$PATH"'
nodejs: nodejs:
packages: packages:
apt: 'nodejs sox' apt: 'nodejs sox'

View File

@ -53,7 +53,7 @@ then:
extraSystemSetup: { $eval: strip(str(build.system_setup)) } extraSystemSetup: { $eval: strip(str(build.system_setup)) }
installGitlfs: { $eval: strip(str(build.git_lfs.linux)) } installGitlfs: { $eval: strip(str(build.git_lfs.linux)) }
in: > in: >
apt-get -qq update && apt-get -qq -y install git pixz libsox2 wget && ${extraSystemSetup} && apt-get -qq update && apt-get -qq -y install git pixz sox wget && ${extraSystemSetup} &&
adduser --system --home ${system.homedir.linux} ${system.username} && adduser --system --home ${system.homedir.linux} ${system.username} &&
cd ${system.homedir.linux} && cd ${system.homedir.linux} &&
echo -e "#!/bin/bash\nset -xe\n ${installGitlfs} && env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && echo -e "#!/bin/bash\nset -xe\n ${installGitlfs} && env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&

View File

@ -3,6 +3,9 @@ build:
dependencies: dependencies:
- "darwin-amd64-cpu-opt" - "darwin-amd64-cpu-opt"
- "test-training_upstream-linux-amd64-py27-opt" - "test-training_upstream-linux-amd64-py27-opt"
system_setup:
>
${python.brew.setup} && ${python.brew.env}
args: args:
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 2.7.13" tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 2.7.13"
metadata: metadata:

View File

@ -3,6 +3,9 @@ build:
dependencies: dependencies:
- "darwin-amd64-cpu-opt" - "darwin-amd64-cpu-opt"
- "test-training_upstream-linux-amd64-py27-opt" - "test-training_upstream-linux-amd64-py27-opt"
system_setup:
>
${python.brew.setup} && ${python.brew.env}
args: args:
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.4.6" tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.4.6"
metadata: metadata:

View File

@ -3,6 +3,9 @@ build:
dependencies: dependencies:
- "darwin-amd64-cpu-opt" - "darwin-amd64-cpu-opt"
- "test-training_upstream-linux-amd64-py27-opt" - "test-training_upstream-linux-amd64-py27-opt"
system_setup:
>
${python.brew.setup} && ${python.brew.env}
args: args:
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.5.3" tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.5.3"
metadata: metadata:

View File

@ -3,6 +3,9 @@ build:
dependencies: dependencies:
- "darwin-amd64-cpu-opt" - "darwin-amd64-cpu-opt"
- "test-training_upstream-linux-amd64-py27-opt" - "test-training_upstream-linux-amd64-py27-opt"
system_setup:
>
${python.brew.setup} && ${python.brew.env}
args: args:
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.6.2" tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.6.2"
metadata: metadata:

View File

@ -9,5 +9,6 @@ model_name=$(basename "${model_source}")
download_material "${TASKCLUSTER_TMP_DIR}/ds" download_material "${TASKCLUSTER_TMP_DIR}/ds"
phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
run_prod_inference_tests

View File

@ -8,15 +8,6 @@ aot_model=$1
download_material "${TASKCLUSTER_TMP_DIR}/ds" "${aot_model}" download_material "${TASKCLUSTER_TMP_DIR}/ds" "${aot_model}"
phrase_pbmodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) run_all_inference_tests
assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
if [ "${aot_model}" = "--aot" ]; then
phrase_somodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
phrase_somodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
fi;

View File

@ -22,5 +22,4 @@ npm install ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz
export PATH=$HOME/node_modules/.bin/:$PATH export PATH=$HOME/node_modules/.bin/:$PATH
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) run_prod_inference_tests
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"

View File

@ -27,15 +27,4 @@ else
npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz
fi fi
phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) run_all_inference_tests
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
if [ "${aot_model}" = "--aot" ]; then
phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
fi

View File

@ -46,8 +46,7 @@ deepspeech_pkg="deepspeech-0.1.1-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}
pip install --upgrade ${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg} | cat pip install --upgrade ${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg} | cat
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie) run_prod_inference_tests
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
deactivate deactivate
pyenv uninstall --force ${PYENV_NAME} pyenv uninstall --force ${PYENV_NAME}

View File

@ -49,18 +49,7 @@ else
fi fi
pip install --upgrade ${deepspeech_pkg_url} | cat pip install --upgrade ${deepspeech_pkg_url} | cat
phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt) run_all_inference_tests
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
if [ "${aot_model}" = "--aot" ]; then
phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
fi
deactivate deactivate
pyenv uninstall --force ${PYENV_NAME} pyenv uninstall --force ${PYENV_NAME}

View File

@ -48,6 +48,7 @@ SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-2.7.13 3.4.6 3.5.3 3.6.2}
# > ../deepspeech_wrap.cxx:966:23: error: 'WeakCallbackData' in namespace 'v8' does not name a type # > ../deepspeech_wrap.cxx:966:23: error: 'WeakCallbackData' in namespace 'v8' does not name a type
SUPPORTED_NODEJS_VERSIONS=${SUPPORTED_NODEJS_VERSIONS:-4.8.6 5.12.0 6.12.0 7.10.1 8.9.1 9.2.0} SUPPORTED_NODEJS_VERSIONS=${SUPPORTED_NODEJS_VERSIONS:-4.8.6 5.12.0 6.12.0 7.10.1 8.9.1 9.2.0}
# This verifies the exact inference result
assert_correct_inference() assert_correct_inference()
{ {
phrase=$1 phrase=$1
@ -75,6 +76,69 @@ assert_correct_inference()
fi; fi;
} }
# This verifies that ${expected} is contained within ${phrase}
# Check that an inference transcript contains an expected fragment.
#
# Arguments:
#   $1 - phrase: text produced by the inference run
#   $2 - expected: fragment that must occur somewhere inside the phrase
# Returns:
#   0 when the fragment is found; 1 when either argument is empty or the
#   fragment is missing (both strings are then printed, plus their xxd dumps).
# Note: assigns the global variables `phrase` and `expected`.
assert_working_inference()
{
  phrase=$1
  expected=$2

  if [ -z "${phrase}" ] || [ -z "${expected}" ]; then
    echo "One or more empty strings:"
    echo "phrase: <${phrase}>"
    echo "expected: <${expected}>"
    return 1
  fi;

  # The expansion is quoted inside the pattern so that it is matched as a
  # literal substring; unquoted, characters such as * ? [ would act as globs.
  case "${phrase}" in
    *"${expected}"*)
      echo "Proper output has been produced:"
      echo "${phrase}"
      return 0
    ;;

    *)
      echo "!! Non matching output !!"
      echo "got: <${phrase}>"
      echo "xxd:"; echo "${phrase}" | xxd
      echo "-------------------"
      echo "expected: <${expected}>"
      echo "xxd:"; echo "${expected}" | xxd
      return 1
    ;;
  esac
}
# Check that captured stderr output contains an expected warning fragment.
#
# Arguments:
#   $1 - stderr: captured error output of the command under test
#   $2 - expected: fragment that must occur somewhere inside that output
# Returns:
#   0 when the fragment is found; 1 when either argument is empty or the
#   fragment is missing (both strings are then printed, plus their xxd dumps).
# Note: assigns the global variables `stderr` and `expected`.
assert_shows_warning()
{
  stderr=$1
  expected=$2

  if [ -z "${stderr}" ] || [ -z "${expected}" ]; then
    echo "One or more empty strings:"
    echo "stderr: <${stderr}>"
    echo "expected: <${expected}>"
    return 1
  fi;

  # The expansion is quoted inside the pattern so that it is matched as a
  # literal substring; unquoted, characters such as * ? [ would act as globs.
  case "${stderr}" in
    *"${expected}"*)
      echo "Proper output has been produced:"
      echo "${stderr}"
      return 0
    ;;

    *)
      echo "!! Non matching output !!"
      echo "got: <${stderr}>"
      echo "xxd:"; echo "${stderr}" | xxd
      echo "-------------------"
      echo "expected: <${expected}>"
      echo "xxd:"; echo "${expected}" | xxd
      return 1
    ;;
  esac
}
assert_correct_ldc93s1() assert_correct_ldc93s1()
{ {
assert_correct_inference "$1" "she had your dark suit in greasy wash water all year" assert_correct_inference "$1" "she had your dark suit in greasy wash water all year"
@ -85,6 +149,11 @@ assert_correct_ldc93s1_prodmodel()
assert_correct_inference "$1" "she had the duck so ingrecywachworallyear" assert_correct_inference "$1" "she had the duck so ingrecywachworallyear"
} }
# Pass the transcript in $1 to assert_working_inference together with the
# fixed fragment "she had the duck so"; the return status is that of
# assert_working_inference.
assert_working_ldc93s1_prodmodel()
{
assert_working_inference "$1" "she had the duck so"
}
assert_correct_ldc93s1_somodel() assert_correct_ldc93s1_somodel()
{ {
somodel_nolm=$1 somodel_nolm=$1
@ -116,6 +185,61 @@ assert_correct_ldc93s1_somodel()
fi fi
} }
# Pass the captured output in $1 to assert_shows_warning together with the
# fixed up-sampling warning fragment; the return status is that of
# assert_shows_warning. Only $1 is examined.
assert_correct_warning_upsampling()
{
assert_shows_warning "$1" "is lower than 16kHz. Up-sampling might produce erratic speech recognition"
}
# Run the inference smoke tests with the `deepspeech` command found on PATH.
#
# Reads the globals TASKCLUSTER_TMP_DIR, model_name and aot_model.
# For each of the three audio files (LDC93S1.wav, the stereo 44.1 kHz file and
# the mono 8 kHz file) the command is run without and with the language model
# arguments (lm.binary + trie):
#   - LDC93S1.wav and the 44.1 kHz file: stdout is captured and checked with
#     assert_correct_ldc93s1.
#   - the 8 kHz file: only stderr is captured and checked for the up-sampling
#     warning.
# When aot_model is "--aot", the same matrix is repeated with an empty model
# path ("") and checked with assert_correct_ldc93s1_somodel.
run_all_inference_tests()
{
  phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"

  phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
  assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"

  phrase_pbmodel_nolm_stereo_44k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}"

  phrase_pbmodel_withlm_stereo_44k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
  assert_correct_ldc93s1 "${phrase_pbmodel_withlm_stereo_44k}"

  # "2>&1 1>/dev/null" keeps only stderr in the captured string: the warning
  # is what gets checked for the 8 kHz sample, not the transcript.
  phrase_pbmodel_nolm_mono_8k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt 2>&1 1>/dev/null)
  assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}"

  phrase_pbmodel_withlm_mono_8k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null)
  assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"

  if [ "${aot_model}" = "--aot" ]; then
    phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
    phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
    assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"

    phrase_somodel_nolm_stereo_44k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
    phrase_somodel_withlm_stereo_44k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
    assert_correct_ldc93s1_somodel "${phrase_somodel_nolm_stereo_44k}" "${phrase_somodel_withlm_stereo_44k}"

    phrase_somodel_nolm_mono_8k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt 2>&1 1>/dev/null)
    phrase_somodel_withlm_mono_8k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null)
    # assert_correct_warning_upsampling inspects only its first argument, so
    # each captured stderr gets its own call.
    assert_correct_warning_upsampling "${phrase_somodel_nolm_mono_8k}"
    assert_correct_warning_upsampling "${phrase_somodel_withlm_mono_8k}"
  fi;
}
# Run the smoke tests for the model at ${TASKCLUSTER_TMP_DIR}/${model_name},
# always passing the language model arguments (lm.binary + trie).
#
# Reads the globals TASKCLUSTER_TMP_DIR and model_name and runs the
# `deepspeech` command found on PATH against three audio files.
run_prod_inference_tests()
{
  # Reference sample: the transcript must match exactly.
  phrase_pbmodel_withlm=$(deepspeech \
    ${TASKCLUSTER_TMP_DIR}/${model_name} \
    ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
    ${TASKCLUSTER_TMP_DIR}/lm.binary \
    ${TASKCLUSTER_TMP_DIR}/trie)
  assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"

  # Stereo 44.1 kHz sample: the transcript only has to contain the expected fragment.
  phrase_pbmodel_withlm_stereo_44k=$(deepspeech \
    ${TASKCLUSTER_TMP_DIR}/${model_name} \
    ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav \
    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
    ${TASKCLUSTER_TMP_DIR}/lm.binary \
    ${TASKCLUSTER_TMP_DIR}/trie)
  assert_working_ldc93s1_prodmodel "${phrase_pbmodel_withlm_stereo_44k}"

  # Mono 8 kHz sample: stdout is discarded and stderr is captured, so the
  # up-sampling warning is what gets checked.
  phrase_pbmodel_withlm_mono_8k=$(deepspeech \
    ${TASKCLUSTER_TMP_DIR}/${model_name} \
    ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav \
    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
    ${TASKCLUSTER_TMP_DIR}/lm.binary \
    ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null)
  assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
}
generic_download_tarxz() generic_download_tarxz()
{ {
target_dir=$1 target_dir=$1
@ -151,7 +275,7 @@ download_ctc_kenlm()
download_data() download_data()
{ {
wget ${model_source} -O ${TASKCLUSTER_TMP_DIR}/${model_name} wget ${model_source} -O ${TASKCLUSTER_TMP_DIR}/${model_name}
wget https://catalog.ldc.upenn.edu/desc/addenda/LDC93S1.wav -O ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/lm.binary ${TASKCLUSTER_TMP_DIR}/lm.binary cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/lm.binary ${TASKCLUSTER_TMP_DIR}/lm.binary
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/trie ${TASKCLUSTER_TMP_DIR}/trie cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/trie ${TASKCLUSTER_TMP_DIR}/trie
@ -170,7 +294,7 @@ download_material()
download_data download_data
ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
} }
install_pyenv() install_pyenv()