parent
7b78894220
commit
72298b8f6d
Binary file not shown.
Binary file not shown.
|
@ -89,17 +89,17 @@ main(int argc, char **argv)
|
|||
sox_format_t* input = sox_open_read(argv[2], NULL, NULL, NULL);
|
||||
assert(input);
|
||||
|
||||
int sampleRate = (int)input->signal.rate;
|
||||
|
||||
// Resample/reformat the audio so we can pass it through the MFCC functions
|
||||
sox_signalinfo_t target_signal = {
|
||||
SOX_UNSPEC, // Rate
|
||||
16000, // Rate
|
||||
1, // Channels
|
||||
16, // Precision
|
||||
SOX_UNSPEC, // Length
|
||||
NULL // Effects headroom multiplier
|
||||
};
|
||||
|
||||
sox_signalinfo_t interm_signal;
|
||||
|
||||
sox_encodinginfo_t target_encoding = {
|
||||
SOX_ENCODING_SIGN2, // Sample format
|
||||
16, // Bits per sample
|
||||
|
@ -129,28 +129,42 @@ main(int argc, char **argv)
|
|||
|
||||
assert(output);
|
||||
|
||||
int sampleRate = (int)output->signal.rate;
|
||||
|
||||
if ((int)input->signal.rate < 16000) {
|
||||
fprintf(stderr, "Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.\n", (int)input->signal.rate);
|
||||
}
|
||||
|
||||
// Setup the effects chain to decode/resample
|
||||
char* sox_args[10];
|
||||
sox_effects_chain_t* chain =
|
||||
sox_create_effects_chain(&input->encoding, &output->encoding);
|
||||
|
||||
interm_signal = input->signal;
|
||||
|
||||
sox_effect_t* e = sox_create_effect(sox_find_effect("input"));
|
||||
sox_args[0] = (char*)input;
|
||||
assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS);
|
||||
assert(sox_add_effect(chain, e, &input->signal, &input->signal) ==
|
||||
assert(sox_add_effect(chain, e, &interm_signal, &input->signal) ==
|
||||
SOX_SUCCESS);
|
||||
free(e);
|
||||
|
||||
e = sox_create_effect(sox_find_effect("rate"));
|
||||
assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
|
||||
assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
|
||||
SOX_SUCCESS);
|
||||
free(e);
|
||||
|
||||
e = sox_create_effect(sox_find_effect("channels"));
|
||||
assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
|
||||
assert(sox_add_effect(chain, e, &input->signal, &output->signal) ==
|
||||
assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
|
||||
SOX_SUCCESS);
|
||||
free(e);
|
||||
|
||||
e = sox_create_effect(sox_find_effect("output"));
|
||||
sox_args[0] = (char*)output;
|
||||
assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS);
|
||||
assert(sox_add_effect(chain, e, &input->signal, &output->signal) ==
|
||||
assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
|
||||
SOX_SUCCESS);
|
||||
free(e);
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@ const Sox = require('sox-stream');
|
|||
const Ds = require('./index.js');
|
||||
const ArgumentParser = require('argparse').ArgumentParser;
|
||||
const MemoryStream = require('memory-stream');
|
||||
const Wav = require('node-wav');
|
||||
const Duplex = require('stream').Duplex;
|
||||
|
||||
// These constants control the beam search decoder
|
||||
|
||||
|
@ -44,10 +46,25 @@ function totalTime(hrtimeValue) {
|
|||
return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
|
||||
}
|
||||
|
||||
const buffer = Fs.readFileSync(args['audio']);
|
||||
const result = Wav.decode(buffer);
|
||||
|
||||
if (result.sampleRate < 16000) {
|
||||
console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
|
||||
}
|
||||
|
||||
/**
 * Wrap an in-memory buffer in a stream so it can be piped like a file.
 *
 * The whole buffer is queued as a single chunk, immediately followed by the
 * `null` end-of-data marker, so the returned stream is already complete when
 * the caller starts piping it.
 *
 * @param {Buffer} buffer - data to expose through the stream
 * @returns {Duplex} stream that yields `buffer` and then ends
 */
function bufferToStream(buffer) {
  const wrapped = new Duplex();
  // Queue the payload, then mark the end of the data.
  wrapped.push(buffer);
  wrapped.push(null);
  return wrapped;
}
|
||||
|
||||
var audioStream = new MemoryStream();
|
||||
Fs.createReadStream(args['audio']).
|
||||
bufferToStream(buffer).
|
||||
pipe(Sox({ output: { bits: 16, rate: 16000, channels: 1, type: 'raw' } })).
|
||||
pipe(audioStream);
|
||||
|
||||
audioStream.on('finish', () => {
|
||||
audioBuffer = audioStream.toBuffer();
|
||||
|
||||
|
|
|
@ -22,7 +22,8 @@
|
|||
"node-pre-gyp": "0.6.x",
|
||||
"argparse": "1.0.x",
|
||||
"sox-stream": "2.0.x",
|
||||
"memory-stream": "0.0.3"
|
||||
"memory-stream": "0.0.3",
|
||||
"node-wav": "0.0.2"
|
||||
},
|
||||
"bundledDependencies":["node-pre-gyp"],
|
||||
"devDependencies": {
|
||||
|
|
|
@ -37,20 +37,19 @@ N_FEATURES = 26
|
|||
N_CONTEXT = 9
|
||||
|
||||
def convert_samplerate(audio_path):
    """Convert an audio file to 16 kHz, mono, 16-bit raw PCM using SoX.

    Args:
        audio_path: path of the audio file handed to the ``sox`` executable.

    Returns:
        A ``(16000, audio)`` tuple where ``audio`` is a one-dimensional
        ``numpy.int16`` array decoded from SoX's standard output.

    Raises:
        RuntimeError: SoX exited with a non-zero status (its stderr is
            included in the message).
        OSError: the ``sox`` executable could not be started.
    """
    # Build argv explicitly instead of splitting a formatted string, so that
    # paths containing whitespace reach SoX as a single argument.
    sox_cmd = ['sox', audio_path,
               '--type', 'raw', '--bits', '16',
               '--channels', '1', '--rate', '16000', '-']
    try:
        p = subprocess.Popen(sox_cmd,
                             stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        output, err = p.communicate()

        if p.returncode:
            raise RuntimeError('SoX returned non-zero status: {}'.format(err))

    except OSError as e:
        # Fold the underlying error into one message; passing it as a second
        # positional argument would be interpreted as (errno, strerror).
        raise OSError('SoX not found, use 16kHz files or install it: {}'.format(e))

    # SoX was asked for headerless raw output, so every byte is sample data.
    # NOTE(review): frombuffer returns a read-only view; add .copy() if a
    # caller needs to modify the samples in place.
    audio = np.frombuffer(output, dtype=np.int16)
    return 16000, audio
|
||||
|
||||
def main():
|
||||
|
@ -83,6 +82,8 @@ def main():
|
|||
|
||||
fs, audio = wav.read(args.audio)
|
||||
if fs != 16000:
|
||||
if fs < 16000:
|
||||
print('Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.' % (fs), file=sys.stderr)
|
||||
fs, audio = convert_samplerate(args.audio)
|
||||
audio_length = len(audio) * ( 1 / 16000)
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
python:
|
||||
packages:
|
||||
apt: 'make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev'
|
||||
brew:
|
||||
setup: 'install_local_homebrew "python-ds-test" && install_pkg_local_homebrew "sox"'
|
||||
env: 'export EXTRA_ENV="PATH=$TASKCLUSTER_TASK_DIR/python-ds-test.brew/bin/:$PATH"'
|
||||
nodejs:
|
||||
packages:
|
||||
apt: 'nodejs sox'
|
||||
|
|
|
@ -53,7 +53,7 @@ then:
|
|||
extraSystemSetup: { $eval: strip(str(build.system_setup)) }
|
||||
installGitlfs: { $eval: strip(str(build.git_lfs.linux)) }
|
||||
in: >
|
||||
apt-get -qq update && apt-get -qq -y install git pixz libsox2 wget && ${extraSystemSetup} &&
|
||||
apt-get -qq update && apt-get -qq -y install git pixz sox wget && ${extraSystemSetup} &&
|
||||
adduser --system --home ${system.homedir.linux} ${system.username} &&
|
||||
cd ${system.homedir.linux} &&
|
||||
echo -e "#!/bin/bash\nset -xe\n ${installGitlfs} && env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
|
||||
|
|
|
@ -3,6 +3,9 @@ build:
|
|||
dependencies:
|
||||
- "darwin-amd64-cpu-opt"
|
||||
- "test-training_upstream-linux-amd64-py27-opt"
|
||||
system_setup:
|
||||
>
|
||||
${python.brew.setup} && ${python.brew.env}
|
||||
args:
|
||||
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 2.7.13"
|
||||
metadata:
|
||||
|
|
|
@ -3,6 +3,9 @@ build:
|
|||
dependencies:
|
||||
- "darwin-amd64-cpu-opt"
|
||||
- "test-training_upstream-linux-amd64-py27-opt"
|
||||
system_setup:
|
||||
>
|
||||
${python.brew.setup} && ${python.brew.env}
|
||||
args:
|
||||
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.4.6"
|
||||
metadata:
|
||||
|
|
|
@ -3,6 +3,9 @@ build:
|
|||
dependencies:
|
||||
- "darwin-amd64-cpu-opt"
|
||||
- "test-training_upstream-linux-amd64-py27-opt"
|
||||
system_setup:
|
||||
>
|
||||
${python.brew.setup} && ${python.brew.env}
|
||||
args:
|
||||
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.5.3"
|
||||
metadata:
|
||||
|
|
|
@ -3,6 +3,9 @@ build:
|
|||
dependencies:
|
||||
- "darwin-amd64-cpu-opt"
|
||||
- "test-training_upstream-linux-amd64-py27-opt"
|
||||
system_setup:
|
||||
>
|
||||
${python.brew.setup} && ${python.brew.env}
|
||||
args:
|
||||
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.6.2"
|
||||
metadata:
|
||||
|
|
|
@ -9,5 +9,6 @@ model_name=$(basename "${model_source}")
|
|||
|
||||
download_material "${TASKCLUSTER_TMP_DIR}/ds"
|
||||
|
||||
phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
|
||||
export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
|
||||
|
||||
run_prod_inference_tests
|
||||
|
|
|
@ -8,15 +8,6 @@ aot_model=$1
|
|||
|
||||
download_material "${TASKCLUSTER_TMP_DIR}/ds" "${aot_model}"
|
||||
|
||||
phrase_pbmodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
|
||||
export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
|
||||
|
||||
phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
|
||||
|
||||
if [ "${aot_model}" = "--aot" ]; then
|
||||
phrase_somodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
|
||||
phrase_somodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
|
||||
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
|
||||
fi;
|
||||
run_all_inference_tests
|
||||
|
|
|
@ -22,5 +22,4 @@ npm install ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz
|
|||
|
||||
export PATH=$HOME/node_modules/.bin/:$PATH
|
||||
|
||||
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
|
||||
run_prod_inference_tests
|
||||
|
|
|
@ -27,15 +27,4 @@ else
|
|||
npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz
|
||||
fi
|
||||
|
||||
phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
|
||||
|
||||
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
|
||||
|
||||
if [ "${aot_model}" = "--aot" ]; then
|
||||
phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
|
||||
phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
|
||||
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
|
||||
fi
|
||||
run_all_inference_tests
|
||||
|
|
|
@ -46,8 +46,7 @@ deepspeech_pkg="deepspeech-0.1.1-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}
|
|||
|
||||
pip install --upgrade ${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg} | cat
|
||||
|
||||
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
|
||||
run_prod_inference_tests
|
||||
|
||||
deactivate
|
||||
pyenv uninstall --force ${PYENV_NAME}
|
||||
|
|
|
@ -49,18 +49,7 @@ else
|
|||
fi
|
||||
pip install --upgrade ${deepspeech_pkg_url} | cat
|
||||
|
||||
phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
|
||||
|
||||
phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
|
||||
|
||||
if [ "${aot_model}" = "--aot" ]; then
|
||||
phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
|
||||
phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
|
||||
|
||||
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
|
||||
fi
|
||||
run_all_inference_tests
|
||||
|
||||
deactivate
|
||||
pyenv uninstall --force ${PYENV_NAME}
|
||||
|
|
|
@ -48,6 +48,7 @@ SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-2.7.13 3.4.6 3.5.3 3.6.2}
|
|||
# > ../deepspeech_wrap.cxx:966:23: error: 'WeakCallbackData' in namespace 'v8' does not name a type
|
||||
SUPPORTED_NODEJS_VERSIONS=${SUPPORTED_NODEJS_VERSIONS:-4.8.6 5.12.0 6.12.0 7.10.1 8.9.1 9.2.0}
|
||||
|
||||
# This verifies the exact inference result
|
||||
assert_correct_inference()
|
||||
{
|
||||
phrase=$1
|
||||
|
@ -75,6 +76,69 @@ assert_correct_inference()
|
|||
fi;
|
||||
}
|
||||
|
||||
# Verifies that ${expected} is contained, as a literal substring, within
# ${phrase}.
#   $1: phrase produced by the inference run
#   $2: text that must appear somewhere inside the phrase
# Returns 0 when the text is found; returns 1 when either argument is empty
# or when the text is absent (both strings are then dumped, including xxd).
assert_working_inference()
{
  phrase=$1
  expected=$2

  # Two separate tests instead of the obsolescent, ambiguous `-o` operator.
  if [ -z "${phrase}" ] || [ -z "${expected}" ]; then
      echo "One or more empty strings:"
      echo "phrase: <${phrase}>"
      echo "expected: <${expected}>"
      return 1
  fi;

  case "${phrase}" in
      # The expansion is quoted so glob characters in ${expected} (*, ?, [)
      # are matched literally rather than acting as wildcards.
      *"${expected}"*)
          echo "Proper output has been produced:"
          echo "${phrase}"
          return 0
      ;;

      *)
          echo "!! Non matching output !!"
          echo "got: <${phrase}>"
          echo "xxd:"; echo "${phrase}" | xxd
          echo "-------------------"
          echo "expected: <${expected}>"
          echo "xxd:"; echo "${expected}" | xxd
          return 1
      ;;
  esac
}
|
||||
|
||||
# Verifies that the captured stderr text contains ${expected} as a literal
# substring.
#   $1: captured stderr output
#   $2: warning text that must appear somewhere inside it
# Returns 0 when the text is found; returns 1 when either argument is empty
# or when the text is absent (both strings are then dumped, including xxd).
assert_shows_warning()
{
  stderr=$1
  expected=$2

  # Two separate tests instead of the obsolescent, ambiguous `-o` operator.
  if [ -z "${stderr}" ] || [ -z "${expected}" ]; then
      echo "One or more empty strings:"
      echo "stderr: <${stderr}>"
      echo "expected: <${expected}>"
      return 1
  fi;

  case "${stderr}" in
      # The expansion is quoted so glob characters in ${expected} (*, ?, [)
      # are matched literally rather than acting as wildcards.
      *"${expected}"*)
          echo "Proper output has been produced:"
          echo "${stderr}"
          return 0
      ;;

      *)
          echo "!! Non matching output !!"
          echo "got: <${stderr}>"
          echo "xxd:"; echo "${stderr}" | xxd
          echo "-------------------"
          echo "expected: <${expected}>"
          echo "xxd:"; echo "${expected}" | xxd
          return 1
      ;;
  esac
}
|
||||
|
||||
assert_correct_ldc93s1()
|
||||
{
|
||||
assert_correct_inference "$1" "she had your dark suit in greasy wash water all year"
|
||||
|
@ -85,6 +149,11 @@ assert_correct_ldc93s1_prodmodel()
|
|||
assert_correct_inference "$1" "she had the duck so ingrecywachworallyear"
|
||||
}
|
||||
|
||||
# Containment check for the prod model on LDC93S1: the phrase in $1 only has
# to include the fragment below; the rest of the output is not compared.
assert_working_ldc93s1_prodmodel()
{
  assert_working_inference "${1}" \
    "she had the duck so"
}
|
||||
|
||||
assert_correct_ldc93s1_somodel()
|
||||
{
|
||||
somodel_nolm=$1
|
||||
|
@ -116,6 +185,61 @@ assert_correct_ldc93s1_somodel()
|
|||
fi
|
||||
}
|
||||
|
||||
# Checks that the captured stderr in $1 carries the up-sampling warning.
# Only the first argument is inspected.
assert_correct_warning_upsampling()
{
  assert_shows_warning "${1}" \
    "is lower than 16kHz. Up-sampling might produce erratic speech recognition"
}
|
||||
|
||||
# Runs the inference checks against ${model_name}:
#   - LDC93S1.wav and the stereo 44.1kHz variant must give the exact
#     transcription, both with and without the language model;
#   - the mono 8kHz variant must emit the up-sampling warning on stderr;
#   - when ${aot_model} is "--aot", the same three inputs are repeated with
#     an empty model path.
# Reads TASKCLUSTER_TMP_DIR, model_name and aot_model from the environment.
run_all_inference_tests()
{
  phrase_pbmodel_nolm=$(deepspeech "${TASKCLUSTER_TMP_DIR}/${model_name}" "${TASKCLUSTER_TMP_DIR}/LDC93S1.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt")
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"

  phrase_pbmodel_withlm=$(deepspeech "${TASKCLUSTER_TMP_DIR}/${model_name}" "${TASKCLUSTER_TMP_DIR}/LDC93S1.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" "${TASKCLUSTER_TMP_DIR}/lm.binary" "${TASKCLUSTER_TMP_DIR}/trie")
  assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"

  phrase_pbmodel_nolm_stereo_44k=$(deepspeech "${TASKCLUSTER_TMP_DIR}/${model_name}" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt")
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}"

  phrase_pbmodel_withlm_stereo_44k=$(deepspeech "${TASKCLUSTER_TMP_DIR}/${model_name}" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" "${TASKCLUSTER_TMP_DIR}/lm.binary" "${TASKCLUSTER_TMP_DIR}/trie")
  assert_correct_ldc93s1 "${phrase_pbmodel_withlm_stereo_44k}"

  # `2>&1 1>/dev/null` captures stderr only: the warning is what is checked,
  # the transcription itself is discarded.
  phrase_pbmodel_nolm_mono_8k=$(deepspeech "${TASKCLUSTER_TMP_DIR}/${model_name}" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" 2>&1 1>/dev/null)
  assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}"

  phrase_pbmodel_withlm_mono_8k=$(deepspeech "${TASKCLUSTER_TMP_DIR}/${model_name}" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" "${TASKCLUSTER_TMP_DIR}/lm.binary" "${TASKCLUSTER_TMP_DIR}/trie" 2>&1 1>/dev/null)
  assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"

  if [ "${aot_model}" = "--aot" ]; then
      phrase_somodel_nolm=$(deepspeech "" "${TASKCLUSTER_TMP_DIR}/LDC93S1.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt")
      phrase_somodel_withlm=$(deepspeech "" "${TASKCLUSTER_TMP_DIR}/LDC93S1.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" "${TASKCLUSTER_TMP_DIR}/lm.binary" "${TASKCLUSTER_TMP_DIR}/trie")

      assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"

      # The with-LM result is stored under the same name the assertion reads,
      # and no extra audio argument is passed ahead of the alphabet.
      phrase_somodel_nolm_stereo_44k=$(deepspeech "" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt")
      phrase_somodel_withlm_stereo_44k=$(deepspeech "" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" "${TASKCLUSTER_TMP_DIR}/lm.binary" "${TASKCLUSTER_TMP_DIR}/trie")

      assert_correct_ldc93s1_somodel "${phrase_somodel_nolm_stereo_44k}" "${phrase_somodel_withlm_stereo_44k}"

      phrase_somodel_nolm_mono_8k=$(deepspeech "" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" 2>&1 1>/dev/null)
      phrase_somodel_withlm_mono_8k=$(deepspeech "" "${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav" "${TASKCLUSTER_TMP_DIR}/alphabet.txt" "${TASKCLUSTER_TMP_DIR}/lm.binary" "${TASKCLUSTER_TMP_DIR}/trie" 2>&1 1>/dev/null)

      # assert_correct_warning_upsampling inspects only its first argument,
      # so each captured stderr is checked with its own call.
      assert_correct_warning_upsampling "${phrase_somodel_nolm_mono_8k}"
      assert_correct_warning_upsampling "${phrase_somodel_withlm_mono_8k}"
  fi;
}
|
||||
|
||||
# Inference checks for the production model, always with the language model:
#   - LDC93S1.wav must give the exact prod-model transcription;
#   - the stereo 44.1kHz variant only has to contain the expected fragment;
#   - the mono 8kHz variant must emit the up-sampling warning on stderr.
run_prod_inference_tests()
{
  phrase_pbmodel_withlm=$(deepspeech \
    ${TASKCLUSTER_TMP_DIR}/${model_name} \
    ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
    ${TASKCLUSTER_TMP_DIR}/lm.binary \
    ${TASKCLUSTER_TMP_DIR}/trie)
  assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"

  phrase_pbmodel_withlm_stereo_44k=$(deepspeech \
    ${TASKCLUSTER_TMP_DIR}/${model_name} \
    ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav \
    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
    ${TASKCLUSTER_TMP_DIR}/lm.binary \
    ${TASKCLUSTER_TMP_DIR}/trie)
  assert_working_ldc93s1_prodmodel "${phrase_pbmodel_withlm_stereo_44k}"

  # Only stderr is captured here; stdout (the transcription) is discarded.
  phrase_pbmodel_withlm_mono_8k=$(deepspeech \
    ${TASKCLUSTER_TMP_DIR}/${model_name} \
    ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav \
    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
    ${TASKCLUSTER_TMP_DIR}/lm.binary \
    ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null)
  assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
}
|
||||
|
||||
generic_download_tarxz()
|
||||
{
|
||||
target_dir=$1
|
||||
|
@ -151,7 +275,7 @@ download_ctc_kenlm()
|
|||
download_data()
|
||||
{
|
||||
wget ${model_source} -O ${TASKCLUSTER_TMP_DIR}/${model_name}
|
||||
wget https://catalog.ldc.upenn.edu/desc/addenda/LDC93S1.wav -O ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav
|
||||
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
|
||||
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
|
||||
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/lm.binary ${TASKCLUSTER_TMP_DIR}/lm.binary
|
||||
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/trie ${TASKCLUSTER_TMP_DIR}/trie
|
||||
|
@ -170,7 +294,7 @@ download_material()
|
|||
|
||||
download_data
|
||||
|
||||
ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
|
||||
ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
|
||||
}
|
||||
|
||||
install_pyenv()
|
||||
|
|
Loading…
Reference in New Issue