Add different samplerate samples

Fixes #1022
2018-02-05 16:26:25 +01:00 · 2018-02-05 16:26:25 +01:00 · 72298b8f6d
parent 7b78894220
commit 72298b8f6d
19 changed files with 197 additions and 57 deletions
--- a/data/smoke_test/LDC93S1_pcms16le_1_8000.wav
+++ b/data/smoke_test/LDC93S1_pcms16le_1_8000.wav
--- a/data/smoke_test/LDC93S1_pcms16le_2_44100.wav
+++ b/data/smoke_test/LDC93S1_pcms16le_2_44100.wav
--- a/native_client/client.cc
+++ b/native_client/client.cc
@ -89,17 +89,17 @@ main(int argc, char **argv)
  sox_format_t* input = sox_open_read(argv[2], NULL, NULL, NULL);
  assert(input);

-  int sampleRate = (int)input->signal.rate;
-
  // Resample/reformat the audio so we can pass it through the MFCC functions
  sox_signalinfo_t target_signal = {
-      SOX_UNSPEC, // Rate
+      16000, // Rate
      1, // Channels
      16, // Precision
      SOX_UNSPEC, // Length
      NULL // Effects headroom multiplier
  };

+  sox_signalinfo_t interm_signal;
+
  sox_encodinginfo_t target_encoding = {
    SOX_ENCODING_SIGN2, // Sample format
    16, // Bits per sample
@ -129,28 +129,42 @@ main(int argc, char **argv)

  assert(output);

+  int sampleRate = (int)output->signal.rate;
+
+  if ((int)input->signal.rate < 16000) {
+    fprintf(stderr, "Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.\n", (int)input->signal.rate);
+  }
+
  // Setup the effects chain to decode/resample
  char* sox_args[10];
  sox_effects_chain_t* chain =
    sox_create_effects_chain(&input->encoding, &output->encoding);

+  interm_signal = input->signal;
+
  sox_effect_t* e = sox_create_effect(sox_find_effect("input"));
  sox_args[0] = (char*)input;
  assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS);
-  assert(sox_add_effect(chain, e, &input->signal, &input->signal) ==
+  assert(sox_add_effect(chain, e, &interm_signal, &input->signal) ==
+         SOX_SUCCESS);
+  free(e);
+
+  e = sox_create_effect(sox_find_effect("rate"));
+  assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
+  assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
         SOX_SUCCESS);
  free(e);

  e = sox_create_effect(sox_find_effect("channels"));
  assert(sox_effect_options(e, 0, NULL) == SOX_SUCCESS);
-  assert(sox_add_effect(chain, e, &input->signal, &output->signal) ==
+  assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
         SOX_SUCCESS);
  free(e);

  e = sox_create_effect(sox_find_effect("output"));
  sox_args[0] = (char*)output;
  assert(sox_effect_options(e, 1, sox_args) == SOX_SUCCESS);
-  assert(sox_add_effect(chain, e, &input->signal, &output->signal) ==
+  assert(sox_add_effect(chain, e, &interm_signal, &output->signal) ==
         SOX_SUCCESS);
  free(e);

--- a/native_client/javascript/client.js
+++ b/native_client/javascript/client.js
@ -5,6 +5,8 @@ const Sox = require('sox-stream');
 const Ds = require('./index.js');
 const ArgumentParser = require('argparse').ArgumentParser;
 const MemoryStream = require('memory-stream');
+const Wav = require('node-wav');
+const Duplex = require('stream').Duplex;

 // These constants control the beam search decoder

@ -44,10 +46,25 @@ function totalTime(hrtimeValue) {
  return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
 }

+const buffer = Fs.readFileSync(args['audio']);
+const result = Wav.decode(buffer);
+
+if (result.sampleRate < 16000) {
+  console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
+}
+
+/**
+ * Wrap an in-memory buffer in a stream so that it can be piped.
+ * The buffer is queued as one chunk, followed by null to mark end-of-stream.
+ */
+function bufferToStream(buffer) {
+  const source = new Duplex();
+  for (const chunk of [buffer, null]) {
+    source.push(chunk);
+  }
+  return source;
+}
+
 var audioStream = new MemoryStream();
-Fs.createReadStream(args['audio']).
+bufferToStream(buffer).
  pipe(Sox({ output: { bits: 16, rate: 16000, channels: 1, type: 'raw' } })).
  pipe(audioStream);
+
 audioStream.on('finish', () => {
  audioBuffer = audioStream.toBuffer();

--- a/native_client/javascript/package.json.in
+++ b/native_client/javascript/package.json.in
@ -22,7 +22,8 @@
      "node-pre-gyp": "0.6.x",
      "argparse": "1.0.x",
      "sox-stream": "2.0.x",
-      "memory-stream": "0.0.3"
+      "memory-stream": "0.0.3",
+      "node-wav": "0.0.2"
    },
    "bundledDependencies":["node-pre-gyp"],
    "devDependencies": {
--- a/native_client/python/client.py
+++ b/native_client/python/client.py
@ -37,20 +37,19 @@ N_FEATURES = 26
 N_CONTEXT = 9

 def convert_samplerate(audio_path):
+    """Resample ``audio_path`` to 16 kHz, 16-bit, mono by running SoX.
+
+    Returns a ``(16000, samples)`` tuple, where ``samples`` is a NumPy int16
+    array decoded from the headerless (raw) data SoX writes to stdout.
+
+    Raises RuntimeError when SoX exits with a non-zero status, and OSError
+    when starting the SoX process fails.
+    """
-    sox_cmd = 'sox --norm {} -b 16 -t wav - channels 1 rate 16000'.format(audio_path)
+    # Build an argument list instead of splitting a formatted string, so an
+    # audio path containing whitespace is still passed as a single argument.
+    sox_cmd = ['sox', audio_path, '--type', 'raw', '--bits', '16', '--channels', '1', '--rate', '16000', '-']
    try:
-        p = subprocess.Popen(sox_cmd.split(),
+        p = subprocess.Popen(sox_cmd,
                             stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        output, err = p.communicate()

        if p.returncode:
-            raise RuntimeError('SoX returned non-zero status')
+            raise RuntimeError('SoX returned non-zero status: {}'.format(err))

    except OSError as e:
-        raise OSError('SoX not found, use 16kHz files or install it')
+        # Format the caught error into the message: passing it as a second
+        # positional argument makes OSError treat the message as an errno.
+        raise OSError('SoX not found, use 16kHz files or install it: {}'.format(e))

-    # we already know the header information, get only the data from output
-    audio = np.fromstring(output.split('data')[1], dtype=np.int16)
+    audio = np.fromstring(output, dtype=np.int16)
    return 16000, audio

 def main():
@ -83,6 +82,8 @@ def main():

    fs, audio = wav.read(args.audio)
    if fs != 16000:
+        if fs < 16000:
+            print('Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.' % (fs), file=sys.stderr)
        fs, audio = convert_samplerate(args.audio)
    audio_length = len(audio) * ( 1 / 16000)

--- a/taskcluster/.shared.yml
+++ b/taskcluster/.shared.yml
@ -1,6 +1,9 @@
 python:
  packages:
    apt: 'make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev'
+  brew:
+    setup: 'install_local_homebrew "python-ds-test" && install_pkg_local_homebrew "sox"'
+    env: 'export EXTRA_ENV="PATH=$TASKCLUSTER_TASK_DIR/python-ds-test.brew/bin/:$PATH"'
 nodejs:
  packages:
    apt: 'nodejs sox'
--- a/taskcluster/test-linux-opt-base.tyml
+++ b/taskcluster/test-linux-opt-base.tyml
@ -53,7 +53,7 @@ then:
          extraSystemSetup: { $eval: strip(str(build.system_setup)) }
          installGitlfs: { $eval: strip(str(build.git_lfs.linux)) }
        in: >
-          apt-get -qq update && apt-get -qq -y install git pixz libsox2 wget && ${extraSystemSetup} &&
+          apt-get -qq update && apt-get -qq -y install git pixz sox wget && ${extraSystemSetup} &&
          adduser --system --home ${system.homedir.linux} ${system.username} &&
          cd ${system.homedir.linux} &&
          echo -e "#!/bin/bash\nset -xe\n ${installGitlfs} && env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
--- a/taskcluster/test-python_27-darwin-amd64-opt.yml
+++ b/taskcluster/test-python_27-darwin-amd64-opt.yml
@ -3,6 +3,9 @@ build:
  dependencies:
    - "darwin-amd64-cpu-opt"
    - "test-training_upstream-linux-amd64-py27-opt"
+  system_setup:
+    >
+      ${python.brew.setup} && ${python.brew.env}
  args:
    tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 2.7.13"
  metadata:
--- a/taskcluster/test-python_34-darwin-amd64-opt.yml
+++ b/taskcluster/test-python_34-darwin-amd64-opt.yml
@ -3,6 +3,9 @@ build:
  dependencies:
    - "darwin-amd64-cpu-opt"
    - "test-training_upstream-linux-amd64-py27-opt"
+  system_setup:
+    >
+      ${python.brew.setup} && ${python.brew.env}
  args:
    tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.4.6"
  metadata:
--- a/taskcluster/test-python_35-darwin-amd64-opt.yml
+++ b/taskcluster/test-python_35-darwin-amd64-opt.yml
@ -3,6 +3,9 @@ build:
  dependencies:
    - "darwin-amd64-cpu-opt"
    - "test-training_upstream-linux-amd64-py27-opt"
+  system_setup:
+    >
+      ${python.brew.setup} && ${python.brew.env}
  args:
    tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.5.3"
  metadata:
--- a/taskcluster/test-python_36-darwin-amd64-opt.yml
+++ b/taskcluster/test-python_36-darwin-amd64-opt.yml
@ -3,6 +3,9 @@ build:
  dependencies:
    - "darwin-amd64-cpu-opt"
    - "test-training_upstream-linux-amd64-py27-opt"
+  system_setup:
+    >
+      ${python.brew.setup} && ${python.brew.env}
  args:
    tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/tc-python-tests.sh 3.6.2"
  metadata:
--- a/tc-cpp-ds-tests-prod.sh
+++ b/tc-cpp-ds-tests-prod.sh
@ -9,5 +9,6 @@ model_name=$(basename "${model_source}")

 download_material "${TASKCLUSTER_TMP_DIR}/ds"

-phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
+export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
+
+run_prod_inference_tests
--- a/tc-cpp-ds-tests.sh
+++ b/tc-cpp-ds-tests.sh
@ -8,15 +8,6 @@ aot_model=$1

 download_material "${TASKCLUSTER_TMP_DIR}/ds" "${aot_model}"

-phrase_pbmodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
-assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
+export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH

-phrase_pbmodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
-
-if [ "${aot_model}" = "--aot" ]; then
-    phrase_somodel_nolm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
-    phrase_somodel_withlm=$(LD_LIBRARY_PATH=${TASKCLUSTER_TMP_DIR}/ds/:$LD_LIBRARY_PATH ${TASKCLUSTER_TMP_DIR}/ds/deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-
-    assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
-fi;
+run_all_inference_tests
--- a/tc-node-tests-prod.sh
+++ b/tc-node-tests-prod.sh
@ -22,5 +22,4 @@ npm install ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz

 export PATH=$HOME/node_modules/.bin/:$PATH

-phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
+run_prod_inference_tests
--- a/tc-node-tests.sh
+++ b/tc-node-tests.sh
@ -27,15 +27,4 @@ else
    npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-0.1.1.tgz
 fi

-phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
-assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
-
-phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
-
-if [ "${aot_model}" = "--aot" ]; then
-    phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
-    phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-
-    assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
-fi
+run_all_inference_tests
--- a/tc-python-tests-prod.sh
+++ b/tc-python-tests-prod.sh
@ -46,8 +46,7 @@ deepspeech_pkg="deepspeech-0.1.1-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}

 pip install --upgrade ${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg} | cat

-phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
+run_prod_inference_tests

 deactivate
 pyenv uninstall --force ${PYENV_NAME}
--- a/tc-python-tests.sh
+++ b/tc-python-tests.sh
@ -49,18 +49,7 @@ else
 fi
 pip install --upgrade ${deepspeech_pkg_url} | cat

-phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
-assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
-
-phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
-
-if [ "${aot_model}" = "--aot" ]; then
-    phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
-    phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
-
-    assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
-fi
+run_all_inference_tests

 deactivate
 pyenv uninstall --force ${PYENV_NAME}
--- a/tc-tests-utils.sh
+++ b/tc-tests-utils.sh
@ -48,6 +48,7 @@ SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-2.7.13 3.4.6 3.5.3 3.6.2}
 # > ../deepspeech_wrap.cxx:966:23: error: 'WeakCallbackData' in namespace 'v8' does not name a type
 SUPPORTED_NODEJS_VERSIONS=${SUPPORTED_NODEJS_VERSIONS:-4.8.6 5.12.0 6.12.0 7.10.1 8.9.1 9.2.0}

+# This verifies the exact inference result
 assert_correct_inference()
 {
  phrase=$1
@ -75,6 +76,69 @@ assert_correct_inference()
  fi;
 }

+# This verifies that ${expected} is contained within ${phrase}.
+#   $1: phrase   - text produced by the run under test
+#   $2: expected - text that must appear somewhere inside $1
+# Returns 0 on a match; returns 1 when there is no match or when either
+# argument is empty. On a mismatch both strings are also piped through xxd.
+assert_working_inference()
+{
+  phrase=$1
+  expected=$2
+
+  if [ -z "${phrase}" ] || [ -z "${expected}" ]; then
+      echo "One or more empty strings:"
+      echo "phrase: <${phrase}>"
+      echo "expected: <${expected}>"
+      return 1
+  fi;
+
+  # The expansion is quoted so that ${expected} is matched literally; left
+  # unquoted, characters such as * ? [ in it would act as glob patterns.
+  case "${phrase}" in
+      *"${expected}"*)
+          echo "Proper output has been produced:"
+          echo "${phrase}"
+          return 0
+      ;;
+
+      *)
+          echo "!! Non matching output !!"
+          echo "got: <${phrase}>"
+          echo "xxd:"; echo "${phrase}" | xxd
+          echo "-------------------"
+          echo "expected: <${expected}>"
+          echo "xxd:"; echo "${expected}" | xxd
+          return 1
+      ;;
+  esac
+}
+
+# Verify that ${expected} is contained within the captured ${stderr} text.
+#   $1: stderr   - captured standard-error output of the run under test
+#   $2: expected - text that must appear somewhere inside $1
+# Returns 0 on a match; returns 1 when there is no match or when either
+# argument is empty. On a mismatch both strings are also piped through xxd.
+assert_shows_warning()
+{
+  stderr=$1
+  expected=$2
+
+  if [ -z "${stderr}" ] || [ -z "${expected}" ]; then
+      echo "One or more empty strings:"
+      echo "stderr: <${stderr}>"
+      echo "expected: <${expected}>"
+      return 1
+  fi;
+
+  # The expansion is quoted so that ${expected} is matched literally; left
+  # unquoted, characters such as * ? [ in it would act as glob patterns.
+  case "${stderr}" in
+      *"${expected}"*)
+          echo "Proper output has been produced:"
+          echo "${stderr}"
+          return 0
+      ;;
+
+      *)
+          echo "!! Non matching output !!"
+          echo "got: <${stderr}>"
+          echo "xxd:"; echo "${stderr}" | xxd
+          echo "-------------------"
+          echo "expected: <${expected}>"
+          echo "xxd:"; echo "${expected}" | xxd
+          return 1
+      ;;
+  esac
+}
+
 assert_correct_ldc93s1()
 {
  assert_correct_inference "$1" "she had your dark suit in greasy wash water all year"
@ -85,6 +149,11 @@ assert_correct_ldc93s1_prodmodel()
  assert_correct_inference "$1" "she had the duck so ingrecywachworallyear"
 }

+# Passes when the output given as $1 contains "she had the duck so".
+assert_working_ldc93s1_prodmodel()
+{
+  assert_working_inference "${1}" 'she had the duck so'
+}
+
 assert_correct_ldc93s1_somodel()
 {
    somodel_nolm=$1
@ -116,6 +185,61 @@ assert_correct_ldc93s1_somodel()
    fi
 }

+# Passes when the stderr text given as $1 contains the up-sampling warning.
+assert_correct_warning_upsampling()
+{
+  assert_shows_warning "${1}" 'is lower than 16kHz. Up-sampling might produce erratic speech recognition'
+}
+
+# Run the full inference test matrix through the `deepspeech` command on PATH.
+# The 16kHz mono and the 44.1kHz stereo samples are checked against the
+# expected transcript; for the 8kHz mono sample only stderr is captured and
+# checked for the up-sampling warning. Every case runs without and with the
+# language model. When ${aot_model} is "--aot", the same cases are repeated
+# with an empty model argument.
+# Reads ${TASKCLUSTER_TMP_DIR}, ${model_name} and ${aot_model} from the caller.
+run_all_inference_tests()
+{
+  phrase_pbmodel_nolm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
+  assert_correct_ldc93s1 "${phrase_pbmodel_nolm}"
+
+  phrase_pbmodel_withlm=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
+  assert_correct_ldc93s1 "${phrase_pbmodel_withlm}"
+
+  phrase_pbmodel_nolm_stereo_44k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
+  assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}"
+
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
+  assert_correct_ldc93s1 "${phrase_pbmodel_withlm_stereo_44k}"
+
+  # "2>&1 1>/dev/null" captures stderr only: the warning is what gets checked.
+  phrase_pbmodel_nolm_mono_8k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt 2>&1 1>/dev/null)
+  assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}"
+
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null)
+  assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
+
+  if [ "${aot_model}" = "--aot" ]; then
+      phrase_somodel_nolm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
+      phrase_somodel_withlm=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
+
+      assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
+
+      phrase_somodel_nolm_stereo_44k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt)
+      phrase_somodel_withlm_stereo_44k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie)
+
+      assert_correct_ldc93s1_somodel "${phrase_somodel_nolm_stereo_44k}" "${phrase_somodel_withlm_stereo_44k}"
+
+      phrase_somodel_nolm_mono_8k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt 2>&1 1>/dev/null)
+      phrase_somodel_withlm_mono_8k=$(deepspeech "" ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt ${TASKCLUSTER_TMP_DIR}/lm.binary ${TASKCLUSTER_TMP_DIR}/trie 2>&1 1>/dev/null)
+
+      # The helper only inspects $1, so each capture needs its own call.
+      assert_correct_warning_upsampling "${phrase_somodel_nolm_mono_8k}"
+      assert_correct_warning_upsampling "${phrase_somodel_withlm_mono_8k}"
+  fi;
+}
+
+# Production-model checks, always run with the language model and trie:
+# exact transcript for the 16kHz sample, containment check for the 44.1kHz
+# stereo sample, and the up-sampling warning on stderr for the 8kHz sample.
+run_prod_inference_tests()
+{
+  phrase_pbmodel_withlm=$(deepspeech \
+    ${TASKCLUSTER_TMP_DIR}/${model_name} \
+    ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
+    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
+    ${TASKCLUSTER_TMP_DIR}/lm.binary \
+    ${TASKCLUSTER_TMP_DIR}/trie)
+  assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}"
+
+  phrase_pbmodel_withlm_stereo_44k=$(deepspeech \
+    ${TASKCLUSTER_TMP_DIR}/${model_name} \
+    ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav \
+    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
+    ${TASKCLUSTER_TMP_DIR}/lm.binary \
+    ${TASKCLUSTER_TMP_DIR}/trie)
+  assert_working_ldc93s1_prodmodel "${phrase_pbmodel_withlm_stereo_44k}"
+
+  # Only stderr is captured here; stdout is discarded.
+  phrase_pbmodel_withlm_mono_8k=$(deepspeech \
+    ${TASKCLUSTER_TMP_DIR}/${model_name} \
+    ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav \
+    ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
+    ${TASKCLUSTER_TMP_DIR}/lm.binary \
+    ${TASKCLUSTER_TMP_DIR}/trie \
+    2>&1 1>/dev/null)
+  assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
+}
+
 generic_download_tarxz()
 {
  target_dir=$1
@ -151,7 +275,7 @@ download_ctc_kenlm()
 download_data()
 {
  wget ${model_source} -O ${TASKCLUSTER_TMP_DIR}/${model_name}
-  wget https://catalog.ldc.upenn.edu/desc/addenda/LDC93S1.wav -O ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav
+  cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
  cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
  cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/lm.binary ${TASKCLUSTER_TMP_DIR}/lm.binary
  cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/trie ${TASKCLUSTER_TMP_DIR}/trie
@ -170,7 +294,7 @@ download_material()

  download_data

-  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
+  ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
 }

 install_pyenv()