From 0241f725cdf2b905ada67fb17550d522f5fde8a4 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Thu, 10 Oct 2019 21:45:33 +0200
Subject: [PATCH 1/7] Expose model sample rate in API

---
 native_client/deepspeech.cc |  6 ++++++
 native_client/deepspeech.h  | 10 ++++++++++
 2 files changed, 16 insertions(+)

diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc
index 439702a6..9aee0f8e 100644
--- a/native_client/deepspeech.cc
+++ b/native_client/deepspeech.cc
@@ -292,6 +292,12 @@ DS_CreateModel(const char* aModelPath,
   return DS_ERR_OK;
 }
 
+int
+DS_GetModelSampleRate(ModelState* aCtx)
+{
+  return aCtx->sample_rate_; // aCtx is dereferenced without a null check; callers must pass a valid ModelState
+}
+
 void
 DS_FreeModel(ModelState* ctx)
 {
diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h
index ef25e985..ed9d8638 100644
--- a/native_client/deepspeech.h
+++ b/native_client/deepspeech.h
@@ -92,6 +92,16 @@ int DS_CreateModel(const char* aModelPath,
                    unsigned int aBeamWidth,
                    ModelState** retval);
 
+/**
+ * @brief Return the sample rate expected by a model.
+ *
+ * @param aCtx A ModelState pointer created with {@link DS_CreateModel}.
+ *             Must not be NULL: the implementation dereferences it unchecked.
+ * @return Sample rate expected by the model for its input.
+ */
+DEEPSPEECH_EXPORT
+int DS_GetModelSampleRate(ModelState* aCtx);
+
 /**
  * @brief Frees associated resources and destroys model object.
  */

From c1ed6d711d68bc7e5e59e0f35054adda261fac77 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Thu, 10 Oct 2019 21:46:01 +0200
Subject: [PATCH 2/7] Use model sample rate in client.cc

---
 native_client/client.cc | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/native_client/client.cc b/native_client/client.cc
index 358f527f..80663fe6 100644
--- a/native_client/client.cc
+++ b/native_client/client.cc
@@ -111,7 +111,7 @@ typedef struct {
 } ds_audio_buffer;
 
 ds_audio_buffer
-GetAudioBuffer(const char* path)
+GetAudioBuffer(const char* path, int desired_sample_rate)
 {
   ds_audio_buffer res = {0};
 
@@ -121,7 +121,7 @@ GetAudioBuffer(const char* path)
 
   // Resample/reformat the audio so we can pass it through the MFCC functions
   sox_signalinfo_t target_signal = {
-      16000, // Rate
+      static_cast<sox_rate_t>(desired_sample_rate), // Rate
       1, // Channels
       16, // Precision
       SOX_UNSPEC, // Length
@@ -158,8 +158,10 @@ GetAudioBuffer(const char* path)
 
   assert(output);
 
-  if ((int)input->signal.rate < 16000) {
-    fprintf(stderr, "Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.\n", (int)input->signal.rate);
+  if ((int)input->signal.rate < desired_sample_rate) {
+    // Argument order must follow the format string: the file's rate first, then the model's rate (in Hz).
+    fprintf(stderr, "Warning: original sample rate (%d) is lower than %dHz. Up-sampling might produce erratic speech recognition.\n",
+            (int)input->signal.rate, desired_sample_rate);
   }
 
   // Setup the effects chain to decode/resample
@@ -205,7 +207,7 @@ GetAudioBuffer(const char* path)
 #endif // NO_SOX
 
 #ifdef NO_SOX
-  // FIXME: Hack and support only 16kHz mono 16-bits PCM
+  // FIXME: Hack and support only mono 16-bits PCM with standard SoX header
   FILE* wave = fopen(path, "r");
 
   size_t rv;
@@ -224,12 +226,12 @@ GetAudioBuffer(const char* path)
 
   assert(audio_format == 1); // 1 is PCM
   assert(num_channels == 1); // MONO
-  assert(sample_rate == 16000); // 16000 Hz
+  assert(sample_rate == desired_sample_rate); // at desired sample rate
   assert(bits_per_sample == 16); // 16 bits per sample
 
   fprintf(stderr, "audio_format=%d\n", audio_format);
   fprintf(stderr, "num_channels=%d\n", num_channels);
-  fprintf(stderr, "sample_rate=%d\n", sample_rate);
+  fprintf(stderr, "sample_rate=%d (desired=%d)\n", sample_rate, desired_sample_rate);
   fprintf(stderr, "bits_per_sample=%d\n", bits_per_sample);
 
   fseek(wave, 40, SEEK_SET); rv = fread(&res.buffer_size, 4, 1, wave);
@@ -257,7 +259,7 @@ GetAudioBuffer(const char* path)
 void
 ProcessFile(ModelState* context, const char* path, bool show_times)
 {
-  ds_audio_buffer audio = GetAudioBuffer(path);
+  ds_audio_buffer audio = GetAudioBuffer(path, DS_GetModelSampleRate(context));
 
   // Pass audio to DeepSpeech
   // We take half of buffer_size because buffer is a char* while

From afea2b423189411c41234ab94b27a1e5d50a2a89 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Thu, 10 Oct 2019 21:50:15 +0200
Subject: [PATCH 3/7] Expose and use model sample rate in Python

---
 native_client/python/__init__.py |  9 +++++++++
 native_client/python/client.py   | 21 ++++++++++-----------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py
index 62ea1eb5..b9166632 100644
--- a/native_client/python/__init__.py
+++ b/native_client/python/__init__.py
@@ -44,6 +44,15 @@ class Model(object):
             deepspeech.impl.FreeModel(self._impl)
             self._impl = None
 
+    def sampleRate(self):
+        """
+        Return the sample rate expected by the model.
+
+        :return: Sample rate, as reported by the native ``GetModelSampleRate`` binding.
+        :type: int
+        """
+        return deepspeech.impl.GetModelSampleRate(self._impl)
+
     def enableDecoderWithLM(self, *args, **kwargs):
         """
         Enable decoding using beam scoring with a KenLM language model.
diff --git a/native_client/python/client.py b/native_client/python/client.py
index b44c5122..3792a406 100644
--- a/native_client/python/client.py
+++ b/native_client/python/client.py
@@ -17,9 +17,6 @@ try:
 except ImportError:
     from pipes import quote
 
-# Define the sample rate for audio
-
-SAMPLE_RATE = 16000
 # These constants control the beam search decoder
 
 # Beam width used in the CTC decoder when building candidate transcriptions
@@ -32,16 +29,16 @@ LM_ALPHA = 0.75
 LM_BETA = 1.85
 
 
-def convert_samplerate(audio_path):
-    sox_cmd = 'sox {} --type raw --bits 16 --channels 1 --rate {} --encoding signed-integer --endian little --compression 0.0 --no-dither - '.format(quote(audio_path), SAMPLE_RATE)
+def convert_samplerate(audio_path, desired_sample_rate):
+    sox_cmd = 'sox {} --type raw --bits 16 --channels 1 --rate {} --encoding signed-integer --endian little --compression 0.0 --no-dither - '.format(quote(audio_path), desired_sample_rate)
     try:
         output = subprocess.check_output(shlex.split(sox_cmd), stderr=subprocess.PIPE)
     except subprocess.CalledProcessError as e:
         raise RuntimeError('SoX returned non-zero status: {}'.format(e.stderr))
     except OSError as e:
-        raise OSError(e.errno, 'SoX not found, use {}hz files or install it: {}'.format(SAMPLE_RATE, e.strerror))
+        raise OSError(e.errno, 'SoX not found, use {}hz files or install it: {}'.format(desired_sample_rate, e.strerror))
 
-    return SAMPLE_RATE, np.frombuffer(output, np.int16)
+    return desired_sample_rate, np.frombuffer(output, np.int16)
 
 
 def metadata_to_string(metadata):
@@ -81,6 +78,8 @@ def main():
     model_load_end = timer() - model_load_start
     print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)
 
+    desired_sample_rate = ds.sampleRate()
+
     if args.lm and args.trie:
         print('Loading language model from files {} {}'.format(args.lm, args.trie), file=sys.stderr)
         lm_load_start = timer()
@@ -90,13 +89,13 @@ def main():
 
     fin = wave.open(args.audio, 'rb')
     fs = fin.getframerate()
-    if fs != SAMPLE_RATE:
-        print('Warning: original sample rate ({}) is different than {}hz. Resampling might produce erratic speech recognition.'.format(fs, SAMPLE_RATE), file=sys.stderr)
-        fs, audio = convert_samplerate(args.audio)
+    if fs != desired_sample_rate:
+        print('Warning: original sample rate ({}) is different than {}hz. Resampling might produce erratic speech recognition.'.format(fs, desired_sample_rate), file=sys.stderr)
+        fs, audio = convert_samplerate(args.audio, desired_sample_rate)
     else:
         audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
 
-    audio_length = fin.getnframes() * (1/SAMPLE_RATE)
+    audio_length = fin.getnframes() * (1/fin.getframerate())  # file duration: original frame count over the file's own rate (fs may now be the resampled rate)
     fin.close()
 
     print('Running inference.', file=sys.stderr)

From 0be2787e4ec96edfb92d1e2ac80c4e6f74327198 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Thu, 10 Oct 2019 21:55:08 +0200
Subject: [PATCH 4/7] Expose and use model sample rate in JavaScript

---
 native_client/javascript/client.js | 40 ++++++++++++++++--------------
 native_client/javascript/index.js  |  9 +++++++
 2 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/native_client/javascript/client.js b/native_client/javascript/client.js
index e356c2e8..8bbdce12 100644
--- a/native_client/javascript/client.js
+++ b/native_client/javascript/client.js
@@ -62,11 +62,29 @@ function metadataToString(metadata) {
   return retval;
 }
 
+console.error('Loading model from file %s', args['model']);
+const model_load_start = process.hrtime();
+var model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
+const model_load_end = process.hrtime(model_load_start);
+console.error('Loaded model in %ds.', totalTime(model_load_end));
+
+var desired_sample_rate = model.sampleRate();
+
+if (args['lm'] && args['trie']) {
+  console.error('Loading language model from files %s %s', args['lm'], args['trie']);
+  const lm_load_start = process.hrtime();
+  model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
+  const lm_load_end = process.hrtime(lm_load_start);
+  console.error('Loaded language model in %ds.', totalTime(lm_load_end));
+}
+
 const buffer = Fs.readFileSync(args['audio']);
 const result = Wav.decode(buffer);
 
-if (result.sampleRate < 16000) {
-  console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
+if (result.sampleRate < desired_sample_rate) {
+  console.error('Warning: original sample rate (' + result.sampleRate + ') ' +
+                'is lower than ' + desired_sample_rate + 'Hz. ' +
+                'Up-sampling might produce erratic speech recognition.');
 }
 
 function bufferToStream(buffer) {
@@ -84,7 +102,7 @@ bufferToStream(buffer).
     },
     output: {
       bits: 16,
-      rate: 16000,
+      rate: desired_sample_rate,
       channels: 1,
       encoding: 'signed-integer',
       endian: 'little',
@@ -97,23 +115,9 @@ bufferToStream(buffer).
 audioStream.on('finish', () => {
   let audioBuffer = audioStream.toBuffer();
 
-  console.error('Loading model from file %s', args['model']);
-  const model_load_start = process.hrtime();
-  var model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
-  const model_load_end = process.hrtime(model_load_start);
-  console.error('Loaded model in %ds.', totalTime(model_load_end));
-
-  if (args['lm'] && args['trie']) {
-    console.error('Loading language model from files %s %s', args['lm'], args['trie']);
-    const lm_load_start = process.hrtime();
-    model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
-    const lm_load_end = process.hrtime(lm_load_start);
-    console.error('Loaded language model in %ds.', totalTime(lm_load_end));
-  }
-
   const inference_start = process.hrtime();
   console.error('Running inference.');
-  const audioLength = (audioBuffer.length / 2) * ( 1 / 16000);
+  const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate);
 
   // We take half of the buffer_size because buffer is a char* while
   // LocalDsSTT() expected a short*
diff --git a/native_client/javascript/index.js b/native_client/javascript/index.js
index f6446f4d..ad639099 100644
--- a/native_client/javascript/index.js
+++ b/native_client/javascript/index.js
@@ -45,6 +45,15 @@ function Model() {
     this._impl = impl;
 }
 
+/**
+ * Return the sample rate expected by the model.
+ *
+ * @return {number} Sample rate, as returned by the native GetModelSampleRate binding.
+ */
+Model.prototype.sampleRate = function() {
+    return binding.GetModelSampleRate(this._impl);
+}
+
 /**
  * Enable decoding using beam scoring with a KenLM language model.
  *

From 5cb15ca6ed80d35529e536b4024cc123845955ae Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Thu, 10 Oct 2019 22:04:33 +0200
Subject: [PATCH 5/7] Use model sample rate in examples

---
 .../net_framework/DeepSpeechWPF/MainWindow.xaml.cs    |  8 +++-----
 examples/nodejs_wav/index.js                          | 11 ++++++-----
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
index e332da6d..31b1f9d4 100644
--- a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
+++ b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
@@ -202,15 +202,13 @@ namespace DeepSpeechWPF
             {
                 _audioCapture.Device = _audioCaptureDevices[cbxAudioInputs.SelectedIndex]; 
             }
-            InitilizeAudioCapture();
+            InitializeAudioCapture(_sttClient.GetModelSampleRate());
         }
 
-       
-
         /// <summary>
         /// Initializes the recorder and setup the native stream.
         /// </summary>
-        private void InitilizeAudioCapture()
+        private void InitializeAudioCapture(int desiredSampleRate)
         {
             _audioCapture.Initialize();
             _audioCapture.DataAvailable += _capture_DataAvailable;
@@ -218,7 +216,7 @@ namespace DeepSpeechWPF
             //create a source, that converts the data provided by the
             //soundInSource to required by the deepspeech model
             _convertedSource = _soundInSource
-               .ChangeSampleRate(16000) // sample rate
+               .ChangeSampleRate(desiredSampleRate) // sample rate
                .ToSampleSource()
                .ToWaveSource(16); //bits per sample
              
diff --git a/examples/nodejs_wav/index.js b/examples/nodejs_wav/index.js
index 20ccb2ab..7883a010 100644
--- a/examples/nodejs_wav/index.js
+++ b/examples/nodejs_wav/index.js
@@ -11,6 +11,8 @@ let alphabetPath = './models/alphabet.txt';
 
 let model = new DeepSpeech.Model(modelPath, alphabetPath, BEAM_WIDTH);
 
+let desiredSampleRate = model.sampleRate();
+
 const LM_ALPHA = 0.75;
 const LM_BETA = 1.85;
 let lmPath = './models/lm.binary';
@@ -28,8 +30,8 @@ if (!Fs.existsSync(audioFile)) {
 const buffer = Fs.readFileSync(audioFile);
 const result = Wav.decode(buffer);
 
-if (result.sampleRate < 16000) {
-	console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
+if (result.sampleRate < desiredSampleRate) {
+	console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than ' + desiredSampleRate + 'Hz. Up-sampling might produce erratic speech recognition.');
 }
 
 function bufferToStream(buffer) {
@@ -47,7 +49,7 @@ pipe(Sox({
 	},
 	output: {
 		bits: 16,
-		rate: 16000,
+		rate: desiredSampleRate,
 		channels: 1,
 		encoding: 'signed-integer',
 		endian: 'little',
@@ -58,10 +60,9 @@ pipe(Sox({
 pipe(audioStream);
 
 audioStream.on('finish', () => {
-	
 	let audioBuffer = audioStream.toBuffer();
 	
-	const audioLength = (audioBuffer.length / 2) * ( 1 / 16000);
+	const audioLength = (audioBuffer.length / 2) * (1 / desiredSampleRate);
 	console.log('audio length', audioLength);
 	
 	let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2));

From 4dc18dd8ee943438b649861bb483ebcf140815ca Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Thu, 10 Oct 2019 22:04:44 +0200
Subject: [PATCH 6/7] Expose and use model sample rate in .NET

---
 native_client/dotnet/DeepSpeechClient/DeepSpeech.cs      | 9 +++++++++
 .../dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs    | 6 ++++++
 native_client/dotnet/DeepSpeechClient/NativeImp.cs       | 3 +++
 3 files changed, 18 insertions(+)

diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
index 25fcc109..9bbf5e3c 100644
--- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
@@ -64,6 +64,15 @@ namespace DeepSpeechClient
             EvaluateResultCode(resultCode);
         }
 
+        /// <summary>
+        /// Return the sample rate expected by the model.
+        /// </summary>
+        /// <returns>Sample rate reported by the native DS_GetModelSampleRate call for this instance's model state.</returns>
+        public unsafe int GetModelSampleRate()
+        {
+            return NativeImp.DS_GetModelSampleRate(_modelStatePP);
+        }
+
         /// <summary>
         /// Evaluate the result code and will raise an exception if necessary.
         /// </summary>
diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
index 79af2964..f7bbee98 100644
--- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
@@ -24,6 +24,12 @@ namespace DeepSpeechClient.Interfaces
                    string aAlphabetConfigPath,
                    uint aBeamWidth);
 
+        /// <summary>
+        /// Return the sample rate expected by the model.
+        /// </summary>
+        /// <returns>Sample rate the model expects for its input audio.</returns>
+        unsafe int GetModelSampleRate();
+
         /// <summary>
         /// Enable decoding using beam scoring with a KenLM language model.
         /// </summary>
diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/DeepSpeechClient/NativeImp.cs
index 74de9197..92cdb150 100644
--- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs
+++ b/native_client/dotnet/DeepSpeechClient/NativeImp.cs
@@ -20,6 +20,9 @@ namespace DeepSpeechClient
                    uint aBeamWidth,
                    ref IntPtr** pint);
 
+        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
+        internal unsafe static extern int DS_GetModelSampleRate(IntPtr** aCtx); // P/Invoke binding for the native DS_GetModelSampleRate export
+
         [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
         internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(IntPtr** aCtx,
                   string aLMPath,

From 673d620a67644f73aace97964851b4b9405350c1 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Thu, 10 Oct 2019 22:07:30 +0200
Subject: [PATCH 7/7] Expose and use model sample rate in Java

---
 .../java/org/mozilla/deepspeech/DeepSpeechActivity.java  | 2 +-
 .../deepspeech/libdeepspeech/DeepSpeechModel.java        | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java b/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
index 6b9c45b3..b44fdfab 100644
--- a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
+++ b/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java
@@ -77,7 +77,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
             // tv_numChannels.setText("numChannels=" + (numChannels == 1 ? "MONO" : "!MONO"));
 
             wave.seek(24); int sampleRate = this.readLEInt(wave);
-            assert (sampleRate == 16000); // 16000 Hz
+            assert (sampleRate == this._m.sampleRate()); // desired sample rate
             // tv_sampleRate.setText("sampleRate=" + (sampleRate == 16000 ? "16kHz" : "!16kHz"));
 
             wave.seek(34); char bitsPerSample = this.readLEChar(wave);
diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java
index 3a665c5e..0bbc8fcc 100644
--- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java
+++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java
@@ -32,6 +32,15 @@ public class DeepSpeechModel {
         this._msp  = impl.modelstatep_value(this._mspp);
     }
 
+   /**
+    * @brief Return the sample rate expected by the model.
+    *
+    * @return Sample rate, as returned by the native GetModelSampleRate binding.
+    */
+    public int sampleRate() {
+        return impl.GetModelSampleRate(this._msp);
+    }
+
    /**
     * @brief Frees associated resources and destroys model object.
     */