Merge pull request #2420 from mozilla/remove-sr-param

Remove unused sample rate param from API
Reuben Morais, 2019-10-10 21:05:29 +02:00, committed by GitHub
commit 315a67bf69
21 changed files with 114 additions and 114 deletions
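
The change is mechanical across every binding: each one-shot and streaming entry point drops its sample-rate argument, which the native code never consulted. Audio must already be at the sample rate the model was trained on (16 kHz for the released models), so the parameter was dead weight. A minimal before/after sketch in the Python binding (the file name is a placeholder, and the `Model` constructor arguments, unchanged by this PR, are elided):

```python
import wave

import numpy as np
from deepspeech import Model

ds = Model(...)  # constructor arguments unchanged by this PR; elided here

with wave.open('audio.wav', 'rb') as fin:
    fs = fin.getframerate()  # still useful locally (duration, sanity checks)
    audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

# Before: decoded = ds.stt(audio, fs)
decoded = ds.stt(audio)  # after: no sample rate argument
```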

View File

@@ -45,7 +45,7 @@ def tflite_worker(model, alphabet, lm, trie, queue_in, queue_out, gpu_mask):
         audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
         fin.close()
-        decoded = ds.stt(audio, fs)
+        decoded = ds.stt(audio)
         queue_out.put({'wav': wavname, 'prediction': decoded, 'ground_truth': msg['transcript']})
         print(queue_out.qsize(), end='\r') # Update the current progress

View File

@@ -95,7 +95,7 @@ const ffmpeg = spawn('ffmpeg', [
 ]);

 let audioLength = 0;
-let sctx = model.createStream(AUDIO_SAMPLE_RATE);
+let sctx = model.createStream();

 function finishStream() {
   const model_load_start = process.hrtime();
@@ -108,7 +108,7 @@ function finishStream() {

 function intermediateDecode() {
   finishStream();
-  sctx = model.createStream(AUDIO_SAMPLE_RATE);
+  sctx = model.createStream();
 }

 function feedAudioContent(chunk) {
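
The hunk above shows the Node streaming pattern: finish the stream to get an intermediate result, then create a fresh one. The same pattern in the Python binding, as a sketch (`ds` is a loaded `deepspeech.Model`; `finishStream` is assumed to keep its existing one-argument form, which this PR does not touch):

```python
ctx = ds.createStream()  # sample rate argument removed

def intermediate_decode():
    """Decode everything fed so far, then start over with a fresh stream."""
    global ctx
    print(ds.finishStream(ctx))  # finishing consumes the stream state
    ctx = ds.createStream()
```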

View File

@@ -130,7 +130,7 @@ namespace DeepSpeechWPF
             watch.Start();
             await Task.Run(() =>
             {
-                string speechResult = _sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);
+                string speechResult = _sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
                 watch.Stop();
                 Dispatcher.Invoke(() =>
                 {
@@ -250,7 +250,7 @@ namespace DeepSpeechWPF

         private void BtnStartRecording_Click(object sender, RoutedEventArgs e)
         {
-            _sttClient.CreateStream(16000);
+            _sttClient.CreateStream();
             _audioCapture.Start();
             btnStartRecording.IsEnabled = false;
             btnStopRecording.IsEnabled = true;

View File

@@ -64,7 +64,7 @@ audioStream.on('finish', () => {
  const audioLength = (audioBuffer.length / 2) * ( 1 / 16000);
  console.log('audio length', audioLength);

-  let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000);
+  let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2));

  console.log('result:', result);
 });

View File

@@ -44,12 +44,12 @@ Returns a list [Inference, Inference Time, Audio Length]
 '''
 def stt(ds, audio, fs):
     inference_time = 0.0
-    audio_length = len(audio) * (1 / 16000)
+    audio_length = len(audio) * (1 / fs)

     # Run Deepspeech
     logging.debug('Running inference...')
     inference_start = timer()
-    output = ds.stt(audio, fs)
+    output = ds.stt(audio)
     inference_end = timer() - inference_start
     inference_time += inference_end
     logging.debug('Inference took %0.3fs for %0.3fs audio file.' % (inference_end, audio_length))

View File

@@ -54,23 +54,23 @@ char* JSONOutput(Metadata* metadata);
 ds_result
 LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
-           int aSampleRate, bool extended_output, bool json_output)
+           bool extended_output, bool json_output)
 {
   ds_result res = {0};

   clock_t ds_start_time = clock();

   if (extended_output) {
-    Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, aSampleRate);
+    Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize);
     res.string = metadataToString(metadata);
     DS_FreeMetadata(metadata);
   } else if (json_output) {
-    Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, aSampleRate);
+    Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize);
     res.string = JSONOutput(metadata);
     DS_FreeMetadata(metadata);
   } else if (stream_size > 0) {
     StreamingState* ctx;
-    int status = DS_CreateStream(aCtx, aSampleRate, &ctx);
+    int status = DS_CreateStream(aCtx, &ctx);
     if (status != DS_ERR_OK) {
       res.string = strdup("");
       return res;
@@ -94,7 +94,7 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
     }
     res.string = DS_FinishStream(ctx);
   } else {
-    res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize, aSampleRate);
+    res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize);
   }

   clock_t ds_end_infer = clock();
@@ -108,7 +108,6 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
 typedef struct {
   char* buffer;
   size_t buffer_size;
-  int sample_rate;
 } ds_audio_buffer;

 ds_audio_buffer
@@ -159,8 +158,6 @@ GetAudioBuffer(const char* path)
   assert(output);

-  res.sample_rate = (int)output->signal.rate;
-
   if ((int)input->signal.rate < 16000) {
     fprintf(stderr, "Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.\n", (int)input->signal.rate);
   }
@@ -221,7 +218,6 @@ GetAudioBuffer(const char* path)
   unsigned int sample_rate;
   fseek(wave, 24, SEEK_SET); rv = fread(&sample_rate, 4, 1, wave);
-  res.sample_rate = (int)sample_rate;

   unsigned short bits_per_sample;
   fseek(wave, 34, SEEK_SET); rv = fread(&bits_per_sample, 2, 1, wave);
@@ -269,7 +265,6 @@ ProcessFile(ModelState* context, const char* path, bool show_times)
   ds_result result = LocalDsSTT(context,
                                 (const short*)audio.buffer,
                                 audio.buffer_size / 2,
-                                audio.sample_rate,
                                 extended_metadata,
                                 json_output);
   free(audio.buffer);

View File

@@ -318,7 +318,6 @@ DS_EnableDecoderWithLM(ModelState* aCtx,
 int
 DS_CreateStream(ModelState* aCtx,
-                unsigned int aSampleRate,
                 StreamingState** retval)
 {
   *retval = nullptr;
@@ -383,11 +382,10 @@ DS_FinishStreamWithMetadata(StreamingState* aSctx)
 StreamingState*
 CreateStreamAndFeedAudioContent(ModelState* aCtx,
                                 const short* aBuffer,
-                                unsigned int aBufferSize,
-                                unsigned int aSampleRate)
+                                unsigned int aBufferSize)
 {
   StreamingState* ctx;
-  int status = DS_CreateStream(aCtx, aSampleRate, &ctx);
+  int status = DS_CreateStream(aCtx, &ctx);
   if (status != DS_ERR_OK) {
     return nullptr;
   }
@@ -398,20 +396,18 @@ CreateStreamAndFeedAudioContent(ModelState* aCtx,
 char*
 DS_SpeechToText(ModelState* aCtx,
                 const short* aBuffer,
-                unsigned int aBufferSize,
-                unsigned int aSampleRate)
+                unsigned int aBufferSize)
 {
-  StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize, aSampleRate);
+  StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize);
   return DS_FinishStream(ctx);
 }

 Metadata*
 DS_SpeechToTextWithMetadata(ModelState* aCtx,
                             const short* aBuffer,
-                            unsigned int aBufferSize,
-                            unsigned int aSampleRate)
+                            unsigned int aBufferSize)
 {
-  StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize, aSampleRate);
+  StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize);
   return DS_FinishStreamWithMetadata(ctx);
 }
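
After this refactor the one-shot entry points are thin wrappers: `DS_SpeechToText` is literally "create a stream, feed the whole buffer, finish". The equivalence carries over to the bindings; in Python (a sketch under the same assumptions as the earlier examples):

```python
# One-shot call...
text = ds.stt(audio)

# ...does the same work as the streaming sequence it wraps:
ctx = ds.createStream()
ds.feedAudioContent(ctx, audio, len(audio))
text = ds.finishStream(ctx)
```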

View File

@@ -124,9 +124,8 @@ int DS_EnableDecoderWithLM(ModelState* aCtx,
  *
  * @param aCtx The ModelState pointer for the model to use.
  * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
- *                sample rate.
+ *                sample rate (matching what the model was trained on).
  * @param aBufferSize The number of samples in the audio signal.
- * @param aSampleRate The sample-rate of the audio signal.
  *
  * @return The STT result. The user is responsible for freeing the string using
  *         {@link DS_FreeString()}. Returns NULL on error.
@@ -134,8 +133,7 @@ int DS_EnableDecoderWithLM(ModelState* aCtx,
 DEEPSPEECH_EXPORT
 char* DS_SpeechToText(ModelState* aCtx,
                       const short* aBuffer,
-                      unsigned int aBufferSize,
-                      unsigned int aSampleRate);
+                      unsigned int aBufferSize);

 /**
  * @brief Use the DeepSpeech model to perform Speech-To-Text and output metadata
@@ -143,9 +141,8 @@ char* DS_SpeechToText(ModelState* aCtx,
  *
  * @param aCtx The ModelState pointer for the model to use.
  * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
- *                sample rate.
+ *                sample rate (matching what the model was trained on).
  * @param aBufferSize The number of samples in the audio signal.
- * @param aSampleRate The sample-rate of the audio signal.
  *
  * @return Outputs a struct of individual letters along with their timing information.
  *         The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error.
@@ -153,8 +150,7 @@ char* DS_SpeechToText(ModelState* aCtx,
 DEEPSPEECH_EXPORT
 Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
                                       const short* aBuffer,
-                                      unsigned int aBufferSize,
-                                      unsigned int aSampleRate);
+                                      unsigned int aBufferSize);

 /**
  * @brief Create a new streaming inference state. The streaming state returned
@@ -162,7 +158,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
  *            and {@link DS_FinishStream()}.
  *
  * @param aCtx The ModelState pointer for the model to use.
- * @param aSampleRate The sample-rate of the audio signal.
  * @param[out] retval an opaque pointer that represents the streaming state. Can
  *             be NULL if an error occurs.
  *
@@ -170,7 +165,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
  */
 DEEPSPEECH_EXPORT
 int DS_CreateStream(ModelState* aCtx,
-                    unsigned int aSampleRate,
                     StreamingState** retval);

 /**
@@ -178,7 +172,7 @@ int DS_CreateStream(ModelState* aCtx,
  *
  * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
  * @param aBuffer An array of 16-bit, mono raw audio samples at the
- *                appropriate sample rate.
+ *                appropriate sample rate (matching what the model was trained on).
  * @param aBufferSize The number of samples in @p aBuffer.
  */
 DEEPSPEECH_EXPORT

View File

@@ -71,17 +71,17 @@ int DS_EnableDecoderWithLM(ModelState* aCtx,
  *            and {@link DS_FinishStream()}.
  *
  * @param aCtx The ModelState pointer for the model to use.
- * @param aSampleRate The sample-rate of the audio signal.
+ * @param aSampleRate UNUSED, DEPRECATED.
  * @param[out] retval an opaque pointer that represents the streaming state. Can
  *             be NULL if an error occurs.
  *
  * @return Zero for success, non-zero on failure.
  */
 int DS_SetupStream(ModelState* aCtx,
-                   unsigned int aSampleRate,
+                   unsigned int /*aSampleRate*/,
                    StreamingState** retval)
 {
-  return DS_CreateStream(aCtx, aSampleRate, retval);
+  return DS_CreateStream(aCtx, retval);
 }

@@ -98,4 +98,45 @@ void DS_DiscardStream(StreamingState* aSctx)
   return DS_FreeStream(aSctx);
 }

+/**
+ * @brief Use the DeepSpeech model to perform Speech-To-Text.
+ *
+ * @param aCtx The ModelState pointer for the model to use.
+ * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
+ *                sample rate (matching what the model was trained on).
+ * @param aBufferSize The number of samples in the audio signal.
+ * @param aSampleRate UNUSED, DEPRECATED.
+ *
+ * @return The STT result. The user is responsible for freeing the string using
+ *         {@link DS_FreeString()}. Returns NULL on error.
+ */
+char* DS_SpeechToText(ModelState* aCtx,
+                      const short* aBuffer,
+                      unsigned int aBufferSize,
+                      unsigned int /*aSampleRate*/)
+{
+  return DS_SpeechToText(aCtx, aBuffer, aBufferSize);
+}
+
+/**
+ * @brief Use the DeepSpeech model to perform Speech-To-Text and output metadata
+ *        about the results.
+ *
+ * @param aCtx The ModelState pointer for the model to use.
+ * @param aBuffer A 16-bit, mono raw audio signal at the appropriate
+ *                sample rate (matching what the model was trained on).
+ * @param aBufferSize The number of samples in the audio signal.
+ * @param aSampleRate UNUSED, DEPRECATED.
+ *
+ * @return Outputs a struct of individual letters along with their timing information.
+ *         The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error.
+ */
+Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
+                                      const short* aBuffer,
+                                      unsigned int aBufferSize,
+                                      unsigned int /*aSampleRate*/)
+{
+  return DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize);
+}
+
 #endif /* DEEPSPEECH_COMPAT_H */
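
Note that this compatibility shim only helps C and C++ callers: the old signatures still compile and forward to the new entry points. Users of the scripting bindings must drop the argument themselves. In Python, for instance, the old keyword form now fails at the call site:

```python
ds.createStream(sample_rate=16000)  # now raises TypeError (unexpected keyword argument)
ds.createStream()                   # the new, argument-free form
```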

View File

@@ -148,7 +148,7 @@ namespace DeepSpeechClient
         /// <summary>
         /// Feeds audio samples to an ongoing streaming inference.
         /// </summary>
-        /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate.</param>
+        /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
        public unsafe void FeedAudioContent(short[] aBuffer, uint aBufferSize)
        {
            NativeImp.DS_FeedAudioContent(_streamingStatePP, aBuffer, aBufferSize);
@@ -193,11 +193,10 @@ namespace DeepSpeechClient
        /// <summary>
        /// Creates a new streaming inference state.
        /// </summary>
-        /// <param name="aSampleRate">The sample-rate of the audio signal</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-        public unsafe void CreateStream(uint aSampleRate)
+        public unsafe void CreateStream()
        {
-            var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
+            var resultCode = NativeImp.DS_CreateStream(_modelStatePP, ref _streamingStatePP);
            EvaluateResultCode(resultCode);
        }
@@ -230,25 +229,23 @@ namespace DeepSpeechClient
        /// <summary>
        /// Use the DeepSpeech model to perform Speech-To-Text.
        /// </summary>
-        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
+        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
        /// <param name="aBufferSize">The number of samples in the audio signal.</param>
-        /// <param name="aSampleRate">The sample-rate of the audio signal.</param>
        /// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
-        public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize, uint aSampleRate)
+        public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize)
        {
-            return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize, aSampleRate).PtrToString();
+            return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString();
        }

        /// <summary>
        /// Use the DeepSpeech model to perform Speech-To-Text.
        /// </summary>
-        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
+        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
        /// <param name="aBufferSize">The number of samples in the audio signal.</param>
-        /// <param name="aSampleRate">The sample-rate of the audio signal.</param>
        /// <returns>The extended metadata. The user is responsible for freeing the struct. Returns NULL on error.</returns>
-        public unsafe Models.Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aSampleRate)
+        public unsafe Models.Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize)
        {
-            return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aSampleRate).PtrToMetadata();
+            return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize).PtrToMetadata();
        }

        #endregion

View File

@@ -40,24 +40,20 @@ namespace DeepSpeechClient.Interfaces
        /// <summary>
        /// Use the DeepSpeech model to perform Speech-To-Text.
        /// </summary>
-        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
+        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
        /// <param name="aBufferSize">The number of samples in the audio signal.</param>
-        /// <param name="aSampleRate">The sample-rate of the audio signal.</param>
        /// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
        unsafe string SpeechToText(short[] aBuffer,
-                uint aBufferSize,
-                uint aSampleRate);
+                uint aBufferSize);

        /// <summary>
        /// Use the DeepSpeech model to perform Speech-To-Text.
        /// </summary>
-        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
+        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
        /// <param name="aBufferSize">The number of samples in the audio signal.</param>
-        /// <param name="aSampleRate">The sample-rate of the audio signal.</param>
        /// <returns>The extended metadata result. The user is responsible for freeing the struct. Returns NULL on error.</returns>
        unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer,
-                uint aBufferSize,
-                uint aSampleRate);
+                uint aBufferSize);

        /// <summary>
        /// Destroy a streaming state without decoding the computed logits.
@@ -79,14 +75,13 @@ namespace DeepSpeechClient.Interfaces
        /// <summary>
        /// Creates a new streaming inference state.
        /// </summary>
-        /// <param name="aSampleRate">The sample-rate of the audio signal</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-        unsafe void CreateStream(uint aSampleRate);
+        unsafe void CreateStream();

        /// <summary>
        /// Feeds audio samples to an ongoing streaming inference.
        /// </summary>
-        /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate.</param>
+        /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
        unsafe void FeedAudioContent(short[] aBuffer, uint aBufferSize);

        /// <summary>
/// <summary> /// <summary>

View File

@@ -31,21 +31,19 @@ namespace DeepSpeechClient
                     CharSet = CharSet.Ansi, SetLastError = true)]
        internal static unsafe extern IntPtr DS_SpeechToText(IntPtr** aCtx,
                     short[] aBuffer,
-                     uint aBufferSize,
-                     uint aSampleRate);
+                     uint aBufferSize);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, SetLastError = true)]
        internal static unsafe extern IntPtr DS_SpeechToTextWithMetadata(IntPtr** aCtx,
                     short[] aBuffer,
-                     uint aBufferSize,
-                     uint aSampleRate);
+                     uint aBufferSize);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern void DS_FreeModel(IntPtr** aCtx);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern ErrorCodes DS_CreateStream(IntPtr** aCtx,
-                     uint aSampleRate,
                     ref IntPtr** retval);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern void DS_FreeStream(ref IntPtr** aSctx);

View File

@@ -91,12 +91,12 @@ namespace CSharpExamples
                string speechResult;
                if (extended)
                {
-                    Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);
+                    Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
                    speechResult = MetadataToString(metaResult);
                }
                else
                {
-                    speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);
+                    speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
                }

                stopwatch.Stop();

View File

@@ -100,7 +100,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
                long inferenceStartTime = System.currentTimeMillis();

-                String decoded = this._m.stt(shorts, shorts.length, sampleRate);
+                String decoded = this._m.stt(shorts, shorts.length);

                inferenceExecTime = System.currentTimeMillis() - inferenceStartTime;

View File

@@ -104,9 +104,9 @@ public class BasicTest {
            ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);

            if (extendedMetadata) {
-                return metadataToString(m.sttWithMetadata(shorts, shorts.length, sampleRate));
+                return metadataToString(m.sttWithMetadata(shorts, shorts.length));
            } else {
-                return m.stt(shorts, shorts.length, sampleRate);
+                return m.stt(shorts, shorts.length);
            }
        } catch (FileNotFoundException ex) {

View File

@@ -57,14 +57,13 @@ public class DeepSpeechModel {
     * @brief Use the DeepSpeech model to perform Speech-To-Text.
     *
     * @param buffer A 16-bit, mono raw audio signal at the appropriate
-     *               sample rate.
+     *               sample rate (matching what the model was trained on).
     * @param buffer_size The number of samples in the audio signal.
-     * @param sample_rate The sample-rate of the audio signal.
     *
     * @return The STT result.
     */
-    public String stt(short[] buffer, int buffer_size, int sample_rate) {
-        return impl.SpeechToText(this._msp, buffer, buffer_size, sample_rate);
+    public String stt(short[] buffer, int buffer_size) {
+        return impl.SpeechToText(this._msp, buffer, buffer_size);
    }

    /**
@@ -72,14 +71,13 @@ public class DeepSpeechModel {
     * about the results.
     *
     * @param buffer A 16-bit, mono raw audio signal at the appropriate
-     *               sample rate.
+     *               sample rate (matching what the model was trained on).
     * @param buffer_size The number of samples in the audio signal.
-     * @param sample_rate The sample-rate of the audio signal.
     *
     * @return Outputs a Metadata object of individual letters along with their timing information.
     */
-    public Metadata sttWithMetadata(short[] buffer, int buffer_size, int sample_rate) {
-        return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
+    public Metadata sttWithMetadata(short[] buffer, int buffer_size) {
+        return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size);
    }

    /**
@@ -87,12 +85,11 @@ public class DeepSpeechModel {
     * by this function can then be passed to feedAudioContent()
     * and finishStream().
     *
-     * @param sample_rate The sample-rate of the audio signal.
     * @return An opaque object that represents the streaming state.
     */
-    public DeepSpeechStreamingState createStream(int sample_rate) {
+    public DeepSpeechStreamingState createStream() {
        SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
-        impl.CreateStream(this._msp, sample_rate, ssp);
+        impl.CreateStream(this._msp, ssp);
        return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
    }

@@ -101,7 +98,7 @@ public class DeepSpeechModel {
     *
     * @param cctx A streaming state pointer returned by createStream().
     * @param buffer An array of 16-bit, mono raw audio samples at the
-     *               appropriate sample rate.
+     *               appropriate sample rate (matching what the model was trained on).
     * @param buffer_size The number of samples in @p buffer.
     */
    public void feedAudioContent(DeepSpeechStreamingState ctx, short[] buffer, int buffer_size) {

View File

@@ -118,9 +118,9 @@ audioStream.on('finish', () => {
  // We take half of the buffer_size because buffer is a char* while
  // LocalDsSTT() expected a short*
  if (args['extended']) {
-    console.log(metadataToString(model.sttWithMetadata(audioBuffer.slice(0, audioBuffer.length / 2), 16000)));
+    console.log(metadataToString(model.sttWithMetadata(audioBuffer.slice(0, audioBuffer.length / 2))));
  } else {
-    console.log(model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000));
+    console.log(model.stt(audioBuffer.slice(0, audioBuffer.length / 2)));
  }
  const inference_stop = process.hrtime(inference_start);
  console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4));

View File

@@ -64,9 +64,8 @@ Model.prototype.enableDecoderWithLM = function() {
 /**
 * Use the DeepSpeech model to perform Speech-To-Text.
 *
- * @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate.
+ * @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
 * @param {number} aBufferSize The number of samples in the audio signal.
- * @param {number} aSampleRate The sample-rate of the audio signal.
 *
 * @return {string} The STT result. Returns undefined on error.
 */
@@ -79,9 +78,8 @@ Model.prototype.stt = function() {
 /**
 * Use the DeepSpeech model to perform Speech-To-Text and output metadata
 * about the results.
 *
- * @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate.
+ * @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
 * @param {number} aBufferSize The number of samples in the audio signal.
- * @param {number} aSampleRate The sample-rate of the audio signal.
 *
 * @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
 */
@@ -93,7 +91,6 @@ Model.prototype.sttWithMetadata = function() {
 /**
 * Create a new streaming inference state. The streaming state returned by this function can then be passed to :js:func:`Model.feedAudioContent` and :js:func:`Model.finishStream`.
 *
- * @param {number} aSampleRate The sample-rate of the audio signal.
 * @return {object} an opaque object that represents the streaming state.
 *
 * @throws on error
@@ -114,7 +111,7 @@ Model.prototype.createStream = function() {
 *
 * @param {object} aSctx A streaming state returned by :js:func:`Model.setupStream`.
 * @param {buffer} aBuffer An array of 16-bit, mono raw audio samples at the
- *                 appropriate sample rate.
+ *                 appropriate sample rate (matching what the model was trained on).
 * @param {number} aBufferSize The number of samples in @param aBuffer.
 */
 Model.prototype.feedAudioContent = function() {

View File

@@ -69,15 +69,12 @@ class Model(object):
        """
        Use the DeepSpeech model to perform Speech-To-Text.

-        :param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate.
+        :param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
        :type aBuffer: int array

        :param aBufferSize: The number of samples in the audio signal.
        :type aBufferSize: int

-        :param aSampleRate: The sample-rate of the audio signal.
-        :type aSampleRate: int
-
        :return: The STT result.
        :type: str
        """
@@ -87,34 +84,27 @@ class Model(object):
        """
        Use the DeepSpeech model to perform Speech-To-Text and output metadata about the results.

-        :param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate.
+        :param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
        :type aBuffer: int array

        :param aBufferSize: The number of samples in the audio signal.
        :type aBufferSize: int

-        :param aSampleRate: The sample-rate of the audio signal.
-        :type aSampleRate: int
-
        :return: Outputs a struct of individual letters along with their timing information.
        :type: :func:`Metadata`
        """
        return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)

-    def createStream(self, sample_rate=16000):
+    def createStream(self):
        """
        Create a new streaming inference state. The streaming state returned
        by this function can then be passed to :func:`feedAudioContent()` and :func:`finishStream()`.

-        :param aSampleRate: The sample-rate of the audio signal.
-        :type aSampleRate: int
-
        :return: Object holding the stream

        :throws: RuntimeError on error
        """
-        status, ctx = deepspeech.impl.CreateStream(self._impl,
-                                                   aSampleRate=sample_rate)
+        status, ctx = deepspeech.impl.CreateStream(self._impl)
        if status != 0:
            raise RuntimeError("CreateStream failed with error code {}".format(status))
        return ctx

@@ -127,7 +117,7 @@ class Model(object):
        :param aSctx: A streaming state pointer returned by :func:`createStream()`.
        :type aSctx: object

-        :param aBuffer: An array of 16-bit, mono raw audio samples at the appropriate sample rate.
+        :param aBuffer: An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).
        :type aBuffer: int array

        :param aBufferSize: The number of samples in @p aBuffer.
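
With the parameter gone, the Python streaming lifecycle reduces to the sketch below (the chunk count is arbitrary; `feedAudioContent` and `finishStream` are assumed to keep the signatures documented above, which this PR does not touch):

```python
ctx = ds.createStream()
for chunk in np.array_split(audio, 10):  # any chunking works
    ds.feedAudioContent(ctx, chunk, len(chunk))
print(ds.finishStream(ctx))
```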

View File

@@ -102,9 +102,9 @@ def main():
    print('Running inference.', file=sys.stderr)
    inference_start = timer()
    if args.extended:
-        print(metadata_to_string(ds.sttWithMetadata(audio, fs)))
+        print(metadata_to_string(ds.sttWithMetadata(audio)))
    else:
-        print(ds.stt(audio, fs))
+        print(ds.stt(audio))
    inference_end = timer() - inference_start
    print('Inference took %0.3fs for %0.3fs audio file.' % (inference_end, audio_length), file=sys.stderr)

View File

@@ -52,8 +52,8 @@ def main():
    audio2 = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
    fin.close()

-    stream1 = ds.createStream(sample_rate=fs1)
-    stream2 = ds.createStream(sample_rate=fs2)
+    stream1 = ds.createStream()
+    stream2 = ds.createStream()

    splits1 = np.array_split(audio1, 10)
    splits2 = np.array_split(audio2, 10)
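
Because each stream carries its own decoding state, the two streams in this test can be fed in any interleaving. A plausible continuation, reusing the names above (with the same hedges on `feedAudioContent` and `finishStream` as before):

```python
# Interleave the two files chunk by chunk; each stream decodes independently.
for s1, s2 in zip(splits1, splits2):
    ds.feedAudioContent(stream1, s1, len(s1))
    ds.feedAudioContent(stream2, s2, len(s2))

print(ds.finishStream(stream1))
print(ds.finishStream(stream2))
```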