Merge pull request #2308 from lissyx/remove-prealloc

Remove unused prealloc frames
2019-08-21 14:53:47 +02:00 · 2019-08-21 14:53:47 +02:00 · 50e2a99316
commit 50e2a99316
parent 0e47048f9c 81b3b159c4
10 changed files with 11 additions and 32 deletions
--- a/examples/ffmpeg_vad_streaming/index.js
+++ b/examples/ffmpeg_vad_streaming/index.js
@ -68,9 +68,6 @@ if (args['lm'] && args['trie']) {
 	console.error('Loaded language model in %ds.', totalTime(lm_load_end));
 }

-// Default initial allocation = 3 seconds := 150
-const PRE_ALLOC_FRAMES = 150;
-
 // Default is 16kHz
 const AUDIO_SAMPLE_RATE = 16000;

@ -109,7 +106,7 @@ const ffmpeg = spawn('ffmpeg', [
 ]);

 let audioLength = 0;
-let sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
+let sctx = model.setupStream(AUDIO_SAMPLE_RATE);

 function finishStream() {
 	const model_load_start = process.hrtime();
@ -122,7 +119,7 @@ function finishStream() {

 function intermediateDecode() {
 	finishStream();
-	sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
+	sctx = model.setupStream(AUDIO_SAMPLE_RATE);
 }

 function feedAudioContent(chunk) {
--- a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
+++ b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
@ -252,7 +252,7 @@ namespace DeepSpeechWPF

        private void BtnStartRecording_Click(object sender, RoutedEventArgs e)
        {
-            _sttClient.SetupStream(0, 16000);
+            _sttClient.SetupStream(16000);
            _audioCapture.Start();
            btnStartRecording.IsEnabled = false;
            btnStopRecording.IsEnabled = true;
--- a/native_client/client.cc
+++ b/native_client/client.cc
@ -72,7 +72,7 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
    DS_FreeMetadata(metadata);
  } else if (stream_size > 0) {
    StreamingState* ctx;
-    int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
+    int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
    if (status != DS_ERR_OK) {
      res.string = strdup("");
      return res;
--- a/native_client/deepspeech.cc
+++ b/native_client/deepspeech.cc
@ -329,7 +329,6 @@ DS_EnableDecoderWithLM(ModelState* aCtx,

 int
 DS_SetupStream(ModelState* aCtx,
-               unsigned int aPreAllocFrames,
               unsigned int aSampleRate,
               StreamingState** retval)
 {
@ -343,11 +342,6 @@ DS_SetupStream(ModelState* aCtx,

  const size_t num_classes = aCtx->alphabet_->GetSize() + 1; // +1 for blank

-  // Default initial allocation = 3 seconds.
-  if (aPreAllocFrames == 0) {
-    aPreAllocFrames = 150;
-  }
-
  ctx->audio_buffer_.reserve(aCtx->audio_win_len_);
  ctx->mfcc_buffer_.reserve(aCtx->mfcc_feats_per_timestep_);
  ctx->mfcc_buffer_.resize(aCtx->n_features_*aCtx->n_context_, 0.f);
@ -399,7 +393,7 @@ SetupStreamAndFeedAudioContent(ModelState* aCtx,
                               unsigned int aSampleRate)
 {
  StreamingState* ctx;
-  int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
+  int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
  if (status != DS_ERR_OK) {
    return nullptr;
  }
--- a/native_client/deepspeech.h
+++ b/native_client/deepspeech.h
@ -151,9 +151,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
 *        and {@link DS_FinishStream()}.
 *
 * @param aCtx The ModelState pointer for the model to use.
- * @param aPreAllocFrames Number of timestep frames to reserve. One timestep
- *                        is equivalent to two window lengths (20ms). If set to 
- *                        0 we reserve enough frames for 3 seconds of audio (150).
 * @param aSampleRate The sample-rate of the audio signal.
 * @param[out] retval an opaque pointer that represents the streaming state. Can
 *                    be NULL if an error occurs.
@ -162,7 +159,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
 */
 DEEPSPEECH_EXPORT
 int DS_SetupStream(ModelState* aCtx,
-                   unsigned int aPreAllocFrames,
                   unsigned int aSampleRate,
                   StreamingState** retval);

--- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
@ -204,14 +204,11 @@ namespace DeepSpeechClient
        /// <summary>
        /// Creates a new streaming inference state.
        /// </summary>
-        /// <param name="aPreAllocFrames">Number of timestep frames to reserve.
-        /// One timestep is equivalent to two window lengths(20ms).
-        /// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
        /// <param name="aSampleRate">The sample-rate of the audio signal</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-        public unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate)
+        public unsafe void SetupStream(uint aSampleRate)
        {
-            var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP);
+            var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
            EvaluateResultCode(resultCode);
        }

--- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
@ -84,12 +84,9 @@ namespace DeepSpeechClient.Interfaces
        /// <summary>
        /// Creates a new streaming inference state.
        /// </summary>
-        /// <param name="aPreAllocFrames">Number of timestep frames to reserve.
-        /// One timestep is equivalent to two window lengths(20ms).
-        /// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
        /// <param name="aSampleRate">The sample-rate of the audio signal</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-        unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate);
+        unsafe void SetupStream(uint aSampleRate);

        /// <summary>
        /// Feeds audio samples to an ongoing streaming inference.
--- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs
+++ b/native_client/dotnet/DeepSpeechClient/NativeImp.cs
@ -49,7 +49,6 @@ namespace DeepSpeechClient

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
-               uint aPreAllocFrames,
               uint aSampleRate, ref StreamingState** retval);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
--- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java
+++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java
@ -33,9 +33,9 @@ public class DeepSpeechModel {
        return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
    }

-    public DeepSpeechStreamingState setupStream(int prealloc_frames, int sample_rate) {
+    public DeepSpeechStreamingState setupStream(int sample_rate) {
        SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
-        impl.SetupStream(this._msp, prealloc_frames, sample_rate, ssp);
+        impl.SetupStream(this._msp, sample_rate, ssp);
        return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
    }

--- a/native_client/python/init.py
+++ b/native_client/python/init.py
@ -37,9 +37,8 @@ class Model(object):
    def sttWithMetadata(self, *args, **kwargs):
        return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)

-    def setupStream(self, pre_alloc_frames=150, sample_rate=16000):
+    def setupStream(self, sample_rate=16000):
        status, ctx = deepspeech.impl.SetupStream(self._impl,
-                                                  aPreAllocFrames=pre_alloc_frames,
                                                  aSampleRate=sample_rate)
        if status != 0:
            raise RuntimeError("SetupStream failed with error code {}".format(status))