From 81b3b159c457de07ad8e3b82f894654678ec5650 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 20 Aug 2019 16:54:14 +0200 Subject: [PATCH] Remove unused prealloc frames Fixes #2298 --- examples/ffmpeg_vad_streaming/index.js | 7 ++----- examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs | 2 +- native_client/client.cc | 2 +- native_client/deepspeech.cc | 8 +------- native_client/deepspeech.h | 4 ---- native_client/dotnet/DeepSpeechClient/DeepSpeech.cs | 7 ++----- .../dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs | 5 +---- native_client/dotnet/DeepSpeechClient/NativeImp.cs | 1 - .../mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java | 4 ++-- native_client/python/__init__.py | 3 +-- 10 files changed, 11 insertions(+), 32 deletions(-) diff --git a/examples/ffmpeg_vad_streaming/index.js b/examples/ffmpeg_vad_streaming/index.js index cffc8e16..a2b61ea3 100644 --- a/examples/ffmpeg_vad_streaming/index.js +++ b/examples/ffmpeg_vad_streaming/index.js @@ -68,9 +68,6 @@ if (args['lm'] && args['trie']) { console.error('Loaded language model in %ds.', totalTime(lm_load_end)); } -// Default initial allocation = 3 seconds := 150 -const PRE_ALLOC_FRAMES = 150; - // Default is 16kHz const AUDIO_SAMPLE_RATE = 16000; @@ -109,7 +106,7 @@ const ffmpeg = spawn('ffmpeg', [ ]); let audioLength = 0; -let sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE); +let sctx = model.setupStream(AUDIO_SAMPLE_RATE); function finishStream() { const model_load_start = process.hrtime(); @@ -122,7 +119,7 @@ function finishStream() { function intermediateDecode() { finishStream(); - sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE); + sctx = model.setupStream(AUDIO_SAMPLE_RATE); } function feedAudioContent(chunk) { diff --git a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs index e855f181..2ad16f45 100644 --- a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs +++ 
b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs @@ -252,7 +252,7 @@ namespace DeepSpeechWPF private void BtnStartRecording_Click(object sender, RoutedEventArgs e) { - _sttClient.SetupStream(0, 16000); + _sttClient.SetupStream(16000); _audioCapture.Start(); btnStartRecording.IsEnabled = false; btnStopRecording.IsEnabled = true; diff --git a/native_client/client.cc b/native_client/client.cc index f1148ebc..1eea8e49 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -72,7 +72,7 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, DS_FreeMetadata(metadata); } else if (stream_size > 0) { StreamingState* ctx; - int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx); + int status = DS_SetupStream(aCtx, aSampleRate, &ctx); if (status != DS_ERR_OK) { res.string = strdup(""); return res; diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 05caf467..1578ed6d 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -329,7 +329,6 @@ DS_EnableDecoderWithLM(ModelState* aCtx, int DS_SetupStream(ModelState* aCtx, - unsigned int aPreAllocFrames, unsigned int aSampleRate, StreamingState** retval) { @@ -343,11 +342,6 @@ DS_SetupStream(ModelState* aCtx, const size_t num_classes = aCtx->alphabet_->GetSize() + 1; // +1 for blank - // Default initial allocation = 3 seconds. 
- if (aPreAllocFrames == 0) { - aPreAllocFrames = 150; - } - ctx->audio_buffer_.reserve(aCtx->audio_win_len_); ctx->mfcc_buffer_.reserve(aCtx->mfcc_feats_per_timestep_); ctx->mfcc_buffer_.resize(aCtx->n_features_*aCtx->n_context_, 0.f); @@ -399,7 +393,7 @@ SetupStreamAndFeedAudioContent(ModelState* aCtx, unsigned int aSampleRate) { StreamingState* ctx; - int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx); + int status = DS_SetupStream(aCtx, aSampleRate, &ctx); if (status != DS_ERR_OK) { return nullptr; } diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index b40da606..5f9f10b5 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -151,9 +151,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, * and {@link DS_FinishStream()}. * * @param aCtx The ModelState pointer for the model to use. - * @param aPreAllocFrames Number of timestep frames to reserve. One timestep - * is equivalent to two window lengths (20ms). If set to - * 0 we reserve enough frames for 3 seconds of audio (150). * @param aSampleRate The sample-rate of the audio signal. * @param[out] retval an opaque pointer that represents the streaming state. Can * be NULL if an error occurs. @@ -162,7 +159,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, */ DEEPSPEECH_EXPORT int DS_SetupStream(ModelState* aCtx, - unsigned int aPreAllocFrames, unsigned int aSampleRate, StreamingState** retval); diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs index 5271e2e7..d0b2a7d3 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs @@ -204,14 +204,11 @@ namespace DeepSpeechClient /// /// Creates a new streaming inference state. /// - /// Number of timestep frames to reserve. - /// One timestep is equivalent to two window lengths(20ms). - /// If set to 0 we reserve enough frames for 3 seconds of audio(150). 
/// The sample-rate of the audio signal /// Thrown when the native binary failed to initialize the streaming mode. - public unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate) + public unsafe void SetupStream(uint aSampleRate) { - var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP); + var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP); EvaluateResultCode(resultCode); } diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs index 3d27a56c..3c00b996 100644 --- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs @@ -84,12 +84,9 @@ namespace DeepSpeechClient.Interfaces /// /// Creates a new streaming inference state. /// - /// Number of timestep frames to reserve. - /// One timestep is equivalent to two window lengths(20ms). - /// If set to 0 we reserve enough frames for 3 seconds of audio(150). /// The sample-rate of the audio signal /// Thrown when the native binary failed to initialize the streaming mode. - unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate); + unsafe void SetupStream(uint aSampleRate); /// /// Feeds audio samples to an ongoing streaming inference. 
diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/DeepSpeechClient/NativeImp.cs index ec7d527b..cde7beac 100644 --- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs +++ b/native_client/dotnet/DeepSpeechClient/NativeImp.cs @@ -49,7 +49,6 @@ namespace DeepSpeechClient [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx, - uint aPreAllocFrames, uint aSampleRate, ref StreamingState** retval); [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java index 06f9e717..bdd995c5 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -33,9 +33,9 @@ public class DeepSpeechModel { return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate); } - public DeepSpeechStreamingState setupStream(int prealloc_frames, int sample_rate) { + public DeepSpeechStreamingState setupStream(int sample_rate) { SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep(); - impl.SetupStream(this._msp, prealloc_frames, sample_rate, ssp); + impl.SetupStream(this._msp, sample_rate, ssp); return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp)); } diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py index f4923f84..287903d2 100644 --- a/native_client/python/__init__.py +++ b/native_client/python/__init__.py @@ -37,9 +37,8 @@ class Model(object): def sttWithMetadata(self, *args, **kwargs): return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs) - def setupStream(self, 
pre_alloc_frames=150, sample_rate=16000): + def setupStream(self, sample_rate=16000): status, ctx = deepspeech.impl.SetupStream(self._impl, - aPreAllocFrames=pre_alloc_frames, aSampleRate=sample_rate) if status != 0: raise RuntimeError("SetupStream failed with error code {}".format(status))