Merge pull request #2308 from lissyx/remove-prealloc

Remove unused prealloc frames
This commit is contained in:
lissyx 2019-08-21 14:53:47 +02:00 committed by GitHub
commit 50e2a99316
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 11 additions and 32 deletions

View File

@ -68,9 +68,6 @@ if (args['lm'] && args['trie']) {
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
}
// Default initial allocation = 3 seconds := 150
const PRE_ALLOC_FRAMES = 150;
// Default is 16kHz
const AUDIO_SAMPLE_RATE = 16000;
@ -109,7 +106,7 @@ const ffmpeg = spawn('ffmpeg', [
]);
let audioLength = 0;
let sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
let sctx = model.setupStream(AUDIO_SAMPLE_RATE);
function finishStream() {
const model_load_start = process.hrtime();
@ -122,7 +119,7 @@ function finishStream() {
function intermediateDecode() {
finishStream();
sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
sctx = model.setupStream(AUDIO_SAMPLE_RATE);
}
function feedAudioContent(chunk) {

View File

@ -252,7 +252,7 @@ namespace DeepSpeechWPF
private void BtnStartRecording_Click(object sender, RoutedEventArgs e)
{
_sttClient.SetupStream(0, 16000);
_sttClient.SetupStream(16000);
_audioCapture.Start();
btnStartRecording.IsEnabled = false;
btnStopRecording.IsEnabled = true;

View File

@ -72,7 +72,7 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
DS_FreeMetadata(metadata);
} else if (stream_size > 0) {
StreamingState* ctx;
int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
if (status != DS_ERR_OK) {
res.string = strdup("");
return res;

View File

@ -329,7 +329,6 @@ DS_EnableDecoderWithLM(ModelState* aCtx,
int
DS_SetupStream(ModelState* aCtx,
unsigned int aPreAllocFrames,
unsigned int aSampleRate,
StreamingState** retval)
{
@ -343,11 +342,6 @@ DS_SetupStream(ModelState* aCtx,
const size_t num_classes = aCtx->alphabet_->GetSize() + 1; // +1 for blank
// Default initial allocation = 3 seconds.
if (aPreAllocFrames == 0) {
aPreAllocFrames = 150;
}
ctx->audio_buffer_.reserve(aCtx->audio_win_len_);
ctx->mfcc_buffer_.reserve(aCtx->mfcc_feats_per_timestep_);
ctx->mfcc_buffer_.resize(aCtx->n_features_*aCtx->n_context_, 0.f);
@ -399,7 +393,7 @@ SetupStreamAndFeedAudioContent(ModelState* aCtx,
unsigned int aSampleRate)
{
StreamingState* ctx;
int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
if (status != DS_ERR_OK) {
return nullptr;
}

View File

@ -151,9 +151,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
* and {@link DS_FinishStream()}.
*
* @param aCtx The ModelState pointer for the model to use.
* @param aPreAllocFrames Number of timestep frames to reserve. One timestep
* is equivalent to two window lengths (20ms). If set to
* 0 we reserve enough frames for 3 seconds of audio (150).
* @param aSampleRate The sample-rate of the audio signal.
* @param[out] retval an opaque pointer that represents the streaming state. Can
* be NULL if an error occurs.
@ -162,7 +159,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
*/
DEEPSPEECH_EXPORT
int DS_SetupStream(ModelState* aCtx,
unsigned int aPreAllocFrames,
unsigned int aSampleRate,
StreamingState** retval);

View File

@ -204,14 +204,11 @@ namespace DeepSpeechClient
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
/// One timestep is equivalent to two window lengths(20ms).
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate)
public unsafe void SetupStream(uint aSampleRate)
{
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP);
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
EvaluateResultCode(resultCode);
}

View File

@ -84,12 +84,9 @@ namespace DeepSpeechClient.Interfaces
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
/// One timestep is equivalent to two window lengths(20ms).
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate);
unsafe void SetupStream(uint aSampleRate);
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.

View File

@ -49,7 +49,6 @@ namespace DeepSpeechClient
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
uint aPreAllocFrames,
uint aSampleRate, ref StreamingState** retval);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]

View File

@ -33,9 +33,9 @@ public class DeepSpeechModel {
return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
}
public DeepSpeechStreamingState setupStream(int prealloc_frames, int sample_rate) {
public DeepSpeechStreamingState setupStream(int sample_rate) {
SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
impl.SetupStream(this._msp, prealloc_frames, sample_rate, ssp);
impl.SetupStream(this._msp, sample_rate, ssp);
return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
}

View File

@ -37,9 +37,8 @@ class Model(object):
def sttWithMetadata(self, *args, **kwargs):
return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)
def setupStream(self, pre_alloc_frames=150, sample_rate=16000):
def setupStream(self, sample_rate=16000):
status, ctx = deepspeech.impl.SetupStream(self._impl,
aPreAllocFrames=pre_alloc_frames,
aSampleRate=sample_rate)
if status != 0:
raise RuntimeError("SetupStream failed with error code {}".format(status))