Merge pull request #2308 from lissyx/remove-prealloc
Remove unused prealloc frames
This commit is contained in:
commit
50e2a99316
@ -68,9 +68,6 @@ if (args['lm'] && args['trie']) {
|
||||
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
|
||||
}
|
||||
|
||||
// Default initial allocation = 3 seconds := 150
|
||||
const PRE_ALLOC_FRAMES = 150;
|
||||
|
||||
// Default is 16kHz
|
||||
const AUDIO_SAMPLE_RATE = 16000;
|
||||
|
||||
@ -109,7 +106,7 @@ const ffmpeg = spawn('ffmpeg', [
|
||||
]);
|
||||
|
||||
let audioLength = 0;
|
||||
let sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
|
||||
let sctx = model.setupStream(AUDIO_SAMPLE_RATE);
|
||||
|
||||
function finishStream() {
|
||||
const model_load_start = process.hrtime();
|
||||
@ -122,7 +119,7 @@ function finishStream() {
|
||||
|
||||
function intermediateDecode() {
|
||||
finishStream();
|
||||
sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
|
||||
sctx = model.setupStream(AUDIO_SAMPLE_RATE);
|
||||
}
|
||||
|
||||
function feedAudioContent(chunk) {
|
||||
|
@ -252,7 +252,7 @@ namespace DeepSpeechWPF
|
||||
|
||||
private void BtnStartRecording_Click(object sender, RoutedEventArgs e)
|
||||
{
|
||||
_sttClient.SetupStream(0, 16000);
|
||||
_sttClient.SetupStream(16000);
|
||||
_audioCapture.Start();
|
||||
btnStartRecording.IsEnabled = false;
|
||||
btnStopRecording.IsEnabled = true;
|
||||
|
@ -72,7 +72,7 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
||||
DS_FreeMetadata(metadata);
|
||||
} else if (stream_size > 0) {
|
||||
StreamingState* ctx;
|
||||
int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
|
||||
int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
|
||||
if (status != DS_ERR_OK) {
|
||||
res.string = strdup("");
|
||||
return res;
|
||||
|
@ -329,7 +329,6 @@ DS_EnableDecoderWithLM(ModelState* aCtx,
|
||||
|
||||
int
|
||||
DS_SetupStream(ModelState* aCtx,
|
||||
unsigned int aPreAllocFrames,
|
||||
unsigned int aSampleRate,
|
||||
StreamingState** retval)
|
||||
{
|
||||
@ -343,11 +342,6 @@ DS_SetupStream(ModelState* aCtx,
|
||||
|
||||
const size_t num_classes = aCtx->alphabet_->GetSize() + 1; // +1 for blank
|
||||
|
||||
// Default initial allocation = 3 seconds.
|
||||
if (aPreAllocFrames == 0) {
|
||||
aPreAllocFrames = 150;
|
||||
}
|
||||
|
||||
ctx->audio_buffer_.reserve(aCtx->audio_win_len_);
|
||||
ctx->mfcc_buffer_.reserve(aCtx->mfcc_feats_per_timestep_);
|
||||
ctx->mfcc_buffer_.resize(aCtx->n_features_*aCtx->n_context_, 0.f);
|
||||
@ -399,7 +393,7 @@ SetupStreamAndFeedAudioContent(ModelState* aCtx,
|
||||
unsigned int aSampleRate)
|
||||
{
|
||||
StreamingState* ctx;
|
||||
int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
|
||||
int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
|
||||
if (status != DS_ERR_OK) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -151,9 +151,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
|
||||
* and {@link DS_FinishStream()}.
|
||||
*
|
||||
* @param aCtx The ModelState pointer for the model to use.
|
||||
* @param aPreAllocFrames Number of timestep frames to reserve. One timestep
|
||||
* is equivalent to two window lengths (20ms). If set to
|
||||
* 0 we reserve enough frames for 3 seconds of audio (150).
|
||||
* @param aSampleRate The sample-rate of the audio signal.
|
||||
* @param[out] retval an opaque pointer that represents the streaming state. Can
|
||||
* be NULL if an error occurs.
|
||||
@ -162,7 +159,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
|
||||
*/
|
||||
DEEPSPEECH_EXPORT
|
||||
int DS_SetupStream(ModelState* aCtx,
|
||||
unsigned int aPreAllocFrames,
|
||||
unsigned int aSampleRate,
|
||||
StreamingState** retval);
|
||||
|
||||
|
@ -204,14 +204,11 @@ namespace DeepSpeechClient
|
||||
/// <summary>
|
||||
/// Creates a new streaming inference state.
|
||||
/// </summary>
|
||||
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
|
||||
/// One timestep is equivalent to two window lengths(20ms).
|
||||
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
|
||||
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
|
||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
|
||||
public unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate)
|
||||
public unsafe void SetupStream(uint aSampleRate)
|
||||
{
|
||||
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP);
|
||||
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
||||
|
@ -84,12 +84,9 @@ namespace DeepSpeechClient.Interfaces
|
||||
/// <summary>
|
||||
/// Creates a new streaming inference state.
|
||||
/// </summary>
|
||||
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
|
||||
/// One timestep is equivalent to two window lengths(20ms).
|
||||
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
|
||||
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
|
||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
|
||||
unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate);
|
||||
unsafe void SetupStream(uint aSampleRate);
|
||||
|
||||
/// <summary>
|
||||
/// Feeds audio samples to an ongoing streaming inference.
|
||||
|
@ -49,7 +49,6 @@ namespace DeepSpeechClient
|
||||
|
||||
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
|
||||
uint aPreAllocFrames,
|
||||
uint aSampleRate, ref StreamingState** retval);
|
||||
|
||||
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
|
@ -33,9 +33,9 @@ public class DeepSpeechModel {
|
||||
return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
|
||||
}
|
||||
|
||||
public DeepSpeechStreamingState setupStream(int prealloc_frames, int sample_rate) {
|
||||
public DeepSpeechStreamingState setupStream(int sample_rate) {
|
||||
SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
|
||||
impl.SetupStream(this._msp, prealloc_frames, sample_rate, ssp);
|
||||
impl.SetupStream(this._msp, sample_rate, ssp);
|
||||
return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
|
||||
}
|
||||
|
||||
|
@ -37,9 +37,8 @@ class Model(object):
|
||||
def sttWithMetadata(self, *args, **kwargs):
|
||||
return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)
|
||||
|
||||
def setupStream(self, pre_alloc_frames=150, sample_rate=16000):
|
||||
def setupStream(self, sample_rate=16000):
|
||||
status, ctx = deepspeech.impl.SetupStream(self._impl,
|
||||
aPreAllocFrames=pre_alloc_frames,
|
||||
aSampleRate=sample_rate)
|
||||
if status != 0:
|
||||
raise RuntimeError("SetupStream failed with error code {}".format(status))
|
||||
|
Loading…
Reference in New Issue
Block a user