Merge pull request #2308 from lissyx/remove-prealloc
Remove unused prealloc frames
This commit is contained in:
commit
50e2a99316
@ -68,9 +68,6 @@ if (args['lm'] && args['trie']) {
|
|||||||
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
|
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default initial allocation = 3 seconds := 150
|
|
||||||
const PRE_ALLOC_FRAMES = 150;
|
|
||||||
|
|
||||||
// Default is 16kHz
|
// Default is 16kHz
|
||||||
const AUDIO_SAMPLE_RATE = 16000;
|
const AUDIO_SAMPLE_RATE = 16000;
|
||||||
|
|
||||||
@ -109,7 +106,7 @@ const ffmpeg = spawn('ffmpeg', [
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
let audioLength = 0;
|
let audioLength = 0;
|
||||||
let sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
|
let sctx = model.setupStream(AUDIO_SAMPLE_RATE);
|
||||||
|
|
||||||
function finishStream() {
|
function finishStream() {
|
||||||
const model_load_start = process.hrtime();
|
const model_load_start = process.hrtime();
|
||||||
@ -122,7 +119,7 @@ function finishStream() {
|
|||||||
|
|
||||||
function intermediateDecode() {
|
function intermediateDecode() {
|
||||||
finishStream();
|
finishStream();
|
||||||
sctx = model.setupStream(PRE_ALLOC_FRAMES, AUDIO_SAMPLE_RATE);
|
sctx = model.setupStream(AUDIO_SAMPLE_RATE);
|
||||||
}
|
}
|
||||||
|
|
||||||
function feedAudioContent(chunk) {
|
function feedAudioContent(chunk) {
|
||||||
|
@ -252,7 +252,7 @@ namespace DeepSpeechWPF
|
|||||||
|
|
||||||
private void BtnStartRecording_Click(object sender, RoutedEventArgs e)
|
private void BtnStartRecording_Click(object sender, RoutedEventArgs e)
|
||||||
{
|
{
|
||||||
_sttClient.SetupStream(0, 16000);
|
_sttClient.SetupStream(16000);
|
||||||
_audioCapture.Start();
|
_audioCapture.Start();
|
||||||
btnStartRecording.IsEnabled = false;
|
btnStartRecording.IsEnabled = false;
|
||||||
btnStopRecording.IsEnabled = true;
|
btnStopRecording.IsEnabled = true;
|
||||||
|
@ -72,7 +72,7 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
|||||||
DS_FreeMetadata(metadata);
|
DS_FreeMetadata(metadata);
|
||||||
} else if (stream_size > 0) {
|
} else if (stream_size > 0) {
|
||||||
StreamingState* ctx;
|
StreamingState* ctx;
|
||||||
int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
|
int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
|
||||||
if (status != DS_ERR_OK) {
|
if (status != DS_ERR_OK) {
|
||||||
res.string = strdup("");
|
res.string = strdup("");
|
||||||
return res;
|
return res;
|
||||||
|
@ -329,7 +329,6 @@ DS_EnableDecoderWithLM(ModelState* aCtx,
|
|||||||
|
|
||||||
int
|
int
|
||||||
DS_SetupStream(ModelState* aCtx,
|
DS_SetupStream(ModelState* aCtx,
|
||||||
unsigned int aPreAllocFrames,
|
|
||||||
unsigned int aSampleRate,
|
unsigned int aSampleRate,
|
||||||
StreamingState** retval)
|
StreamingState** retval)
|
||||||
{
|
{
|
||||||
@ -343,11 +342,6 @@ DS_SetupStream(ModelState* aCtx,
|
|||||||
|
|
||||||
const size_t num_classes = aCtx->alphabet_->GetSize() + 1; // +1 for blank
|
const size_t num_classes = aCtx->alphabet_->GetSize() + 1; // +1 for blank
|
||||||
|
|
||||||
// Default initial allocation = 3 seconds.
|
|
||||||
if (aPreAllocFrames == 0) {
|
|
||||||
aPreAllocFrames = 150;
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx->audio_buffer_.reserve(aCtx->audio_win_len_);
|
ctx->audio_buffer_.reserve(aCtx->audio_win_len_);
|
||||||
ctx->mfcc_buffer_.reserve(aCtx->mfcc_feats_per_timestep_);
|
ctx->mfcc_buffer_.reserve(aCtx->mfcc_feats_per_timestep_);
|
||||||
ctx->mfcc_buffer_.resize(aCtx->n_features_*aCtx->n_context_, 0.f);
|
ctx->mfcc_buffer_.resize(aCtx->n_features_*aCtx->n_context_, 0.f);
|
||||||
@ -399,7 +393,7 @@ SetupStreamAndFeedAudioContent(ModelState* aCtx,
|
|||||||
unsigned int aSampleRate)
|
unsigned int aSampleRate)
|
||||||
{
|
{
|
||||||
StreamingState* ctx;
|
StreamingState* ctx;
|
||||||
int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
|
int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
|
||||||
if (status != DS_ERR_OK) {
|
if (status != DS_ERR_OK) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -151,9 +151,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
|
|||||||
* and {@link DS_FinishStream()}.
|
* and {@link DS_FinishStream()}.
|
||||||
*
|
*
|
||||||
* @param aCtx The ModelState pointer for the model to use.
|
* @param aCtx The ModelState pointer for the model to use.
|
||||||
* @param aPreAllocFrames Number of timestep frames to reserve. One timestep
|
|
||||||
* is equivalent to two window lengths (20ms). If set to
|
|
||||||
* 0 we reserve enough frames for 3 seconds of audio (150).
|
|
||||||
* @param aSampleRate The sample-rate of the audio signal.
|
* @param aSampleRate The sample-rate of the audio signal.
|
||||||
* @param[out] retval an opaque pointer that represents the streaming state. Can
|
* @param[out] retval an opaque pointer that represents the streaming state. Can
|
||||||
* be NULL if an error occurs.
|
* be NULL if an error occurs.
|
||||||
@ -162,7 +159,6 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
|
|||||||
*/
|
*/
|
||||||
DEEPSPEECH_EXPORT
|
DEEPSPEECH_EXPORT
|
||||||
int DS_SetupStream(ModelState* aCtx,
|
int DS_SetupStream(ModelState* aCtx,
|
||||||
unsigned int aPreAllocFrames,
|
|
||||||
unsigned int aSampleRate,
|
unsigned int aSampleRate,
|
||||||
StreamingState** retval);
|
StreamingState** retval);
|
||||||
|
|
||||||
|
@ -204,14 +204,11 @@ namespace DeepSpeechClient
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Creates a new streaming inference state.
|
/// Creates a new streaming inference state.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
|
|
||||||
/// One timestep is equivalent to two window lengths(20ms).
|
|
||||||
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
|
|
||||||
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
|
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
|
||||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
|
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
|
||||||
public unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate)
|
public unsafe void SetupStream(uint aSampleRate)
|
||||||
{
|
{
|
||||||
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP);
|
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
|
||||||
EvaluateResultCode(resultCode);
|
EvaluateResultCode(resultCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,12 +84,9 @@ namespace DeepSpeechClient.Interfaces
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Creates a new streaming inference state.
|
/// Creates a new streaming inference state.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="aPreAllocFrames">Number of timestep frames to reserve.
|
|
||||||
/// One timestep is equivalent to two window lengths(20ms).
|
|
||||||
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).</param>
|
|
||||||
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
|
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
|
||||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
|
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
|
||||||
unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate);
|
unsafe void SetupStream(uint aSampleRate);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Feeds audio samples to an ongoing streaming inference.
|
/// Feeds audio samples to an ongoing streaming inference.
|
||||||
|
@ -49,7 +49,6 @@ namespace DeepSpeechClient
|
|||||||
|
|
||||||
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
|
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
|
||||||
internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
|
internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
|
||||||
uint aPreAllocFrames,
|
|
||||||
uint aSampleRate, ref StreamingState** retval);
|
uint aSampleRate, ref StreamingState** retval);
|
||||||
|
|
||||||
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
|
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
|
||||||
|
@ -33,9 +33,9 @@ public class DeepSpeechModel {
|
|||||||
return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
|
return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DeepSpeechStreamingState setupStream(int prealloc_frames, int sample_rate) {
|
public DeepSpeechStreamingState setupStream(int sample_rate) {
|
||||||
SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
|
SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
|
||||||
impl.SetupStream(this._msp, prealloc_frames, sample_rate, ssp);
|
impl.SetupStream(this._msp, sample_rate, ssp);
|
||||||
return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
|
return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,9 +37,8 @@ class Model(object):
|
|||||||
def sttWithMetadata(self, *args, **kwargs):
|
def sttWithMetadata(self, *args, **kwargs):
|
||||||
return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)
|
return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)
|
||||||
|
|
||||||
def setupStream(self, pre_alloc_frames=150, sample_rate=16000):
|
def setupStream(self, sample_rate=16000):
|
||||||
status, ctx = deepspeech.impl.SetupStream(self._impl,
|
status, ctx = deepspeech.impl.SetupStream(self._impl,
|
||||||
aPreAllocFrames=pre_alloc_frames,
|
|
||||||
aSampleRate=sample_rate)
|
aSampleRate=sample_rate)
|
||||||
if status != 0:
|
if status != 0:
|
||||||
raise RuntimeError("SetupStream failed with error code {}".format(status))
|
raise RuntimeError("SetupStream failed with error code {}".format(status))
|
||||||
|
Loading…
Reference in New Issue
Block a user