Merge branch 'intermediateDecodeExpensive' of github.com:jeremiahrose/STT into intermediateDecodeExpensive
This commit is contained in:
commit
252f702f37
@ -314,6 +314,41 @@ STT_EXPORT
|
|||||||
Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
||||||
unsigned int aNumResults);
|
unsigned int aNumResults);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the intermediate decoding of an ongoing streaming inference, flushing
|
||||||
|
buffers first. This ensures that all audio that has been streamed so far is
|
||||||
|
included in the result, but is more expensive than STT_IntermediateDecode()
|
||||||
|
because buffers are processed through the acoustic model.
|
||||||
|
*
|
||||||
|
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||||
|
*
|
||||||
|
* @return The STT result. The user is responsible for freeing the string using
|
||||||
|
* {@link STT_FreeString()}.
|
||||||
|
*
|
||||||
|
* @note This method does not free the state pointer (@p aSctx); the stream remains valid for further use.
|
||||||
|
*/
|
||||||
|
STT_EXPORT
|
||||||
|
char* STT_IntermediateDecodeExpensive(StreamingState* aSctx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the intermediate decoding of an ongoing streaming inference, flushing
|
||||||
|
buffers first. This ensures that all audio that has been streamed so far is
|
||||||
|
included in the result, but is more expensive than
|
||||||
|
STT_IntermediateDecodeWithMetadata() because buffers are processed through
|
||||||
|
the acoustic model. Return results including metadata.
|
||||||
|
*
|
||||||
|
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||||
|
* @param aNumResults The number of candidate transcripts to return.
|
||||||
|
*
|
||||||
|
* @return Metadata struct containing multiple candidate transcripts. Each transcript
|
||||||
|
* has per-token metadata including timing information. The user is
|
||||||
|
* responsible for freeing Metadata by calling {@link STT_FreeMetadata()}.
|
||||||
|
* Returns NULL on error.
|
||||||
|
*/
|
||||||
|
STT_EXPORT
|
||||||
|
Metadata* STT_IntermediateDecodeExpensiveWithMetadata(StreamingState* aSctx,
|
||||||
|
unsigned int aNumResults);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Compute the final decoding of an ongoing streaming inference and return
|
* @brief Compute the final decoding of an ongoing streaming inference and return
|
||||||
* the result. Signals the end of an ongoing streaming inference.
|
* the result. Signals the end of an ongoing streaming inference.
|
||||||
|
@ -106,6 +106,26 @@ class StreamImpl {
|
|||||||
return binding.IntermediateDecodeWithMetadata(this._impl, aNumResults);
|
return binding.IntermediateDecodeWithMetadata(this._impl, aNumResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the intermediate decoding of an ongoing streaming inference, flushing buffers first. This ensures that all audio that has been streamed so far is included in the result, but is more expensive than intermediateDecode() because buffers are processed through the acoustic model.
|
||||||
|
*
|
||||||
|
* @return The STT intermediate result.
|
||||||
|
*/
|
||||||
|
intermediateDecodeExpensive(): string {
    // Delegate to the native binding, which flushes buffers before decoding.
    const transcript: string = binding.IntermediateDecodeExpensive(this._impl);
    return transcript;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the intermediate decoding of an ongoing streaming inference, flushing buffers first. This ensures that all audio that has been streamed so far is included in the result, but is more expensive than intermediateDecodeWithMetadata() because buffers are processed through the acoustic model. Return results including metadata.
|
||||||
|
*
|
||||||
|
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
|
||||||
|
*
|
||||||
|
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
|
||||||
|
*/
|
||||||
|
intermediateDecodeExpensiveWithMetadata(aNumResults: number = 1): Metadata {
    // Delegate to the native binding, passing along the requested number of
    // candidate transcripts.
    const metadata: Metadata = binding.IntermediateDecodeExpensiveWithMetadata(this._impl, aNumResults);
    return metadata;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference.
|
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference.
|
||||||
*
|
*
|
||||||
|
@ -35,6 +35,7 @@ using namespace node;
|
|||||||
|
|
||||||
%newobject STT_SpeechToText;
|
%newobject STT_SpeechToText;
|
||||||
%newobject STT_IntermediateDecode;
|
%newobject STT_IntermediateDecode;
|
||||||
|
%newobject STT_IntermediateDecodeExpensive;
|
||||||
%newobject STT_FinishStream;
|
%newobject STT_FinishStream;
|
||||||
%newobject STT_Version;
|
%newobject STT_Version;
|
||||||
%newobject STT_ErrorCodeToErrorMessage;
|
%newobject STT_ErrorCodeToErrorMessage;
|
||||||
|
@ -283,8 +283,10 @@ class Stream(object):
|
|||||||
|
|
||||||
def intermediateDecodeExpensive(self):
|
def intermediateDecodeExpensive(self):
|
||||||
"""
|
"""
|
||||||
Compute the intermediate decoding of an ongoing streaming inference, flushing buffers.
|
Compute the intermediate decoding of an ongoing streaming inference, flushing
|
||||||
This ensures that all data that has been streamed so far are included in the result.
|
buffers first. This ensures that all audio that has been streamed so far is
|
||||||
|
included in the result, but is more expensive than intermediateDecode() because
|
||||||
|
buffers are processed through the acoustic model.
|
||||||
|
|
||||||
:return: The STT intermediate result.
|
:return: The STT intermediate result.
|
||||||
:type: str
|
:type: str
|
||||||
@ -297,6 +299,28 @@ class Stream(object):
|
|||||||
)
|
)
|
||||||
return stt.impl.IntermediateDecodeExpensive(self._impl)
|
return stt.impl.IntermediateDecodeExpensive(self._impl)
|
||||||
|
|
||||||
|
def intermediateDecodeExpensiveWithMetadata(self, num_results=1):
    """
    Compute the intermediate decoding of an ongoing streaming inference, flushing
    buffers first. This ensures that all audio that has been streamed so far is
    included in the result, but is more expensive than
    intermediateDecodeWithMetadata() because buffers are processed through the
    acoustic model. Return results including metadata.

    :param num_results: Maximum number of candidate transcripts to return. Returned list might be smaller than this.
    :type num_results: int

    :return: Metadata object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information.
    :type: :func:`Metadata`

    :throws: RuntimeError if the stream object is not valid
    """
    if not self._impl:
        raise RuntimeError(
            "Stream object is not valid. Trying to decode an already finished stream?"
        )
    # Use the buffer-flushing binding. The plain IntermediateDecodeWithMetadata
    # binding would skip the flush that this method's contract promises.
    return stt.impl.IntermediateDecodeExpensiveWithMetadata(self._impl, num_results)
|
||||||
|
|
||||||
def finishStream(self):
|
def finishStream(self):
|
||||||
"""
|
"""
|
||||||
Compute the final decoding of an ongoing streaming inference and return
|
Compute the final decoding of an ongoing streaming inference and return
|
||||||
|
@ -120,6 +120,7 @@ static PyObject *parent_reference() {
|
|||||||
%newobject STT_SpeechToText;
|
%newobject STT_SpeechToText;
|
||||||
%newobject STT_IntermediateDecode;
|
%newobject STT_IntermediateDecode;
|
||||||
%newobject STT_IntermediateDecodeExpensive;
|
%newobject STT_IntermediateDecodeExpensive;
|
||||||
|
%newobject STT_IntermediateDecodeExpensiveWithMetadata;
|
||||||
%newobject STT_FinishStream;
|
%newobject STT_FinishStream;
|
||||||
%newobject STT_Version;
|
%newobject STT_Version;
|
||||||
%newobject STT_ErrorCodeToErrorMessage;
|
%newobject STT_ErrorCodeToErrorMessage;
|
||||||
|
@ -74,8 +74,8 @@ struct StreamingState {
|
|||||||
char* intermediateDecode() const;
|
char* intermediateDecode() const;
|
||||||
Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const;
|
Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const;
|
||||||
char* intermediateDecodeExpensive();
|
char* intermediateDecodeExpensive();
|
||||||
void flushBuffer();
|
Metadata* intermediateDecodeExpensiveWithMetadata(unsigned int num_results);
|
||||||
void finalizeStream();
|
void flushBuffers(bool addZeroMfccVectors = false);
|
||||||
char* finishStream();
|
char* finishStream();
|
||||||
Metadata* finishStreamWithMetadata(unsigned int num_results);
|
Metadata* finishStreamWithMetadata(unsigned int num_results);
|
||||||
|
|
||||||
@ -142,21 +142,28 @@ StreamingState::intermediateDecodeWithMetadata(unsigned int num_results) const
|
|||||||
char*
|
char*
|
||||||
StreamingState::intermediateDecodeExpensive()
|
StreamingState::intermediateDecodeExpensive()
|
||||||
{
|
{
|
||||||
flushBuffer();
|
flushBuffers();
|
||||||
return model_->decode(decoder_state_);
|
return model_->decode(decoder_state_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Flush pending buffers (without appending zero MFCC vectors), then decode the
// current decoder state into candidate transcripts with metadata.
Metadata*
StreamingState::intermediateDecodeExpensiveWithMetadata(unsigned int num_results)
{
  flushBuffers();
  Metadata* const candidates = model_->decode_metadata(decoder_state_, num_results);
  return candidates;
}
|
||||||
|
|
||||||
char*
|
char*
|
||||||
StreamingState::finishStream()
|
StreamingState::finishStream()
|
||||||
{
|
{
|
||||||
finalizeStream();
|
flushBuffers(true);
|
||||||
return model_->decode(decoder_state_);
|
return model_->decode(decoder_state_);
|
||||||
}
|
}
|
||||||
|
|
||||||
Metadata*
|
Metadata*
|
||||||
StreamingState::finishStreamWithMetadata(unsigned int num_results)
|
StreamingState::finishStreamWithMetadata(unsigned int num_results)
|
||||||
{
|
{
|
||||||
finalizeStream();
|
flushBuffers(true);
|
||||||
return model_->decode_metadata(decoder_state_, num_results);
|
return model_->decode_metadata(decoder_state_, num_results);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,31 +178,16 @@ StreamingState::processAudioWindow(const vector<float>& buf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
StreamingState::flushBuffer()
|
StreamingState::flushBuffers(bool addZeroMfccVectors)
|
||||||
{
|
{
|
||||||
// Flush audio buffer
|
// Flush audio buffer
|
||||||
processAudioWindow(audio_buffer_);
|
processAudioWindow(audio_buffer_);
|
||||||
|
|
||||||
// Add empty mfcc vectors at end of sample
|
if (addZeroMfccVectors) {
|
||||||
//for (int i = 0; i < model_->n_context_; ++i) {
|
// Add empty mfcc vectors at end of sample
|
||||||
// addZeroMfccWindow();
|
for (int i = 0; i < model_->n_context_; ++i) {
|
||||||
//}
|
addZeroMfccWindow();
|
||||||
|
}
|
||||||
// Process final batch
|
|
||||||
if (batch_buffer_.size() > 0) {
|
|
||||||
processBatch(batch_buffer_, batch_buffer_.size()/model_->mfcc_feats_per_timestep_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
StreamingState::finalizeStream()
|
|
||||||
{
|
|
||||||
// Flush audio buffer
|
|
||||||
processAudioWindow(audio_buffer_);
|
|
||||||
|
|
||||||
// Add empty mfcc vectors at end of sample
|
|
||||||
for (int i = 0; i < model_->n_context_; ++i) {
|
|
||||||
addZeroMfccWindow();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process final batch
|
// Process final batch
|
||||||
@ -485,6 +477,13 @@ STT_IntermediateDecodeExpensive(StreamingState* aSctx)
|
|||||||
return aSctx->intermediateDecodeExpensive();
|
return aSctx->intermediateDecodeExpensive();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C API entry point: forwards to
// StreamingState::intermediateDecodeExpensiveWithMetadata() on the given stream.
Metadata*
STT_IntermediateDecodeExpensiveWithMetadata(StreamingState* aSctx,
                                            unsigned int aNumResults)
{
  Metadata* const result = aSctx->intermediateDecodeExpensiveWithMetadata(aNumResults);
  return result;
}
|
||||||
|
|
||||||
char*
|
char*
|
||||||
STT_FinishStream(StreamingState* aSctx)
|
STT_FinishStream(StreamingState* aSctx)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user