Add intermediateDecodeExpensiveWithMetadata() and Python bindings
- Merge flushBuffer() and finalizeStream() into flushBuffers(). - Add optional argument addZeroMfccVectors = false to flushBuffers(). - Fix doc string applied to wrong function.
This commit is contained in:
parent
fe03974d7d
commit
ed656aa487
@ -315,8 +315,10 @@ Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
|||||||
unsigned int aNumResults);
|
unsigned int aNumResults);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Compute the final decoding of an ongoing streaming inference and return
|
* @brief Compute the intermediate decoding of an ongoing streaming inference, flushing
|
||||||
* the result. Signals the end of an ongoing streaming inference.
|
buffers first. This ensures that all audio that has been streamed so far is
|
||||||
|
included in the result, but is more expensive than STT_IntermediateDecode()
|
||||||
|
because buffers are processed through the acoustic model.
|
||||||
*
|
*
|
||||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||||
*
|
*
|
||||||
@ -331,8 +333,9 @@ char* STT_IntermediateDecodeExpensive(StreamingState* aSctx);
|
|||||||
/**
|
/**
|
||||||
* @brief Compute the intermediate decoding of an ongoing streaming inference, flushing
|
* @brief Compute the intermediate decoding of an ongoing streaming inference, flushing
|
||||||
buffers first. This ensures that all audio that has been streamed so far is
|
buffers first. This ensures that all audio that has been streamed so far is
|
||||||
included in the result, but is more expensive than STT_IntermediateDecode()
|
included in the result, but is more expensive than
|
||||||
because buffers are processed through the acoustic model.
|
STT_IntermediateDecodeWithMetadata() because buffers are processed through
|
||||||
|
the acoustic model. Return results including metadata.
|
||||||
*
|
*
|
||||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||||
* @param aNumResults The number of candidate transcripts to return.
|
* @param aNumResults The number of candidate transcripts to return.
|
||||||
@ -343,6 +346,21 @@ char* STT_IntermediateDecodeExpensive(StreamingState* aSctx);
|
|||||||
* Returns NULL on error.
|
* Returns NULL on error.
|
||||||
*/
|
*/
|
||||||
STT_EXPORT
|
STT_EXPORT
|
||||||
|
Metadata* STT_IntermediateDecodeExpensiveWithMetadata(StreamingState* aSctx,
|
||||||
|
unsigned int aNumResults);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the final decoding of an ongoing streaming inference and return
|
||||||
|
* the result. Signals the end of an ongoing streaming inference.
|
||||||
|
*
|
||||||
|
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||||
|
*
|
||||||
|
* @return The STT result. The user is responsible for freeing the string using
|
||||||
|
* {@link STT_FreeString()}.
|
||||||
|
*
|
||||||
|
* @note This method will free the state pointer (@p aSctx).
|
||||||
|
*/
|
||||||
|
STT_EXPORT
|
||||||
char* STT_FinishStream(StreamingState* aSctx);
|
char* STT_FinishStream(StreamingState* aSctx);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -299,6 +299,28 @@ class Stream(object):
|
|||||||
)
|
)
|
||||||
return stt.impl.IntermediateDecodeExpensive(self._impl)
|
return stt.impl.IntermediateDecodeExpensive(self._impl)
|
||||||
|
|
||||||
|
def intermediateDecodeExpensiveWithMetadata(self, num_results=1):
|
||||||
|
"""
|
||||||
|
Compute the intermediate decoding of an ongoing streaming inference, flushing
|
||||||
|
buffers first. This ensures that all audio that has been streamed so far is
|
||||||
|
included in the result, but is more expensive than intermediateDecodeWithMetadata() because
|
||||||
|
buffers are processed through the acoustic model. Return results including
|
||||||
|
metadata.
|
||||||
|
|
||||||
|
:param num_results: Maximum number of candidate transcripts to return. Returned list might be smaller than this.
|
||||||
|
:type num_results: int
|
||||||
|
|
||||||
|
:return: Metadata object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information.
|
||||||
|
:type: :func:`Metadata`
|
||||||
|
|
||||||
|
:throws: RuntimeError if the stream object is not valid
|
||||||
|
"""
|
||||||
|
if not self._impl:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Stream object is not valid. Trying to decode an already finished stream?"
|
||||||
|
)
|
||||||
|
return stt.impl.IntermediateDecodeExpensiveWithMetadata(self._impl, num_results)
|
||||||
|
|
||||||
def finishStream(self):
|
def finishStream(self):
|
||||||
"""
|
"""
|
||||||
Compute the final decoding of an ongoing streaming inference and return
|
Compute the final decoding of an ongoing streaming inference and return
|
||||||
|
@ -120,6 +120,7 @@ static PyObject *parent_reference() {
|
|||||||
%newobject STT_SpeechToText;
|
%newobject STT_SpeechToText;
|
||||||
%newobject STT_IntermediateDecode;
|
%newobject STT_IntermediateDecode;
|
||||||
%newobject STT_IntermediateDecodeExpensive;
|
%newobject STT_IntermediateDecodeExpensive;
|
||||||
|
%newobject STT_IntermediateDecodeExpensiveWithMetadata;
|
||||||
%newobject STT_FinishStream;
|
%newobject STT_FinishStream;
|
||||||
%newobject STT_Version;
|
%newobject STT_Version;
|
||||||
%newobject STT_ErrorCodeToErrorMessage;
|
%newobject STT_ErrorCodeToErrorMessage;
|
||||||
|
@ -80,8 +80,8 @@ struct StreamingState {
|
|||||||
char* intermediateDecode() const;
|
char* intermediateDecode() const;
|
||||||
Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const;
|
Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const;
|
||||||
char* intermediateDecodeExpensive();
|
char* intermediateDecodeExpensive();
|
||||||
void flushBuffer();
|
Metadata* intermediateDecodeExpensiveWithMetadata(unsigned int num_results);
|
||||||
void finalizeStream();
|
void flushBuffers(bool addZeroMfccVectors = false);
|
||||||
char* finishStream();
|
char* finishStream();
|
||||||
Metadata* finishStreamWithMetadata(unsigned int num_results);
|
Metadata* finishStreamWithMetadata(unsigned int num_results);
|
||||||
|
|
||||||
@ -148,21 +148,28 @@ StreamingState::intermediateDecodeWithMetadata(unsigned int num_results) const
|
|||||||
char*
|
char*
|
||||||
StreamingState::intermediateDecodeExpensive()
|
StreamingState::intermediateDecodeExpensive()
|
||||||
{
|
{
|
||||||
flushBuffer();
|
flushBuffers();
|
||||||
return model_->decode(decoder_state_);
|
return model_->decode(decoder_state_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Metadata*
|
||||||
|
StreamingState::intermediateDecodeExpensiveWithMetadata(unsigned int num_results)
|
||||||
|
{
|
||||||
|
flushBuffers();
|
||||||
|
return model_->decode_metadata(decoder_state_, num_results);
|
||||||
|
}
|
||||||
|
|
||||||
char*
|
char*
|
||||||
StreamingState::finishStream()
|
StreamingState::finishStream()
|
||||||
{
|
{
|
||||||
finalizeStream();
|
flushBuffers(true);
|
||||||
return model_->decode(decoder_state_);
|
return model_->decode(decoder_state_);
|
||||||
}
|
}
|
||||||
|
|
||||||
Metadata*
|
Metadata*
|
||||||
StreamingState::finishStreamWithMetadata(unsigned int num_results)
|
StreamingState::finishStreamWithMetadata(unsigned int num_results)
|
||||||
{
|
{
|
||||||
finalizeStream();
|
flushBuffers(true);
|
||||||
return model_->decode_metadata(decoder_state_, num_results);
|
return model_->decode_metadata(decoder_state_, num_results);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -177,26 +184,16 @@ StreamingState::processAudioWindow(const vector<float>& buf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
StreamingState::flushBuffer()
|
StreamingState::flushBuffers(bool addZeroMfccVectors)
|
||||||
{
|
{
|
||||||
// Flush audio buffer
|
// Flush audio buffer
|
||||||
processAudioWindow(audio_buffer_);
|
processAudioWindow(audio_buffer_);
|
||||||
|
|
||||||
// Process final batch
|
if (addZeroMfccVectors) {
|
||||||
if (batch_buffer_.size() > 0) {
|
// Add empty mfcc vectors at end of sample
|
||||||
processBatch(batch_buffer_, batch_buffer_.size()/model_->mfcc_feats_per_timestep_);
|
for (int i = 0; i < model_->n_context_; ++i) {
|
||||||
}
|
addZeroMfccWindow();
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
StreamingState::finalizeStream()
|
|
||||||
{
|
|
||||||
// Flush audio buffer
|
|
||||||
processAudioWindow(audio_buffer_);
|
|
||||||
|
|
||||||
// Add empty mfcc vectors at end of sample
|
|
||||||
for (int i = 0; i < model_->n_context_; ++i) {
|
|
||||||
addZeroMfccWindow();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process final batch
|
// Process final batch
|
||||||
@ -492,6 +489,13 @@ STT_IntermediateDecodeExpensive(StreamingState* aSctx)
|
|||||||
return aSctx->intermediateDecodeExpensive();
|
return aSctx->intermediateDecodeExpensive();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Metadata*
|
||||||
|
STT_IntermediateDecodeExpensiveWithMetadata(StreamingState* aSctx,
|
||||||
|
unsigned int aNumResults)
|
||||||
|
{
|
||||||
|
return aSctx->intermediateDecodeExpensiveWithMetadata(aNumResults);
|
||||||
|
}
|
||||||
|
|
||||||
char*
|
char*
|
||||||
STT_FinishStream(StreamingState* aSctx)
|
STT_FinishStream(StreamingState* aSctx)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user