Add DS_IntermediateDecodeWithMetadata

This commit is contained in:
Reuben Morais 2020-02-25 13:38:25 +01:00
parent 69bd032605
commit ea8c7d2957
2 changed files with 38 additions and 5 deletions

View File

@ -78,6 +78,7 @@ struct StreamingState {
void feedAudioContent(const short* buffer, unsigned int buffer_size);
char* intermediateDecode() const;
Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const;
void finalizeStream();
char* finishStream();
Metadata* finishStreamWithMetadata(unsigned int num_results);
@ -136,6 +137,12 @@ StreamingState::intermediateDecode() const
return model_->decode(decoder_state_);
}
// Decode the current decoder state, requesting `num_results` candidate
// transcripts with metadata. Unlike finishStreamWithMetadata(), this does
// not call finalizeStream() first.
Metadata*
StreamingState::intermediateDecodeWithMetadata(unsigned int num_results) const
{
  Metadata* const partial_result =
    model_->decode_metadata(decoder_state_, num_results);
  return partial_result;
}
char*
StreamingState::finishStream()
{
@ -147,7 +154,6 @@ Metadata*
StreamingState::finishStreamWithMetadata(unsigned int num_results)
{
finalizeStream();
return model_->decode_metadata(decoder_state_, num_results);
}
@ -403,6 +409,13 @@ DS_IntermediateDecode(const StreamingState* aSctx)
return aSctx->intermediateDecode();
}
// C API entry point: forwards to the streaming state's
// intermediateDecodeWithMetadata() and hands the result back unchanged.
Metadata*
DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
                                  unsigned int aNumResults)
{
  Metadata* const metadata = aSctx->intermediateDecodeWithMetadata(aNumResults);
  return metadata;
}
char*
DS_FinishStream(StreamingState* aSctx)
{

View File

@ -200,8 +200,10 @@ char* DS_SpeechToText(ModelState* aCtx,
* @param aBufferSize The number of samples in the audio signal.
* @param aNumResults The number of candidate transcripts to return.
*
* @return Outputs a struct of individual letters along with their timing information.
* The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error.
* @return Metadata struct containing multiple candidate transcripts. Each transcript
* has per-token metadata including timing information. The user is
* responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
* Returns NULL on error.
*/
DEEPSPEECH_EXPORT
Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
@ -248,6 +250,22 @@ void DS_FeedAudioContent(StreamingState* aSctx,
DEEPSPEECH_EXPORT
char* DS_IntermediateDecode(const StreamingState* aSctx);
/**
 * @brief Compute the intermediate decoding of an ongoing streaming inference,
 *        returning candidate transcripts with per-token metadata.
 *
 * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
 * @param aNumResults The number of candidate transcripts to return.
 *
 * @return Metadata struct containing multiple candidate transcripts. Each transcript
 *         has per-token metadata including timing information. The user is
 *         responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
 *         Returns NULL on error.
 *
 * @note The streaming state is passed as a const pointer; this call decodes
 *       the current state without finalizing the stream.
 */
DEEPSPEECH_EXPORT
Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
unsigned int aNumResults);
/**
* @brief Signal the end of an audio signal to an ongoing streaming
* inference, returns the STT result over the whole audio signal.
@ -269,8 +287,10 @@ char* DS_FinishStream(StreamingState* aSctx);
* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
* @param aNumResults The number of candidate transcripts to return.
*
* @return Outputs a struct of individual letters along with their timing information.
* The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error.
* @return Metadata struct containing multiple candidate transcripts. Each transcript
* has per-token metadata including timing information. The user is
* responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
* Returns NULL on error.
*
* @note This method will free the state pointer (@p aSctx).
*/