diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index adaa0445..d284a319 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -78,6 +78,7 @@ struct StreamingState { void feedAudioContent(const short* buffer, unsigned int buffer_size); char* intermediateDecode() const; + Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const; void finalizeStream(); char* finishStream(); Metadata* finishStreamWithMetadata(unsigned int num_results); @@ -136,6 +137,12 @@ StreamingState::intermediateDecode() const return model_->decode(decoder_state_); } +Metadata* +StreamingState::intermediateDecodeWithMetadata(unsigned int num_results) const +{ + return model_->decode_metadata(decoder_state_, num_results); +} + char* StreamingState::finishStream() { @@ -147,7 +154,6 @@ Metadata* StreamingState::finishStreamWithMetadata(unsigned int num_results) { finalizeStream(); - return model_->decode_metadata(decoder_state_, num_results); } @@ -403,6 +409,13 @@ DS_IntermediateDecode(const StreamingState* aSctx) return aSctx->intermediateDecode(); } +Metadata* +DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, + unsigned int aNumResults) +{ + return aSctx->intermediateDecodeWithMetadata(aNumResults); +} + char* DS_FinishStream(StreamingState* aSctx) { diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 7aee1048..8bfee073 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -200,8 +200,10 @@ char* DS_SpeechToText(ModelState* aCtx, * @param aBufferSize The number of samples in the audio signal. * @param aNumResults The number of candidate transcripts to return. * - * @return Outputs a struct of individual letters along with their timing information. - * The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error. + * @return Metadata struct containing multiple candidate transcripts. 
Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. */ DEEPSPEECH_EXPORT Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, @@ -248,6 +250,22 @@ void DS_FeedAudioContent(StreamingState* aSctx, DEEPSPEECH_EXPORT char* DS_IntermediateDecode(const StreamingState* aSctx); +/** + * @brief Compute the intermediate decoding of an ongoing streaming inference, + * returns per-token metadata. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aNumResults The number of candidate transcripts to return. + * + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + */ +DEEPSPEECH_EXPORT +Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, + unsigned int aNumResults); + /** * @brief Signal the end of an audio signal to an ongoing streaming * inference, returns the STT result over the whole audio signal. @@ -269,8 +287,10 @@ char* DS_FinishStream(StreamingState* aSctx); * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. * @param aNumResults The number of candidate transcripts to return. * - * @return Outputs a struct of individual letters along with their timing information. - * The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error. + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. * * @note This method will free the state pointer (@p aSctx). */