Add DS_IntermediateDecodeWithMetadata

2020-02-25 13:38:25 +01:00 · 2020-02-25 13:38:25 +01:00 · ea8c7d2957
commit ea8c7d2957
parent 69bd032605
2 changed files with 38 additions and 5 deletions
--- a/native_client/deepspeech.cc
+++ b/native_client/deepspeech.cc
@@ -78,6 +78,7 @@ struct StreamingState {

  void feedAudioContent(const short* buffer, unsigned int buffer_size);
  char* intermediateDecode() const;
+  Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const;
  void finalizeStream();
  char* finishStream();
  Metadata* finishStreamWithMetadata(unsigned int num_results);
@@ -136,6 +137,12 @@ StreamingState::intermediateDecode() const
  return model_->decode(decoder_state_);
 }

+Metadata*
+StreamingState::intermediateDecodeWithMetadata(unsigned int num_results) const
+{
+  return model_->decode_metadata(decoder_state_, num_results);
+}
+
 char*
 StreamingState::finishStream()
 {
@@ -147,7 +154,6 @@ Metadata*
 StreamingState::finishStreamWithMetadata(unsigned int num_results)
 {
  finalizeStream();
-
  return model_->decode_metadata(decoder_state_, num_results);
 }

@@ -403,6 +409,13 @@ DS_IntermediateDecode(const StreamingState* aSctx)
  return aSctx->intermediateDecode();
 }

+Metadata*
+DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
+                                  unsigned int aNumResults)
+{
+  return aSctx->intermediateDecodeWithMetadata(aNumResults);
+}
+
 char*
 DS_FinishStream(StreamingState* aSctx)
 {
--- a/native_client/deepspeech.h
+++ b/native_client/deepspeech.h
@@ -200,8 +200,10 @@ char* DS_SpeechToText(ModelState* aCtx,
 * @param aBufferSize The number of samples in the audio signal.
 * @param aNumResults The number of candidate transcripts to return.
 *
- * @return Outputs a struct of individual letters along with their timing information. 
- *         The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error.
+ * @return Metadata struct containing multiple candidate transcripts. Each transcript
+ *         has per-token metadata including timing information. The user is
+ *         responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
+ *         Returns NULL on error.
 */
 DEEPSPEECH_EXPORT
 Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
@@ -248,6 +250,22 @@ void DS_FeedAudioContent(StreamingState* aSctx,
 DEEPSPEECH_EXPORT
 char* DS_IntermediateDecode(const StreamingState* aSctx);

+/**
+ * @brief Compute the intermediate decoding of an ongoing streaming inference,
+ *        return results including per-token metadata.
+ *
+ * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
+ * @param aNumResults The number of candidate transcripts to return.
+ *
+ * @return Metadata struct containing multiple candidate transcripts. Each transcript
+ *         has per-token metadata including timing information. The user is
+ *         responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
+ *         Returns NULL on error.
+ */
+DEEPSPEECH_EXPORT
+Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
+                                            unsigned int aNumResults);
+
 /**
 * @brief Signal the end of an audio signal to an ongoing streaming
 *        inference, returns the STT result over the whole audio signal.
@@ -269,8 +287,10 @@ char* DS_FinishStream(StreamingState* aSctx);
 * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
 * @param aNumResults The number of candidate transcripts to return.
 *
- * @return Outputs a struct of individual letters along with their timing information. 
- *         The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error.
+ * @return Metadata struct containing multiple candidate transcripts. Each transcript
+ *         has per-token metadata including timing information. The user is
+ *         responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
+ *         Returns NULL on error.
 *
 * @note This method will free the state pointer (@p aSctx).
 */