Adapt Java bindings to new API

2020-02-25 14:29:49 +01:00 · 2020-02-25 14:29:49 +01:00 · c52f3b32fa
commit c52f3b32fa
parent bb709ff955
3 changed files with 48 additions and 18 deletions
--- a/native_client/java/jni/deepspeech.i
+++ b/native_client/java/jni/deepspeech.i
@ -18,18 +18,32 @@
 %typemap(newfree) char* "DS_FreeString($1);";

 %include "carrays.i"
-%array_functions(struct MetadataItem, metadataItem_array);
+%array_functions(struct TokenMetadata, TokenMetadata_array);
+%array_functions(struct CandidateTranscript, CandidateTranscript_array);
+
+%extend struct CandidateTranscript {
+  /**
+   * Retrieve one TokenMetadata element
+   * 
+   * @param i Array index of the TokenMetadata to get
+   *
+   * @return The TokenMetadata requested or null
+   */
+  TokenMetadata getToken(int i) {
+    return TokenMetadata_array_getitem(self->tokens, i);
+  }
+}

 %extend struct Metadata {
  /**
-   * Retrieve one MetadataItem element
+   * Retrieve one CandidateTranscript element
   * 
-   * @param i Array index of the MetadataItem to get
+   * @param i Array index of the CandidateTranscript to get
   *
-   * @return The MetadataItem requested or null
+   * @return The CandidateTranscript requested or null
   */
-  MetadataItem getItem(int i) {
-    return metadataItem_array_getitem(self->items, i);
+  CandidateTranscript getTranscript(int i) {
+    return CandidateTranscript_array_getitem(self->transcripts, i);
  }

  ~Metadata() {
@ -37,10 +51,12 @@
  }
 }

-%nodefaultdtor Metadata;
 %nodefaultctor Metadata;
-%nodefaultctor MetadataItem;
-%nodefaultdtor MetadataItem;
+%nodefaultdtor Metadata;
+%nodefaultctor CandidateTranscript;
+%nodefaultdtor CandidateTranscript;
+%nodefaultctor TokenMetadata;
+%nodefaultdtor TokenMetadata;

 %newobject DS_SpeechToText;
 %newobject DS_IntermediateDecode;
--- a/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java
+++ b/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java
@ -12,7 +12,7 @@ import org.junit.runners.MethodSorters;
 import static org.junit.Assert.*;

 import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
-import org.mozilla.deepspeech.libdeepspeech.Metadata;
+import org.mozilla.deepspeech.libdeepspeech.CandidateTranscript;

 import java.io.RandomAccessFile;
 import java.io.FileNotFoundException;
@ -61,10 +61,10 @@ public class BasicTest {
        m.freeModel();
    }

-    private String metadataToString(Metadata m) {
+    private String candidateTranscriptToString(CandidateTranscript t) {
        String retval = "";
-        for (int i = 0; i < m.getNum_items(); ++i) {
-            retval += m.getItem(i).getCharacter();
+        for (int i = 0; i < t.getNum_tokens(); ++i) {
+            retval += t.getToken(i).getText();
        }
        return retval;
    }
@ -97,7 +97,7 @@ public class BasicTest {
            ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);

            if (extendedMetadata) {
-                return metadataToString(m.sttWithMetadata(shorts, shorts.length));
+                return candidateTranscriptToString(m.sttWithMetadata(shorts, shorts.length, 1).getTranscript(0));
            } else {
                return m.stt(shorts, shorts.length);
            }
--- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java
+++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java
@ -117,11 +117,12 @@ public class DeepSpeechModel {
    * @param buffer A 16-bit, mono raw audio signal at the appropriate
    *                sample rate (matching what the model was trained on).
    * @param buffer_size The number of samples in the audio signal.
+    * @param num_results Number of candidate transcripts to return.
    *
    * @return Outputs a Metadata object of individual letters along with their timing information.
    */
-    public Metadata sttWithMetadata(short[] buffer, int buffer_size) {
-        return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size);
+    public Metadata sttWithMetadata(short[] buffer, int buffer_size, int num_results) {
+        return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, num_results);
    }

   /**
@ -160,6 +161,18 @@ public class DeepSpeechModel {
        return impl.IntermediateDecode(ctx.get());
    }

+   /**
+    * @brief Compute the intermediate decoding of an ongoing streaming inference, returning candidate transcripts.
+    *
+    * @param ctx A streaming state pointer returned by createStream().
+    * @param num_results Number of candidate transcripts to return.
+    *
+    * @return The STT intermediate result, as a Metadata object holding the candidate transcripts.
+    */
+    public Metadata intermediateDecodeWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
+        return impl.IntermediateDecodeWithMetadata(ctx.get(), num_results);
+    }
+
   /**
    * @brief Signal the end of an audio signal to an ongoing streaming
    *        inference, returns the STT result over the whole audio signal.
@ -179,12 +192,13 @@ public class DeepSpeechModel {
    *        inference, returns per-letter metadata.
    *
    * @param ctx A streaming state pointer returned by createStream().
+    * @param num_results Number of candidate transcripts to return.
    *
    * @return Outputs a Metadata object of individual letters along with their timing information.
    *
    * @note This method will free the state pointer (@p ctx).
    */
-    public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx) {
-        return impl.FinishStreamWithMetadata(ctx.get());
+    public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
+        return impl.FinishStreamWithMetadata(ctx.get(), num_results);
    }
 }