Adapt Java bindings to new API

This commit is contained in:
Reuben Morais 2020-02-25 14:29:49 +01:00
parent bb709ff955
commit c52f3b32fa
3 changed files with 48 additions and 18 deletions

View File

@ -18,18 +18,32 @@
%typemap(newfree) char* "DS_FreeString($1);";
%include "carrays.i"
%array_functions(struct MetadataItem, metadataItem_array);
%array_functions(struct TokenMetadata, TokenMetadata_array);
%array_functions(struct CandidateTranscript, CandidateTranscript_array);
%extend struct CandidateTranscript {
/**
* Retrieve one TokenMetadata element from this transcript's tokens array.
*
* @param i Array index of the TokenMetadata to get
*
* @return The TokenMetadata stored at index i
*/
// NOTE(review): the element is returned by value and no range check on i is
// visible in this wrapper, so there is no null path here; presumably callers
// must keep i within the transcript's token count - confirm.
TokenMetadata getToken(int i) {
return TokenMetadata_array_getitem(self->tokens, i);
}
}
%extend struct Metadata {
/**
* Retrieve one MetadataItem element
* Retrieve one CandidateTranscript element
*
* @param i Array index of the MetadataItem to get
* @param i Array index of the CandidateTranscript to get
*
* @return The MetadataItem requested or null
* @return The CandidateTranscript requested or null
*/
MetadataItem getItem(int i) {
return metadataItem_array_getitem(self->items, i);
CandidateTranscript getTranscript(int i) {
return CandidateTranscript_array_getitem(self->transcripts, i);
}
~Metadata() {
@ -37,10 +51,12 @@
}
}
%nodefaultdtor Metadata;
%nodefaultctor Metadata;
%nodefaultctor MetadataItem;
%nodefaultdtor MetadataItem;
%nodefaultdtor Metadata;
%nodefaultctor CandidateTranscript;
%nodefaultdtor CandidateTranscript;
%nodefaultctor TokenMetadata;
%nodefaultdtor TokenMetadata;
%newobject DS_SpeechToText;
%newobject DS_IntermediateDecode;

View File

@ -12,7 +12,7 @@ import org.junit.runners.MethodSorters;
import static org.junit.Assert.*;
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
import org.mozilla.deepspeech.libdeepspeech.Metadata;
import org.mozilla.deepspeech.libdeepspeech.CandidateTranscript;
import java.io.RandomAccessFile;
import java.io.FileNotFoundException;
@ -61,10 +61,10 @@ public class BasicTest {
m.freeModel();
}
private String metadataToString(Metadata m) {
private String candidateTranscriptToString(CandidateTranscript t) {
String retval = "";
for (int i = 0; i < m.getNum_items(); ++i) {
retval += m.getItem(i).getCharacter();
for (int i = 0; i < t.getNum_tokens(); ++i) {
retval += t.getToken(i).getText();
}
return retval;
}
@ -97,7 +97,7 @@ public class BasicTest {
ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
if (extendedMetadata) {
return metadataToString(m.sttWithMetadata(shorts, shorts.length));
return candidateTranscriptToString(m.sttWithMetadata(shorts, shorts.length, 1).getTranscript(0));
} else {
return m.stt(shorts, shorts.length);
}

View File

@ -117,11 +117,12 @@ public class DeepSpeechModel {
* @param buffer A 16-bit, mono raw audio signal at the appropriate
* sample rate (matching what the model was trained on).
* @param buffer_size The number of samples in the audio signal.
* @param num_results Number of candidate transcripts to return.
*
* @return A Metadata object holding the candidate transcripts; each transcript's tokens carry their text along with their timing information.
*/
public Metadata sttWithMetadata(short[] buffer, int buffer_size) {
return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size);
public Metadata sttWithMetadata(short[] buffer, int buffer_size, int num_results) {
return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, num_results);
}
/**
@ -160,6 +161,18 @@ public class DeepSpeechModel {
return impl.IntermediateDecode(ctx.get());
}
/**
* @brief Compute the intermediate decoding of an ongoing streaming inference,
*        returning the result as a Metadata object.
*
* Delegates to impl.IntermediateDecodeWithMetadata, passing the value of
* ctx.get() and num_results through unchanged.
*
* @param ctx A streaming state pointer returned by createStream().
* @param num_results Number of candidate transcripts to return.
*
* @return The intermediate STT result as a Metadata object holding the candidate transcripts.
*/
public Metadata intermediateDecodeWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
return impl.IntermediateDecodeWithMetadata(ctx.get(), num_results);
}
/**
* @brief Signal the end of an audio signal to an ongoing streaming
* inference, returns the STT result over the whole audio signal.
@ -179,12 +192,13 @@ public class DeepSpeechModel {
* inference, returns per-letter metadata.
*
* @param ctx A streaming state pointer returned by createStream().
* @param num_results Number of candidate transcripts to return.
*
* @return A Metadata object holding the candidate transcripts; each transcript's tokens carry their text along with their timing information.
*
* @note This method will free the state pointer (@p ctx).
*/
public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx) {
return impl.FinishStreamWithMetadata(ctx.get());
public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
return impl.FinishStreamWithMetadata(ctx.get(), num_results);
}
}