Adapt Java bindings to new API
parent bb709ff955
commit c52f3b32fa
@@ -18,18 +18,32 @@
 %typemap(newfree) char* "DS_FreeString($1);";
 
 %include "carrays.i"
-%array_functions(struct MetadataItem, metadataItem_array);
+%array_functions(struct TokenMetadata, TokenMetadata_array);
+%array_functions(struct CandidateTranscript, CandidateTranscript_array);
+
+%extend struct CandidateTranscript {
+  /**
+   * Retrieve one TokenMetadata element
+   *
+   * @param i Array index of the TokenMetadata to get
+   *
+   * @return The TokenMetadata requested or null
+   */
+  TokenMetadata getToken(int i) {
+    return TokenMetadata_array_getitem(self->tokens, i);
+  }
+}
 
 %extend struct Metadata {
   /**
-   * Retrieve one MetadataItem element
+   * Retrieve one CandidateTranscript element
    *
-   * @param i Array index of the MetadataItem to get
+   * @param i Array index of the CandidateTranscript to get
    *
-   * @return The MetadataItem requested or null
+   * @return The CandidateTranscript requested or null
    */
-  MetadataItem getItem(int i) {
-    return metadataItem_array_getitem(self->items, i);
+  CandidateTranscript getTranscript(int i) {
+    return CandidateTranscript_array_getitem(self->transcripts, i);
   }
 
   ~Metadata() {
@@ -37,10 +51,12 @@
   }
 }
 
-%nodefaultdtor Metadata;
 %nodefaultctor Metadata;
-%nodefaultctor MetadataItem;
-%nodefaultdtor MetadataItem;
+%nodefaultdtor Metadata;
+%nodefaultctor CandidateTranscript;
+%nodefaultdtor CandidateTranscript;
+%nodefaultctor TokenMetadata;
+%nodefaultdtor TokenMetadata;
 
 %newobject DS_SpeechToText;
 %newobject DS_IntermediateDecode;
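Taken together, the two hunks above replace the flat MetadataItem array with the nested Metadata → CandidateTranscript → TokenMetadata layout. A minimal sketch of how the generated Java accessors would be traversed (getNum_transcripts() is assumed by analogy with the getNum_tokens() getter used in BasicTest below, and dumpMetadata is a hypothetical helper, not part of this change):

    // Sketch only: walk the nested structure exposed by the %extend blocks above.
    // getNum_transcripts() is an assumed SWIG getter, by analogy with getNum_tokens().
    void dumpMetadata(Metadata metadata) {
        for (int t = 0; t < metadata.getNum_transcripts(); ++t) {
            CandidateTranscript transcript = metadata.getTranscript(t);
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < transcript.getNum_tokens(); ++i) {
                text.append(transcript.getToken(i).getText());
            }
            System.out.println(text);
        }
    }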
@@ -12,7 +12,7 @@ import org.junit.runners.MethodSorters;
 import static org.junit.Assert.*;
 
 import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
-import org.mozilla.deepspeech.libdeepspeech.Metadata;
+import org.mozilla.deepspeech.libdeepspeech.CandidateTranscript;
 
 import java.io.RandomAccessFile;
 import java.io.FileNotFoundException;
@@ -61,10 +61,10 @@ public class BasicTest {
         m.freeModel();
     }
 
-    private String metadataToString(Metadata m) {
+    private String candidateTranscriptToString(CandidateTranscript t) {
         String retval = "";
-        for (int i = 0; i < m.getNum_items(); ++i) {
-            retval += m.getItem(i).getCharacter();
+        for (int i = 0; i < t.getNum_tokens(); ++i) {
+            retval += t.getToken(i).getText();
         }
         return retval;
     }
@@ -97,7 +97,7 @@ public class BasicTest {
         ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
 
         if (extendedMetadata) {
-            return metadataToString(m.sttWithMetadata(shorts, shorts.length));
+            return candidateTranscriptToString(m.sttWithMetadata(shorts, shorts.length, 1).getTranscript(0));
         } else {
             return m.stt(shorts, shorts.length);
         }
@@ -117,11 +117,12 @@ public class DeepSpeechModel {
      * @param buffer A 16-bit, mono raw audio signal at the appropriate
      *               sample rate (matching what the model was trained on).
      * @param buffer_size The number of samples in the audio signal.
+     * @param num_results Number of candidate transcripts to return.
      *
      * @return Outputs a Metadata object of individual letters along with their timing information.
      */
-    public Metadata sttWithMetadata(short[] buffer, int buffer_size) {
-        return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size);
+    public Metadata sttWithMetadata(short[] buffer, int buffer_size, int num_results) {
+        return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, num_results);
     }
 
     /**
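With the extra num_results argument, callers of the one-shot API now state up front how many candidate transcripts they want. A hedged usage sketch (model and samples are placeholder names, not part of this diff):

    // Sketch: request up to 3 candidate transcripts in a single synchronous call.
    Metadata metadata = model.sttWithMetadata(samples, samples.length, 3);
    CandidateTranscript best = metadata.getTranscript(0);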
@@ -160,6 +161,18 @@ public class DeepSpeechModel {
         return impl.IntermediateDecode(ctx.get());
     }
 
+    /**
+     * @brief Compute the intermediate decoding of an ongoing streaming inference.
+     *
+     * @param ctx A streaming state pointer returned by createStream().
+     * @param num_results Number of candidate transcripts to return.
+     *
+     * @return The STT intermediate result.
+     */
+    public Metadata intermediateDecodeWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
+        return impl.IntermediateDecodeWithMetadata(ctx.get(), num_results);
+    }
+
     /**
      * @brief Signal the end of an audio signal to an ongoing streaming
      *        inference, returns the STT result over the whole audio signal.
@@ -179,12 +192,13 @@ public class DeepSpeechModel {
      *        inference, returns per-letter metadata.
      *
      * @param ctx A streaming state pointer returned by createStream().
+     * @param num_results Number of candidate transcripts to return.
      *
      * @return Outputs a Metadata object of individual letters along with their timing information.
      *
      * @note This method will free the state pointer (@p ctx).
      */
-    public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx) {
-        return impl.FinishStreamWithMetadata(ctx.get());
+    public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
+        return impl.FinishStreamWithMetadata(ctx.get(), num_results);
     }
 }
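The streaming methods gain the same parameter. A sketch of the streaming flow under the new signatures (createStream() and feedAudioContent() are assumed from the existing binding and are not touched by this diff; model and chunks are placeholder names):

    // Sketch only: streaming with the new num_results parameter.
    DeepSpeechStreamingState ctx = model.createStream();
    for (short[] chunk : chunks) {
        model.feedAudioContent(ctx, chunk, chunk.length);
        // Peek at the current best candidates without ending the stream.
        Metadata partial = model.intermediateDecodeWithMetadata(ctx, 3);
    }
    // Ends the stream, frees ctx, and returns up to 3 candidate transcripts.
    Metadata finished = model.finishStreamWithMetadata(ctx, 3);
    CandidateTranscript best = finished.getTranscript(0);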