Adapt JavaScript bindings to new API

This commit is contained in:
Reuben Morais 2020-02-25 13:58:29 +01:00
parent 6e88a37ad4
commit 09048e2ea2
3 changed files with 77 additions and 49 deletions

View File

@ -42,12 +42,11 @@ function totalTime(hrtimeValue) {
return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
}
function metadataToString(metadata) {
function candidateTranscriptToString(transcript) {
var retval = ""
for (var i = 0; i < metadata.num_items; ++i) {
retval += metadata.items[i].character;
for (var i = 0; i < transcript.tokens.length; ++i) {
retval += transcript.tokens[i].text;
}
Ds.FreeMetadata(metadata);
return retval;
}
@ -117,7 +116,9 @@ audioStream.on('finish', () => {
const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate);
if (args['extended']) {
console.log(metadataToString(model.sttWithMetadata(audioBuffer)));
let metadata = model.sttWithMetadata(audioBuffer, 1);
console.log(candidateTranscriptToString(metadata.transcripts[0]));
Ds.FreeMetadata(metadata);
} else {
console.log(model.stt(audioBuffer));
}

View File

@ -47,8 +47,8 @@ using namespace node;
%typemap(argout) ModelState **retval {
$result = SWIGV8_ARRAY_NEW();
SWIGV8_AppendOutput($result, SWIG_From_int(result));
// owned by SWIG, ModelState destructor gets called when the JavaScript object is finalized (see below)
%append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, SWIG_POINTER_OWN));
// owned by the application. NodeJS does not guarantee the finalizer will be called so applications must call FreeModel themselves.
%append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0));
}
@ -68,27 +68,33 @@ using namespace node;
%nodefaultctor ModelState;
%nodefaultdtor ModelState;
%typemap(out) MetadataItem* %{
%typemap(out) TokenMetadata* %{
$result = SWIGV8_ARRAY_NEW();
for (int i = 0; i < arg1->num_items; ++i) {
SWIGV8_AppendOutput($result, SWIG_NewPointerObj(SWIG_as_voidptr(&result[i]), SWIGTYPE_p_MetadataItem, SWIG_POINTER_OWN));
for (int i = 0; i < arg1->num_tokens; ++i) {
SWIGV8_AppendOutput($result, SWIG_NewPointerObj(SWIG_as_voidptr(&result[i]), SWIGTYPE_p_TokenMetadata, 0));
}
%}
%nodefaultdtor Metadata;
%nodefaultctor Metadata;
%nodefaultctor MetadataItem;
%nodefaultdtor MetadataItem;
%extend struct Metadata {
~Metadata() {
DS_FreeMetadata($self);
%typemap(out) CandidateTranscript* %{
$result = SWIGV8_ARRAY_NEW();
for (int i = 0; i < arg1->num_transcripts; ++i) {
SWIGV8_AppendOutput($result, SWIG_NewPointerObj(SWIG_as_voidptr(&result[i]), SWIGTYPE_p_CandidateTranscript, 0));
}
}
%}
%extend struct MetadataItem {
~MetadataItem() { }
}
// The transcripts/tokens members are converted to JavaScript arrays by the
// typemaps in this file, so the separate element counts are not exposed.
%ignore Metadata::num_transcripts;
%ignore CandidateTranscript::num_tokens;

// Result structures are read-only from JavaScript. The struct is named
// CandidateTranscript (singular); a misspelled name here would silently
// leave the member writable.
%immutable Metadata::transcripts;
%immutable CandidateTranscript::tokens;
%immutable TokenMetadata::text;

// Do not generate default constructors or destructors for the result
// structures.
%nodefaultctor Metadata;
%nodefaultdtor Metadata;
%nodefaultctor CandidateTranscript;
%nodefaultdtor CandidateTranscript;
%nodefaultctor TokenMetadata;
%nodefaultdtor TokenMetadata;
%rename ("%(strip:[DS_])s") "";

View File

@ -122,8 +122,9 @@ Model.prototype.stt = function(aBuffer) {
*
* @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
*/
Model.prototype.sttWithMetadata = function(aBuffer) {
return binding.SpeechToTextWithMetadata(this._impl, aBuffer);
Model.prototype.sttWithMetadata = function(aBuffer, aNumResults) {
    // Any falsy count (omitted, 0, null, ...) falls back to a single result.
    if (!aNumResults) {
        aNumResults = 1;
    }
    return binding.SpeechToTextWithMetadata(this._impl, aBuffer, aNumResults);
};
/**
@ -171,6 +172,16 @@ Stream.prototype.intermediateDecode = function() {
return binding.IntermediateDecode(this._impl);
}
/**
 * Compute the intermediate decoding of an ongoing streaming inference and
 * return the result together with its metadata.
 *
 * @param {number} aNumResults Number of candidate transcripts to request. Falsy values (including 0 and an omitted argument) default to 1.
 *
 * @return {object} Outputs a :js:func:`Metadata` struct holding the candidate transcripts. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`.
 */
Stream.prototype.intermediateDecodeWithMetadata = function(aNumResults) {
    aNumResults = aNumResults || 1;
    // Use the metadata variant of the binding; plain IntermediateDecode
    // returns only the transcript string.
    return binding.IntermediateDecodeWithMetadata(this._impl, aNumResults);
};
/**
* Signal the end of an audio signal to an ongoing streaming inference, returns the STT result over the whole audio signal.
*
@ -191,8 +202,9 @@ Stream.prototype.finishStream = function() {
*
* This method will free the stream, it must not be used after this method is called.
*/
Stream.prototype.finishStreamWithMetadata = function() {
result = binding.FinishStreamWithMetadata(this._impl);
Stream.prototype.finishStreamWithMetadata = function(aNumResults) {
    // Falsy counts (omitted, 0, null, ...) default to a single result.
    aNumResults = aNumResults || 1;
    // Declare the result locally: assigning to an undeclared name leaks a
    // global in sloppy mode and throws a ReferenceError in strict mode.
    const result = binding.FinishStreamWithMetadata(this._impl, aNumResults);
    // The stream is freed by the call above, so drop the stale handle.
    this._impl = null;
    return result;
};
@ -236,35 +248,58 @@ function Version() {
}
//// Metadata and MetadataItem are here only for documentation purposes
//// Metadata, CandidateTranscript and TokenMetadata are here only for documentation purposes
/**
* @class
*
* Stores the text of an individual token, along with its timing information
*/
function MetadataItem() {}
function TokenMetadata() {}
/**
* The text corresponding to this token
*
* @return {string} The text generated
*/
MetadataItem.prototype.character = function() {}
TokenMetadata.prototype.text = function() {}
/**
* Position of the token in units of 20ms
*
* @return {int} The position of the token
*/
MetadataItem.prototype.timestep = function() {};
TokenMetadata.prototype.timestep = function() {};
/**
* Position of the token in seconds
*
* @return {float} The position of the token
*/
MetadataItem.prototype.start_time = function() {};
TokenMetadata.prototype.start_time = function() {};
/**
 * @class
 *
 * A single candidate transcript: the list of tokens that make it up, along
 * with a confidence value for the transcription as a whole.
 *
 * Documentation-only stub; it mirrors the fields exposed by the native binding.
 */
function CandidateTranscript () {}

/**
 * List of tokens
 *
 * @return {array} List of :js:func:`TokenMetadata`
 */
CandidateTranscript.prototype.tokens = function() {};

/**
 * Approximated confidence value for this transcription. This is roughly the
 * sum of the acoustic model logit values for each timestep/character that
 * contributed to the creation of this transcription.
 *
 * @return {float} Confidence value
 */
CandidateTranscript.prototype.confidence = function() {};
/**
* @class
@ -276,30 +311,16 @@ function Metadata () {}
/**
* List of candidate transcripts
*
* @return {array} List of :js:func:`MetadataItem`
* @return {array} List of :js:func:`CandidateTranscript` objects
*/
Metadata.prototype.items = function() {}
Metadata.prototype.transcripts = function() {}
/**
* Size of the list of items
*
* @return {int} Number of items
*/
Metadata.prototype.num_items = function() {}
/**
* Approximated confidence value for this transcription. This is roughly the
* sum of the acoustic model logit values for each timestep/character that
* contributed to the creation of this transcription.
*
* @return {float} Confidence value
*/
Metadata.prototype.confidence = function() {}
module.exports = {
Model: Model,
Metadata: Metadata,
MetadataItem: MetadataItem,
CandidateTranscript: CandidateTranscript,
TokenMetadata: TokenMetadata,
Version: Version,
FreeModel: FreeModel,
FreeStream: FreeStream,