diff --git a/native_client/javascript/client.js b/native_client/javascript/client.js index abbfe59e..16dd19e8 100644 --- a/native_client/javascript/client.js +++ b/native_client/javascript/client.js @@ -42,12 +42,11 @@ function totalTime(hrtimeValue) { return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4); } -function metadataToString(metadata) { +function candidateTranscriptToString(transcript) { var retval = "" - for (var i = 0; i < metadata.num_items; ++i) { - retval += metadata.items[i].character; + for (var i = 0; i < transcript.tokens.length; ++i) { + retval += transcript.tokens[i].text; } - Ds.FreeMetadata(metadata); return retval; } @@ -117,7 +116,9 @@ audioStream.on('finish', () => { const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate); if (args['extended']) { - console.log(metadataToString(model.sttWithMetadata(audioBuffer))); + let metadata = model.sttWithMetadata(audioBuffer, 1); + console.log(candidateTranscriptToString(metadata.transcripts[0])); + Ds.FreeMetadata(metadata); } else { console.log(model.stt(audioBuffer)); } diff --git a/native_client/javascript/deepspeech.i b/native_client/javascript/deepspeech.i index efbaa360..6b0151a4 100644 --- a/native_client/javascript/deepspeech.i +++ b/native_client/javascript/deepspeech.i @@ -47,8 +47,8 @@ using namespace node; %typemap(argout) ModelState **retval { $result = SWIGV8_ARRAY_NEW(); SWIGV8_AppendOutput($result, SWIG_From_int(result)); - // owned by SWIG, ModelState destructor gets called when the JavaScript object is finalized (see below) - %append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, SWIG_POINTER_OWN)); + // owned by the application. NodeJS does not guarantee the finalizer will be called so applications must call FreeModel themselves. 
+ %append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0)); } @@ -68,27 +68,33 @@ using namespace node; %nodefaultctor ModelState; %nodefaultdtor ModelState; -%typemap(out) MetadataItem* %{ +%typemap(out) TokenMetadata* %{ $result = SWIGV8_ARRAY_NEW(); - for (int i = 0; i < arg1->num_items; ++i) { - SWIGV8_AppendOutput($result, SWIG_NewPointerObj(SWIG_as_voidptr(&result[i]), SWIGTYPE_p_MetadataItem, SWIG_POINTER_OWN)); + for (int i = 0; i < arg1->num_tokens; ++i) { + SWIGV8_AppendOutput($result, SWIG_NewPointerObj(SWIG_as_voidptr(&result[i]), SWIGTYPE_p_TokenMetadata, 0)); } %} -%nodefaultdtor Metadata; -%nodefaultctor Metadata; -%nodefaultctor MetadataItem; -%nodefaultdtor MetadataItem; - -%extend struct Metadata { - ~Metadata() { - DS_FreeMetadata($self); +%typemap(out) CandidateTranscript* %{ + $result = SWIGV8_ARRAY_NEW(); + for (int i = 0; i < arg1->num_transcripts; ++i) { + SWIGV8_AppendOutput($result, SWIG_NewPointerObj(SWIG_as_voidptr(&result[i]), SWIGTYPE_p_CandidateTranscript, 0)); } -} +%} -%extend struct MetadataItem { - ~MetadataItem() { } -} +%ignore Metadata::num_transcripts; +%ignore CandidateTranscript::num_tokens; + +%immutable Metadata::transcripts; +%immutable CandidateTranscript::tokens; +%immutable TokenMetadata::text; + +%nodefaultctor Metadata; +%nodefaultdtor Metadata; +%nodefaultctor CandidateTranscript; +%nodefaultdtor CandidateTranscript; +%nodefaultctor TokenMetadata; +%nodefaultdtor TokenMetadata; %rename ("%(strip:[DS_])s") ""; diff --git a/native_client/javascript/index.js b/native_client/javascript/index.js index cca483f1..7a027bde 100644 --- a/native_client/javascript/index.js +++ b/native_client/javascript/index.js @@ -122,8 +122,9 @@ Model.prototype.stt = function(aBuffer) { * * @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. 
*/ -Model.prototype.sttWithMetadata = function(aBuffer) { - return binding.SpeechToTextWithMetadata(this._impl, aBuffer); +Model.prototype.sttWithMetadata = function(aBuffer, aNumResults) { + aNumResults = aNumResults || 1; + return binding.SpeechToTextWithMetadata(this._impl, aBuffer, aNumResults); } /** @@ -171,6 +172,16 @@ Stream.prototype.intermediateDecode = function() { return binding.IntermediateDecode(this._impl); } +/** + * Compute the intermediate decoding of an ongoing streaming inference, with metadata. + * + * @return {object} Outputs a :js:func:`Metadata` struct of candidate transcripts (aNumResults requested, default 1). The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. + */ +Stream.prototype.intermediateDecodeWithMetadata = function(aNumResults) { + aNumResults = aNumResults || 1; + return binding.IntermediateDecodeWithMetadata(this._impl, aNumResults); +} + /** * Signal the end of an audio signal to an ongoing streaming inference, returns the STT result over the whole audio signal. * @@ -191,8 +202,9 @@ Stream.prototype.finishStream = function() { * * This method will free the stream, it must not be used after this method is called. 
*/ -Stream.prototype.finishStreamWithMetadata = function() { - result = binding.FinishStreamWithMetadata(this._impl); +Stream.prototype.finishStreamWithMetadata = function(aNumResults) { + aNumResults = aNumResults || 1; + const result = binding.FinishStreamWithMetadata(this._impl, aNumResults); this._impl = null; return result; } @@ -236,35 +248,58 @@ function Version() { } -//// Metadata and MetadataItem are here only for documentation purposes +//// Metadata, CandidateTranscript and TokenMetadata are here only for documentation purposes /** * @class * * Stores each individual character, along with its timing information */ -function MetadataItem() {} +function TokenMetadata() {} /** * The character generated for transcription * * @return {string} The character generated */ -MetadataItem.prototype.character = function() {} +TokenMetadata.prototype.text = function() {} /** * Position of the character in units of 20ms * * @return {int} The position of the character */ -MetadataItem.prototype.timestep = function() {}; +TokenMetadata.prototype.timestep = function() {}; /** * Position of the character in seconds * * @return {float} The position of the character */ -MetadataItem.prototype.start_time = function() {}; +TokenMetadata.prototype.start_time = function() {}; + +/** + * @class + * + * Stores the entire CTC output as an array of character metadata objects + */ +function CandidateTranscript () {} + +/** + * List of tokens + * + * @return {array} List of :js:func:`TokenMetadata` + */ +CandidateTranscript.prototype.tokens = function() {} + +/** + * Approximated confidence value for this transcription. This is roughly the + * sum of the acoustic model logit values for each timestep/character that + * contributed to the creation of this transcription. 
+ * + * @return {float} Confidence value + */ +CandidateTranscript.prototype.confidence = function() {} /** * @class @@ -276,30 +311,16 @@ function Metadata () {} /** - * List of items + * List of candidate transcripts * - * @return {array} List of :js:func:`MetadataItem` + * @return {array} List of :js:func:`CandidateTranscript` objects */ -Metadata.prototype.items = function() {} +Metadata.prototype.transcripts = function() {} -/** - * Size of the list of items - * - * @return {int} Number of items - */ -Metadata.prototype.num_items = function() {} - -/** - * Approximated confidence value for this transcription. This is roughly the - * sum of the acoustic model logit values for each timestep/character that - * contributed to the creation of this transcription. - * - * @return {float} Confidence value - */ -Metadata.prototype.confidence = function() {} module.exports = { Model: Model, Metadata: Metadata, - MetadataItem: MetadataItem, + CandidateTranscript: CandidateTranscript, + TokenMetadata: TokenMetadata, Version: Version, FreeModel: FreeModel, FreeStream: FreeStream,