Merge pull request #3012 from reuben/index-ts
Switch index.js to TypeScript
This commit is contained in:
commit
b12c7be710
@ -29,7 +29,7 @@ pip3:
|
||||
$(PIP_INSTALL) -r ../taskcluster/docs-requirements.txt
|
||||
|
||||
npm:
|
||||
npm install jsdoc@3.6.3
|
||||
npm install typedoc@0.17.4 typescript@3.8.3 @types/node@13.9.x
|
||||
|
||||
submodule:
|
||||
git submodule update --init --remote
|
||||
|
@ -1,8 +1,6 @@
|
||||
JavaScript (NodeJS / ElectronJS)
|
||||
================================
|
||||
|
||||
Support for TypeScript is :download:`provided in index.d.ts<../native_client/javascript/index.d.ts>`
|
||||
|
||||
Model
|
||||
-----
|
||||
|
||||
|
@ -80,7 +80,9 @@ breathe_projects = {
|
||||
"deepspeech-dotnet": "xml-dotnet/",
|
||||
}
|
||||
|
||||
js_source_path = "../native_client/javascript"
|
||||
js_source_path = "../native_client/javascript/index.ts"
|
||||
js_language = "typescript"
|
||||
jsdoc_config_path = "../native_client/javascript/tsconfig.json"
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['.templates']
|
||||
|
@ -59,7 +59,7 @@ copy-deps: build
|
||||
node-wrapper: copy-deps build
|
||||
PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) --no-color package $(NODE_BUILD_VERBOSE)
|
||||
|
||||
npm-pack: clean package.json index.js npm-dev
|
||||
npm-pack: clean package.json index.ts npm-dev
|
||||
PATH="$(NODE_MODULES_BIN):${PATH}" tsc && $(NPM_TOOL) pack $(NODE_BUILD_VERBOSE)
|
||||
|
||||
deepspeech_wrap.cxx: deepspeech.i ds-swig
|
||||
|
@ -3,7 +3,7 @@
|
||||
// This is required for process.versions.electron below
|
||||
/// <reference types="electron" />
|
||||
|
||||
import Ds from "./index";
|
||||
import * as Ds from "./index";
|
||||
import * as Fs from "fs";
|
||||
import Sox from "sox-stream";
|
||||
import * as argparse from "argparse";
|
||||
|
196
native_client/javascript/index.d.ts
vendored
196
native_client/javascript/index.d.ts
vendored
@ -1,196 +0,0 @@
|
||||
/**
|
||||
* Stores text of an individual token, along with its timing information
|
||||
*/
|
||||
export interface TokenMetadata {
|
||||
text: string;
|
||||
timestep: number;
|
||||
start_time: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* A single transcript computed by the model, including a confidence value and
|
||||
* the metadata for its constituent tokens.
|
||||
*/
|
||||
export interface CandidateTranscript {
|
||||
tokens: TokenMetadata[];
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* An array of CandidateTranscript objects computed by the model.
|
||||
*/
|
||||
export interface Metadata {
|
||||
transcripts: CandidateTranscript[];
|
||||
}
|
||||
|
||||
/**
|
||||
* An object providing an interface to a trained DeepSpeech model.
|
||||
*
|
||||
* @param aModelPath The path to the frozen model graph.
|
||||
*
|
||||
* @throws on error
|
||||
*/
|
||||
export class Model {
|
||||
constructor(aModelPath: string)
|
||||
|
||||
/**
|
||||
* Get beam width value used by the model. If :js:func:Model.setBeamWidth was
|
||||
* not called before, will return the default value loaded from the model file.
|
||||
*
|
||||
* @return Beam width value used by the model.
|
||||
*/
|
||||
beamWidth(): number;
|
||||
|
||||
/**
|
||||
* Set beam width value used by the model.
|
||||
*
|
||||
* @param The beam width used by the model. A larger beam width value generates better results at the cost of decoding time.
|
||||
*
|
||||
* @return Zero on success, non-zero on failure.
|
||||
*/
|
||||
setBeamWidth(aBeamWidth: number): number;
|
||||
|
||||
/**
|
||||
* Return the sample rate expected by the model.
|
||||
*
|
||||
* @return Sample rate.
|
||||
*/
|
||||
sampleRate(): number;
|
||||
|
||||
/**
|
||||
* Enable decoding using an external scorer.
|
||||
*
|
||||
* @param aScorerPath The path to the external scorer file.
|
||||
*
|
||||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
enableExternalScorer(aScorerPath: string): number;
|
||||
|
||||
/**
|
||||
* Disable decoding using an external scorer.
|
||||
*
|
||||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
disableExternalScorer(): number;
|
||||
|
||||
/**
|
||||
* Set hyperparameters alpha and beta of the external scorer.
|
||||
*
|
||||
* @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight.
|
||||
* @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight.
|
||||
*
|
||||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
setScorerAlphaBeta(aLMAlpha: number, aLMBeta: number): number;
|
||||
|
||||
/**
|
||||
* Use the DeepSpeech model to perform Speech-To-Text.
|
||||
*
|
||||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
|
||||
*
|
||||
* @return The STT result. Returns undefined on error.
|
||||
*/
|
||||
stt(aBuffer: Buffer): string;
|
||||
|
||||
/**
|
||||
* Use the DeepSpeech model to perform Speech-To-Text and output metadata
|
||||
* about the results.
|
||||
*
|
||||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
|
||||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this.
|
||||
* Default value is 1 if not specified.
|
||||
*
|
||||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information.
|
||||
* The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
|
||||
*/
|
||||
sttWithMetadata(aBuffer: Buffer, aNumResults?: number): Metadata;
|
||||
|
||||
/**
|
||||
* Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object.
|
||||
*
|
||||
* @return a :js:func:`Stream` object that represents the streaming state.
|
||||
*
|
||||
* @throws on error
|
||||
*/
|
||||
createStream(): Stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* @class
|
||||
* Provides an interface to a DeepSpeech stream. The constructor cannot be called
|
||||
* directly, use :js:func:`Model.createStream`.
|
||||
*/
|
||||
declare class Stream {
|
||||
/**
|
||||
* Feed audio samples to an ongoing streaming inference.
|
||||
*
|
||||
* @param aBuffer An array of 16-bit, mono raw audio samples at the
|
||||
* appropriate sample rate (matching what the model was trained on).
|
||||
*/
|
||||
feedAudioContent(aBuffer: Buffer): void;
|
||||
|
||||
/**
|
||||
* Compute the intermediate decoding of an ongoing streaming inference.
|
||||
*
|
||||
* @return The STT intermediate result.
|
||||
*/
|
||||
intermediateDecode(): string;
|
||||
|
||||
/**
|
||||
* Compute the intermediate decoding of an ongoing streaming inference, return results including metadata.
|
||||
*
|
||||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
|
||||
*
|
||||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
|
||||
*/
|
||||
intermediateDecodeWithMetadata (aNumResults?: number): Metadata;
|
||||
|
||||
/**
|
||||
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference.
|
||||
*
|
||||
* @return The STT result.
|
||||
*
|
||||
* This method will free the stream, it must not be used after this method is called.
|
||||
*/
|
||||
finishStream(): string;
|
||||
|
||||
/**
|
||||
* Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference.
|
||||
*
|
||||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
|
||||
*
|
||||
* @return Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`.
|
||||
*
|
||||
* This method will free the stream, it must not be used after this method is called.
|
||||
*/
|
||||
finishStreamWithMetadata(aNumResults?: number): Metadata;
|
||||
}
|
||||
|
||||
/**
|
||||
* Frees associated resources and destroys model object.
|
||||
*
|
||||
* @param model A model pointer returned by :js:func:`Model`
|
||||
*
|
||||
*/
|
||||
export function FreeModel(model: Model): void;
|
||||
|
||||
/**
|
||||
* Free memory allocated for metadata information.
|
||||
*
|
||||
* @param metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata` or :js:func:`Stream.finishStreamWithMetadata`
|
||||
*/
|
||||
export function FreeMetadata(metadata: Metadata): void;
|
||||
|
||||
/**
|
||||
* Destroy a streaming state without decoding the computed logits. This
|
||||
* can be used if you no longer need the result of an ongoing streaming
|
||||
* inference and don't want to perform a costly decode operation.
|
||||
*
|
||||
* @param stream A streaming state pointer returned by :js:func:`Model.createStream`.
|
||||
*/
|
||||
export function FreeStream(stream: Stream): void;
|
||||
|
||||
/**
|
||||
* Print version of this library and of the linked TensorFlow library on standard output.
|
||||
*/
|
||||
export function Version(): void;
|
@ -1,336 +0,0 @@
|
||||
'use strict';
|
||||
|
||||
const binary = require('node-pre-gyp');
|
||||
const path = require('path')
|
||||
// 'lib', 'binding', 'v0.1.1', ['node', 'v' + process.versions.modules, process.platform, process.arch].join('-'), 'deepspeech-bindings.node')
|
||||
const binding_path = binary.find(path.resolve(path.join(__dirname, 'package.json')));
|
||||
|
||||
// On Windows, we can't rely on RPATH being set to $ORIGIN/../ or on
|
||||
// @loader_path/../ but we can change the PATH to include the proper directory
|
||||
// for the dynamic linker
|
||||
if (process.platform === 'win32') {
|
||||
const dslib_path = path.resolve(path.join(binding_path, '../..'));
|
||||
var oldPath = process.env.PATH;
|
||||
process.env['PATH'] = `${dslib_path};${process.env.PATH}`;
|
||||
}
|
||||
|
||||
const binding = require(binding_path);
|
||||
|
||||
if (process.platform === 'win32') {
|
||||
process.env['PATH'] = oldPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* @class
|
||||
* An object providing an interface to a trained DeepSpeech model.
|
||||
*
|
||||
* @param {string} aModelPath The path to the frozen model graph.
|
||||
*
|
||||
* @throws on error
|
||||
*/
|
||||
function Model(aModelPath) {
|
||||
this._impl = null;
|
||||
|
||||
const rets = binding.CreateModel(aModelPath);
|
||||
const status = rets[0];
|
||||
const impl = rets[1];
|
||||
if (status !== 0) {
|
||||
throw "CreateModel failed with '"+binding.ErrorCodeToErrorMessage(status)+"' (0x" + status.toString(16) + ")";
|
||||
}
|
||||
|
||||
this._impl = impl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get beam width value used by the model. If :js:func:Model.setBeamWidth was
|
||||
* not called before, will return the default value loaded from the model file.
|
||||
*
|
||||
* @return {number} Beam width value used by the model.
|
||||
*/
|
||||
Model.prototype.beamWidth = function() {
|
||||
return binding.GetModelBeamWidth(this._impl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set beam width value used by the model.
|
||||
*
|
||||
* @param {number} The beam width used by the model. A larger beam width value generates better results at the cost of decoding time.
|
||||
*
|
||||
* @return {number} Zero on success, non-zero on failure.
|
||||
*/
|
||||
Model.prototype.setBeamWidth = function(aBeamWidth) {
|
||||
return binding.SetModelBeamWidth(this._impl, aBeamWidth);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the sample rate expected by the model.
|
||||
*
|
||||
* @return {number} Sample rate.
|
||||
*/
|
||||
Model.prototype.sampleRate = function() {
|
||||
return binding.GetModelSampleRate(this._impl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable decoding using an external scorer.
|
||||
*
|
||||
* @param {string} aScorerPath The path to the external scorer file.
|
||||
*
|
||||
* @throws on error
|
||||
*/
|
||||
Model.prototype.enableExternalScorer = function(aScorerPath) {
|
||||
const status = binding.EnableExternalScorer(this._impl, aScorerPath);
|
||||
if (status !== 0) {
|
||||
throw "EnableExternalScorer failed with '"+binding.ErrorCodeToErrorMessage(status)+"' (0x" + status.toString(16) + ")";
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Disable decoding using an external scorer.
 *
 * @return {number} Zero on success, non-zero on failure (invalid arguments).
 */
Model.prototype.disableExternalScorer = function() {
  // Call the disable entry point of the native binding. The previous code
  // called EnableExternalScorer without a scorer path, which never disabled
  // anything.
  return binding.DisableExternalScorer(this._impl);
}
|
||||
|
||||
/**
|
||||
* Set hyperparameters alpha and beta of the external scorer.
|
||||
*
|
||||
* @param {float} aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight.
|
||||
* @param {float} aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight.
|
||||
*
|
||||
* @return {number} Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
Model.prototype.setScorerAlphaBeta = function(aLMAlpha, aLMBeta) {
|
||||
return binding.SetScorerAlphaBeta(this._impl, aLMAlpha, aLMBeta);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the DeepSpeech model to perform Speech-To-Text.
|
||||
*
|
||||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
|
||||
*
|
||||
* @return {string} The STT result. Returns undefined on error.
|
||||
*/
|
||||
Model.prototype.stt = function(aBuffer) {
|
||||
return binding.SpeechToText(this._impl, aBuffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the DeepSpeech model to perform Speech-To-Text and output results including metadata.
|
||||
*
|
||||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
|
||||
* @param {number} aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
|
||||
*
|
||||
* @return {object} :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
|
||||
*/
|
||||
Model.prototype.sttWithMetadata = function(aBuffer, aNumResults) {
|
||||
aNumResults = aNumResults || 1;
|
||||
return binding.SpeechToTextWithMetadata(this._impl, aBuffer, aNumResults);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object.
|
||||
*
|
||||
* @return {object} a :js:func:`Stream` object that represents the streaming state.
|
||||
*
|
||||
* @throws on error
|
||||
*/
|
||||
Model.prototype.createStream = function() {
|
||||
const rets = binding.CreateStream(this._impl);
|
||||
const status = rets[0];
|
||||
const ctx = rets[1];
|
||||
if (status !== 0) {
|
||||
throw "CreateStream failed with '"+binding.ErrorCodeToErrorMessage(status)+"' (0x" + status.toString(16) + ")";
|
||||
}
|
||||
return new Stream(ctx);
|
||||
}
|
||||
|
||||
/**
|
||||
* @class
|
||||
* Provides an interface to a DeepSpeech stream. The constructor cannot be called
|
||||
* directly, use :js:func:`Model.createStream`.
|
||||
*/
|
||||
function Stream(nativeStream) {
|
||||
this._impl = nativeStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Feed audio samples to an ongoing streaming inference.
|
||||
*
|
||||
* @param {buffer} aBuffer An array of 16-bit, mono raw audio samples at the
|
||||
* appropriate sample rate (matching what the model was trained on).
|
||||
*/
|
||||
Stream.prototype.feedAudioContent = function(aBuffer) {
|
||||
binding.FeedAudioContent(this._impl, aBuffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the intermediate decoding of an ongoing streaming inference.
|
||||
*
|
||||
* @return {string} The STT intermediate result.
|
||||
*/
|
||||
Stream.prototype.intermediateDecode = function() {
|
||||
return binding.IntermediateDecode(this._impl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the intermediate decoding of an ongoing streaming inference, return results including metadata.
|
||||
*
|
||||
* @param {number} aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
|
||||
*
|
||||
* @return {object} :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
|
||||
*/
|
||||
Stream.prototype.intermediateDecodeWithMetadata = function(aNumResults) {
|
||||
aNumResults = aNumResults || 1;
|
||||
return binding.IntermediateDecodeWithMetadata(this._impl, aNumResults);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference.
|
||||
*
|
||||
* @return {string} The STT result.
|
||||
*
|
||||
* This method will free the stream, it must not be used after this method is called.
|
||||
*/
|
||||
Stream.prototype.finishStream = function() {
|
||||
let result = binding.FinishStream(this._impl);
|
||||
this._impl = null;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference.
|
||||
*
|
||||
* @param {number} aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
|
||||
*
|
||||
* @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`.
|
||||
*
|
||||
* This method will free the stream, it must not be used after this method is called.
|
||||
*/
|
||||
Stream.prototype.finishStreamWithMetadata = function(aNumResults) {
|
||||
aNumResults = aNumResults || 1;
|
||||
let result = binding.FinishStreamWithMetadata(this._impl, aNumResults);
|
||||
this._impl = null;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Frees associated resources and destroys model object.
|
||||
*
|
||||
* @param {object} model A model pointer returned by :js:func:`Model`
|
||||
*
|
||||
*/
|
||||
function FreeModel(model) {
|
||||
return binding.FreeModel(model._impl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Free memory allocated for metadata information.
|
||||
*
|
||||
* @param {object} metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata` or :js:func:`Stream.finishStreamWithMetadata`
|
||||
*/
|
||||
function FreeMetadata(metadata) {
|
||||
return binding.FreeMetadata(metadata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy a streaming state without decoding the computed logits. This
|
||||
* can be used if you no longer need the result of an ongoing streaming
|
||||
* inference and don't want to perform a costly decode operation.
|
||||
*
|
||||
* @param {Object} stream A stream object returned by :js:func:`Model.createStream`.
|
||||
*/
|
||||
function FreeStream(stream) {
|
||||
return binding.FreeStream(stream._impl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print version of this library and of the linked TensorFlow library on standard output.
|
||||
*/
|
||||
function Version() {
|
||||
return binding.Version();
|
||||
}
|
||||
|
||||
|
||||
//// Metadata, CandidateTranscript and TokenMetadata are here only for documentation purposes
|
||||
|
||||
/**
|
||||
* @class
|
||||
*
|
||||
* Stores text of an individual token, along with its timing information
|
||||
*/
|
||||
function TokenMetadata() {}
|
||||
|
||||
/**
|
||||
* The text corresponding to this token
|
||||
*
|
||||
* @return {string} The text generated
|
||||
*/
|
||||
TokenMetadata.prototype.text = function() {}
|
||||
|
||||
/**
|
||||
* Position of the token in units of 20ms
|
||||
*
|
||||
* @return {int} The position of the token
|
||||
*/
|
||||
TokenMetadata.prototype.timestep = function() {};
|
||||
|
||||
/**
|
||||
* Position of the token in seconds
|
||||
*
|
||||
* @return {float} The position of the token
|
||||
*/
|
||||
TokenMetadata.prototype.start_time = function() {};
|
||||
|
||||
/**
|
||||
* @class
|
||||
*
|
||||
* A single transcript computed by the model, including a confidence value and
|
||||
* the metadata for its constituent tokens.
|
||||
*/
|
||||
function CandidateTranscript () {}
|
||||
|
||||
/**
|
||||
* Array of tokens
|
||||
*
|
||||
* @return {array} Array of :js:func:`TokenMetadata`
|
||||
*/
|
||||
CandidateTranscript.prototype.tokens = function() {}
|
||||
|
||||
/**
|
||||
* Approximated confidence value for this transcription. This is roughly the
|
||||
* sum of the acoustic model logit values for each timestep/token that
|
||||
* contributed to the creation of this transcription.
|
||||
*
|
||||
* @return {float} Confidence value
|
||||
*/
|
||||
CandidateTranscript.prototype.confidence = function() {}
|
||||
|
||||
/**
|
||||
* @class
|
||||
*
|
||||
* An array of CandidateTranscript objects computed by the model.
|
||||
*/
|
||||
function Metadata () {}
|
||||
|
||||
/**
|
||||
* Array of transcripts
|
||||
*
|
||||
* @return {array} Array of :js:func:`CandidateTranscript` objects
|
||||
*/
|
||||
Metadata.prototype.transcripts = function() {}
|
||||
|
||||
|
||||
module.exports = {
|
||||
Model: Model,
|
||||
Metadata: Metadata,
|
||||
CandidateTranscript: CandidateTranscript,
|
||||
TokenMetadata: TokenMetadata,
|
||||
Version: Version,
|
||||
FreeModel: FreeModel,
|
||||
FreeStream: FreeStream,
|
||||
FreeMetadata: FreeMetadata
|
||||
};
|
307
native_client/javascript/index.ts
Normal file
307
native_client/javascript/index.ts
Normal file
@ -0,0 +1,307 @@
|
||||
import binary from 'node-pre-gyp';
|
||||
import path from 'path';
|
||||
|
||||
// 'lib', 'binding', 'v0.1.1', ['node', 'v' + process.versions.modules, process.platform, process.arch].join('-'), 'deepspeech-bindings.node')
const binding_path = binary.find(path.resolve(path.join(__dirname, 'package.json')));

/**
 * Load the native addon located at `bindingPath`.
 *
 * On Windows, we can't rely on RPATH being set to `$ORIGIN/../` or on
 * `@loader_path/../`, so the directory two levels above the addon is
 * temporarily prepended to PATH for the dynamic linker while the addon is
 * being loaded. The previous PATH value is put back afterwards, even when
 * loading throws.
 *
 * @param bindingPath Location of the compiled addon, as returned by `binary.find`.
 *
 * @return The loaded native module.
 *
 * @throws whatever `require` throws when the addon cannot be loaded.
 */
function loadBinding(bindingPath: string): any {
  if (process.platform !== 'win32') {
    return require(bindingPath);
  }

  const dslib_path = path.resolve(path.join(bindingPath, '../..'));
  const oldPath = process.env['PATH'];
  // Avoid producing "<dir>;undefined" when PATH is not set at all.
  process.env['PATH'] = oldPath === undefined ? dslib_path : `${dslib_path};${oldPath}`;
  try {
    return require(bindingPath);
  } finally {
    if (oldPath === undefined) {
      // Assigning undefined to process.env would store the string "undefined".
      delete process.env['PATH'];
    } else {
      process.env['PATH'] = oldPath;
    }
  }
}

const binding = loadBinding(binding_path);
|
||||
|
||||
/**
 * Stores text of an individual token, along with its timing information
 */
export interface TokenMetadata {
  /** The text corresponding to this token */
  text: string;

  /** Position of the token in units of 20ms */
  timestep: number;

  /** Position of the token in seconds */
  start_time: number;
}

/**
 * A single transcript computed by the model, including a confidence value and
 * the metadata for its constituent tokens.
 */
export interface CandidateTranscript {
  /** Array of :js:func:`TokenMetadata` objects making up this transcript */
  tokens: TokenMetadata[];

  /**
   * Approximated confidence value for this transcription. This is roughly the
   * sum of the acoustic model logit values for each timestep/token that
   * contributed to the creation of this transcription.
   */
  confidence: number;
}

/**
 * An array of CandidateTranscript objects computed by the model.
 */
export interface Metadata {
  /** Array of :js:func:`CandidateTranscript` objects */
  transcripts: CandidateTranscript[];
}
|
||||
|
||||
/**
 * Provides an interface to a DeepSpeech stream. The constructor cannot be called
 * directly, use :js:func:`Model.createStream`.
 */
class Stream {
  /** @internal */
  _impl: any;

  /**
   * @param nativeStream SWIG wrapper for native StreamingState object.
   */
  constructor(nativeStream: object) {
    this._impl = nativeStream;
  }

  /**
   * Return the native handle, refusing to continue when it has been cleared
   * (which :js:func:`Stream.finishStream` and
   * :js:func:`Stream.finishStreamWithMetadata` do). This keeps a null handle
   * from being handed to the native binding.
   *
   * @param aApiName Name of the native call, used in the error message.
   *
   * @throws if the stream no longer has a native handle.
   *
   * @internal
   */
  private liveHandle(aApiName: string): any {
    if (this._impl === null || this._impl === undefined) {
      // Strings are thrown to match the error style used by Model.
      throw `${aApiName} failed: stream has already been finished or freed`;
    }
    return this._impl;
  }

  /**
   * Feed audio samples to an ongoing streaming inference.
   *
   * @param aBuffer An array of 16-bit, mono raw audio samples at the
   *                appropriate sample rate (matching what the model was trained on).
   *
   * @throws if the stream was already finished.
   */
  feedAudioContent(aBuffer: Buffer): void {
    binding.FeedAudioContent(this.liveHandle('FeedAudioContent'), aBuffer);
  }

  /**
   * Compute the intermediate decoding of an ongoing streaming inference.
   *
   * @return The STT intermediate result.
   *
   * @throws if the stream was already finished.
   */
  intermediateDecode(): string {
    return binding.IntermediateDecode(this.liveHandle('IntermediateDecode'));
  }

  /**
   * Compute the intermediate decoding of an ongoing streaming inference, return results including metadata.
   *
   * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
   *
   * @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
   *
   * @throws if the stream was already finished.
   */
  intermediateDecodeWithMetadata(aNumResults: number = 1): Metadata {
    return binding.IntermediateDecodeWithMetadata(this.liveHandle('IntermediateDecodeWithMetadata'), aNumResults);
  }

  /**
   * Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference.
   *
   * @return The STT result.
   *
   * @throws if the stream was already finished.
   *
   * This method will free the stream, it must not be used after this method is called.
   */
  finishStream(): string {
    const result = binding.FinishStream(this.liveHandle('FinishStream'));
    // Drop the handle so later calls are rejected by liveHandle().
    this._impl = null;
    return result;
  }

  /**
   * Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference.
   *
   * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
   *
   * @return Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`.
   *
   * @throws if the stream was already finished.
   *
   * This method will free the stream, it must not be used after this method is called.
   */
  finishStreamWithMetadata(aNumResults: number = 1): Metadata {
    const result = binding.FinishStreamWithMetadata(this.liveHandle('FinishStreamWithMetadata'), aNumResults);
    // Drop the handle so later calls are rejected by liveHandle().
    this._impl = null;
    return result;
  }
}
|
||||
|
||||
/**
 * Throw when a native call reported a non-zero status code.
 *
 * The thrown value is a string of the form
 * `<apiName> failed: <message> (0x<status in hex>)`, where the message comes
 * from `binding.ErrorCodeToErrorMessage`.
 *
 * @param aApiName Name of the native call that produced the status.
 * @param aStatus  Status code returned by the native call.
 *
 * @throws when `aStatus` is not zero.
 */
function throwOnErrorStatus(aApiName: string, aStatus: number): void {
  if (aStatus !== 0) {
    throw `${aApiName} failed: ${binding.ErrorCodeToErrorMessage(aStatus)} (0x${aStatus.toString(16)})`;
  }
}

/**
 * An object providing an interface to a trained DeepSpeech model.
 */
export class Model {
  /** @internal */
  _impl: any;

  /**
   * @param aModelPath The path to the frozen model graph.
   *
   * @throws on error
   */
  constructor(aModelPath: string) {
    // Stays null if model creation fails below.
    this._impl = null;

    const [status, impl] = binding.CreateModel(aModelPath);
    throwOnErrorStatus('CreateModel', status);

    this._impl = impl;
  }

  /**
   * Get beam width value used by the model. If :js:func:`Model.setBeamWidth` was
   * not called before, will return the default value loaded from the model file.
   *
   * @return Beam width value used by the model.
   */
  beamWidth(): number {
    return binding.GetModelBeamWidth(this._impl);
  }

  /**
   * Set beam width value used by the model.
   *
   * @param aBeamWidth The beam width used by the model. A larger beam width value generates better results at the cost of decoding time.
   *
   * @throws on error
   */
  setBeamWidth(aBeamWidth: number): void {
    throwOnErrorStatus('SetModelBeamWidth', binding.SetModelBeamWidth(this._impl, aBeamWidth));
  }

  /**
   * Return the sample rate expected by the model.
   *
   * @return Sample rate.
   */
  sampleRate(): number {
    return binding.GetModelSampleRate(this._impl);
  }

  /**
   * Enable decoding using an external scorer.
   *
   * @param aScorerPath The path to the external scorer file.
   *
   * @throws on error
   */
  enableExternalScorer(aScorerPath: string): void {
    throwOnErrorStatus('EnableExternalScorer', binding.EnableExternalScorer(this._impl, aScorerPath));
  }

  /**
   * Disable decoding using an external scorer.
   *
   * @throws on error
   */
  disableExternalScorer(): void {
    throwOnErrorStatus('DisableExternalScorer', binding.DisableExternalScorer(this._impl));
  }

  /**
   * Set hyperparameters alpha and beta of the external scorer.
   *
   * @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight.
   * @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight.
   *
   * @throws on error
   */
  setScorerAlphaBeta(aLMAlpha: number, aLMBeta: number): void {
    throwOnErrorStatus('SetScorerAlphaBeta', binding.SetScorerAlphaBeta(this._impl, aLMAlpha, aLMBeta));
  }

  /**
   * Use the DeepSpeech model to perform Speech-To-Text.
   *
   * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
   *
   * @return The STT result. Returns undefined on error.
   */
  stt(aBuffer: Buffer): string {
    return binding.SpeechToText(this._impl, aBuffer);
  }

  /**
   * Use the DeepSpeech model to perform Speech-To-Text and output metadata
   * about the results.
   *
   * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
   * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this.
   * Default value is 1 if not specified.
   *
   * @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information.
   * The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
   */
  sttWithMetadata(aBuffer: Buffer, aNumResults: number = 1): Metadata {
    return binding.SpeechToTextWithMetadata(this._impl, aBuffer, aNumResults);
  }

  /**
   * Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object.
   *
   * @return a :js:func:`Stream` object that represents the streaming state.
   *
   * @throws on error
   */
  createStream(): Stream {
    const [status, ctx] = binding.CreateStream(this._impl);
    throwOnErrorStatus('CreateStream', status);
    return new Stream(ctx);
  }
}
|
||||
|
||||
/**
 * Frees associated resources and destroys model object.
 *
 * The model's native handle is cleared afterwards, so calling this function
 * again on the same object is a no-op instead of handing an already released
 * handle back to the native binding.
 *
 * @param model A model pointer returned by :js:func:`Model`
 */
export function FreeModel(model: Model): void {
  if (model._impl === null || model._impl === undefined) {
    // Nothing to release: never created, or already freed.
    return;
  }
  binding.FreeModel(model._impl);
  model._impl = null;
}
|
||||
|
||||
/**
 * Release the memory held by a metadata object.
 *
 * @param metadata Metadata object as returned by :js:func:`Model.sttWithMetadata` or :js:func:`Stream.finishStreamWithMetadata`
 */
export function FreeMetadata(metadata: Metadata): void {
  // Forwarded unchanged to the native binding; nothing is returned.
  binding.FreeMetadata(metadata);
}
|
||||
|
||||
/**
 * Destroy a streaming state without decoding the computed logits. This
 * can be used if you no longer need the result of an ongoing streaming
 * inference and don't want to perform a costly decode operation.
 *
 * The stream's native handle is cleared afterwards. Calling this function on
 * a stream whose handle is already cleared (for instance after
 * :js:func:`Stream.finishStream`) is a no-op.
 *
 * @param stream A streaming state pointer returned by :js:func:`Model.createStream`.
 */
export function FreeStream(stream: Stream): void {
  if (stream._impl === null || stream._impl === undefined) {
    // Already finished or freed: there is no native state left to destroy.
    return;
  }
  binding.FreeStream(stream._impl);
  stream._impl = null;
}
|
||||
|
||||
/**
 * Query the version of this library.
 *
 * @return The version string reported by the native binding, a semantic
 *         version (SemVer 2.0.0).
 */
export function Version(): string {
  const version: string = binding.Version();
  return version;
}
|
3
native_client/javascript/node-pre-gyp.d.ts
vendored
Normal file
3
native_client/javascript/node-pre-gyp.d.ts
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
/**
 * Minimal ambient typings covering the single node-pre-gyp function used by
 * this package.
 */
declare module "node-pre-gyp" {
  /**
   * @param path Path handed to node-pre-gyp; this package passes the
   *             absolute location of its `package.json`.
   *
   * @return Untyped result; this package uses it as the location of the
   *         compiled addon to `require`.
   */
  // Primitive `string`, not the boxed `String` wrapper type.
  export function find(path: string): any;
}
|
@ -41,7 +41,8 @@
|
||||
"devDependencies": {
|
||||
"electron": "^1.7.9",
|
||||
"node-gyp": "4.x - 5.x",
|
||||
"typescript": "3.6.x",
|
||||
"typescript": "3.8.x",
|
||||
"typedoc": "0.17.x",
|
||||
"@types/argparse": "1.0.x",
|
||||
"@types/node": "13.9.x"
|
||||
},
|
||||
|
@ -9,10 +9,14 @@
|
||||
"noImplicitThis": true,
|
||||
"strictFunctionTypes": true,
|
||||
"strictNullChecks": true,
|
||||
"forceConsistentCasingInFileNames": true
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"alwaysStrict": true,
|
||||
"declaration": true,
|
||||
"stripInternal": true
|
||||
},
|
||||
"files": [
|
||||
"index.d.ts",
|
||||
"client.ts"
|
||||
"index.ts",
|
||||
"client.ts",
|
||||
"node-pre-gyp.d.ts"
|
||||
]
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
breathe==4.14.2
|
||||
semver==2.8.1
|
||||
sphinx==2.4.4
|
||||
sphinx-js==2.8
|
||||
#FIXME: switch back to upstream sphinx-js when https://github.com/mozilla/sphinx-js/pull/135 is merged or the issue is fixed otherwise
|
||||
git+git://github.com/reuben/sphinx-js.git@a24775935443d21028ee4a7025a407c78030c4e7#egg=sphinx-js
|
||||
sphinx-rtd-theme==0.4.3
|
||||
pygments==2.6.1
|
||||
|
Loading…
Reference in New Issue
Block a user