diff --git a/.gitignore b/.gitignore
index fe04f7c8..464c21dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@
 /native_client/python/model_wrap.cpp
 /native_client/python/utils_wrap.cpp
 /native_client/javascript/build
+/native_client/javascript/client.js
 /native_client/javascript/deepspeech_wrap.cxx
 /doc/.build/
 /doc/xml-c/
diff --git a/doc/NodeJS-API.rst b/doc/NodeJS-API.rst
index b6170b5b..7593150c 100644
--- a/doc/NodeJS-API.rst
+++ b/doc/NodeJS-API.rst
@@ -1,6 +1,8 @@
 JavaScript (NodeJS / ElectronJS)
 ================================

+Support for TypeScript is :download:`provided in index.d.ts<../native_client/javascript/index.d.ts>`
+
 Model
 -----

diff --git a/doc/NodeJS-Examples.rst b/doc/NodeJS-Examples.rst
index 7fe4d3b4..83b37870 100644
--- a/doc/NodeJS-Examples.rst
+++ b/doc/NodeJS-Examples.rst
@@ -4,20 +4,20 @@ JavaScript API Usage example
 Creating a model instance and loading model
 -------------------------------------------

-.. literalinclude:: ../native_client/javascript/client.js
+.. literalinclude:: ../native_client/javascript/client.ts
    :language: javascript
    :linenos:
-   :lines: 56,69
+   :lines: 49,54

 Performing inference
 --------------------

-.. literalinclude:: ../native_client/javascript/client.js
+.. literalinclude:: ../native_client/javascript/client.ts
    :language: javascript
    :linenos:
-   :lines: 122
+   :lines: 114,118

 Full source code
 ----------------

-See :download:`Full source code<../native_client/javascript/client.js>`.
+See :download:`Full source code<../native_client/javascript/client.ts>`.
diff --git a/doc/USING.rst b/doc/USING.rst
index 55631fe8..2e357af1 100644
--- a/doc/USING.rst
+++ b/doc/USING.rst
@@ -125,6 +125,8 @@ Please note that as of now, we support:
 - Node.JS versions 4 to 13.
 - Electron.JS versions 1.6 to 7.1

+TypeScript support is also provided.
+
 Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can install the GPU specific package as follows:

 .. code-block:: bash
@@ -133,7 +135,7 @@ Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can in

 See the `release notes <https://github.com/mozilla/DeepSpeech/releases>`_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_.

-See :github:`client.js <native_client/javascript/client.js>` for an example of how to use the bindings.
+See :github:`client.ts <native_client/javascript/client.ts>` for an example of how to use the bindings.

 Using the Command-Line client
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/doc/index.rst b/doc/index.rst
index a905d903..eee1977a 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -14,6 +14,8 @@ Welcome to DeepSpeech's documentation!

    TRAINING

+   Decoder
+
 .. toctree::
    :maxdepth: 2
    :caption: DeepSpeech Model
diff --git a/native_client/javascript/Makefile b/native_client/javascript/Makefile
index 5c1db09a..0581d648 100644
--- a/native_client/javascript/Makefile
+++ b/native_client/javascript/Makefile
@@ -1,26 +1,37 @@
 NODE_BUILD_TOOL ?= node-pre-gyp
 NODE_ABI_TARGET ?=
 NODE_BUILD_VERBOSE ?= --verbose
+NPM_TOOL ?= npm
 PROJECT_NAME ?= deepspeech
 PROJECT_VERSION ?= $(shell cat ../../VERSION | tr -d '\n')

+NPM_ROOT ?= $(shell npm root)
+
+NODE_MODULES_BIN ?= $(NPM_ROOT)/.bin/
+ifeq ($(findstring _NT,$(OS)),_NT)
+# On Windows, we seem to need both in PATH for node-pre-gyp as well as tsc,
+# as they do not get installed the same way.
+NODE_MODULES_BIN := $(shell cygpath -u $(NPM_ROOT)/.bin/):$(shell cygpath -u `dirname "$(NPM_ROOT)"`)
+endif
+
 include ../definitions.mk

-ifeq ($(TARGET),host-win)
+ifeq ($(findstring _NT,$(OS)),_NT)
 LIBS := '$(shell cygpath -w $(subst .lib,,$(LIBS)))'
 endif

+.PHONY: npm-dev
+
 default: build

 clean:
-	rm -f deepspeech_wrap.cxx package.json
+	rm -f deepspeech_wrap.cxx package.json package-lock.json
 	rm -rf ./build/

 clean-npm-pack:
-	rm -fr ./node_modules/
 	rm -fr ./deepspeech-*.tgz

 really-clean: clean clean-npm-pack
+	rm -fr ./node_modules/
 	rm -fr ./lib/

 package.json: package.json.in
@@ -29,21 +40,27 @@ package.json: package.json.in
 		-e 's/$$(PROJECT_VERSION)/$(PROJECT_VERSION)/' \
 		package.json.in > package.json && cat package.json

-configure: deepspeech_wrap.cxx package.json
-	$(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE)
+npm-dev: package.json
+ifeq ($(findstring _NT,$(OS)),_NT)
+	# node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection
+	$(NPM_TOOL) install node-gyp@4.x
+endif
+	$(NPM_TOOL) install --prefix=$(NPM_ROOT)/../ --ignore-scripts --force --verbose --production=false .
+
+configure: deepspeech_wrap.cxx package.json npm-dev
+	PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE)

 build: configure deepspeech_wrap.cxx
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) rebuild $(NODE_BUILD_VERBOSE)
+	PATH="$(NODE_MODULES_BIN):${PATH}" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE)

 copy-deps: build
 	$(call copy_missing_libs,lib/binding/*/*/*/deepspeech.node,lib/binding/*/*/)

 node-wrapper: copy-deps build
-	$(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) package $(NODE_BUILD_VERBOSE)
+	PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) --no-color package $(NODE_BUILD_VERBOSE)

-npm-pack: clean package.json index.js
-	npm install node-pre-gyp@0.14.x
-	npm pack $(NODE_BUILD_VERBOSE)
+npm-pack: clean package.json index.js npm-dev
+	PATH="$(NODE_MODULES_BIN):${PATH}" tsc && $(NPM_TOOL) pack $(NODE_BUILD_VERBOSE)

 deepspeech_wrap.cxx: deepspeech.i
 	swig -version
diff --git a/native_client/javascript/README.md b/native_client/javascript/README.md
index 89ebc594..267fbeba 100644
--- a/native_client/javascript/README.md
+++ b/native_client/javascript/README.md
@@ -1 +1,18 @@
 Full project description and documentation on GitHub: [https://github.com/mozilla/DeepSpeech](https://github.com/mozilla/DeepSpeech).
+
+## Generating TypeScript Type Definitions
+
+You can generate the TypeScript type declaration file using `dts-gen`.
+This requires a compiled/installed version of the DeepSpeech NodeJS client.
+
+Upon API change, you need to generate a new `index.d.ts` type declaration
+file by running:
+
+```sh
+npm install -g dts-gen
+dts-gen --module deepspeech --file index.d.ts
+```
+
+### Example usage
+
+See `client.ts`.
diff --git a/native_client/javascript/client.js b/native_client/javascript/client.ts
similarity index 59%
rename from native_client/javascript/client.js
rename to native_client/javascript/client.ts
index 16dd19e8..e6000fe5 100644
--- a/native_client/javascript/client.js
+++ b/native_client/javascript/client.ts
@@ -1,48 +1,42 @@
 #!/usr/bin/env node
-'use strict';
-const Fs = require('fs');
-const Sox = require('sox-stream');
-const Ds = require('./index.js');
-const argparse = require('argparse');
-const MemoryStream = require('memory-stream');
-const Wav = require('node-wav');
-const Duplex = require('stream').Duplex;
-const util = require('util');
+// This is required for process.versions.electron below
+/// <reference types="electron" />

-var VersionAction = function VersionAction(options) {
-  options = options || {};
-  options.nargs = 0;
-  argparse.Action.call(this, options);
-}
-util.inherits(VersionAction, argparse.Action);
+import Ds from "./index";
+import * as Fs from "fs";
+import Sox from "sox-stream";
+import * as argparse from "argparse";

-VersionAction.prototype.call = function(parser) {
-  console.log('DeepSpeech ' + Ds.Version());
-  let runtime = 'Node';
-  if (process.versions.electron) {
-    runtime = 'Electron';
+const MemoryStream = require("memory-stream");
+const Wav = require("node-wav");
+const Duplex = require("stream").Duplex;
+
+class VersionAction extends argparse.Action {
+  call(parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: string | string[], optionString: string | null) {
+    console.log('DeepSpeech ' + Ds.Version());
+    let runtime = 'Node';
+    if (process.versions.electron) {
+      runtime = 'Electron';
+    }
+    console.error('Runtime: ' + runtime);
+    process.exit(0);
   }
-  console.error('Runtime: ' + runtime);
-  process.exit(0);
 }

-var parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
+let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
 parser.addArgument(['--scorer'], {help: 'Path to the external scorer file'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
-parser.addArgument(['--beam_width'], {help: 'Beam width for the CTC decoder', type: 'int'});
-parser.addArgument(['--lm_alpha'], {help: 'Language model weight (lm_alpha). If not specified, use default from the scorer package.', type: 'float'});
-parser.addArgument(['--lm_beta'], {help: 'Word insertion bonus (lm_beta). If not specified, use default from the scorer package.', type: 'float'});
-parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
+parser.addArgument(['--version'], {action: VersionAction, nargs: 0, help: 'Print version and exits'});
 parser.addArgument(['--extended'], {action: 'storeTrue', help: 'Output string from extended metadata'});
-var args = parser.parseArgs();
+let args = parser.parseArgs();

-function totalTime(hrtimeValue) {
+function totalTime(hrtimeValue: number[]): string {
   return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
 }

-function candidateTranscriptToString(transcript) {
+function candidateTranscriptToString(transcript: Ds.CandidateTranscript): string {
   var retval = ""
   for (var i = 0; i < transcript.tokens.length; ++i) {
     retval += transcript.tokens[i].text;
@@ -52,7 +46,7 @@ function candidateTranscriptToString(transcript) {

 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-var model = new Ds.Model(args['model']);
+let model = new Ds.Model(args['model']);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));

@@ -60,7 +54,7 @@ if (args['beam_width']) {
   model.setBeamWidth(args['beam_width']);
 }

-var desired_sample_rate = model.sampleRate();
+let desired_sample_rate = model.sampleRate();

 if (args['scorer']) {
   console.error('Loading scorer from file %s', args['scorer']);
@@ -78,23 +72,24 @@ const buffer = Fs.readFileSync(args['audio']);
 const result = Wav.decode(buffer);

 if (result.sampleRate < desired_sample_rate) {
-  console.error('Warning: original sample rate (' + result.sampleRate + ') ' +
-                'is lower than ' + desired_sample_rate + 'Hz. ' +
-                'Up-sampling might produce erratic speech recognition.');
+  console.error(`Warning: original sample rate (${result.sampleRate}) ` +
+                `is lower than ${desired_sample_rate} Hz. ` +
+                `Up-sampling might produce erratic speech recognition.`);
 }

-function bufferToStream(buffer) {
+function bufferToStream(buffer: Buffer) {
   var stream = new Duplex();
   stream.push(buffer);
   stream.push(null);
   return stream;
 }

-var audioStream = new MemoryStream();
+let audioStream = new MemoryStream();
 bufferToStream(buffer).
   pipe(Sox({
     global: {
       'no-dither': true,
+      'replay-gain': 'off',
     },
     output: {
       bits: 16,
diff --git a/native_client/javascript/index.d.ts b/native_client/javascript/index.d.ts
new file mode 100644
index 00000000..fe9ab459
--- /dev/null
+++ b/native_client/javascript/index.d.ts
@@ -0,0 +1,196 @@
+/**
+ * Stores text of an individual token, along with its timing information
+ */
+export interface TokenMetadata {
+    text: string;
+    timestep: number;
+    start_time: number;
+}
+
+/**
+ * A single transcript computed by the model, including a confidence value and
+ * the metadata for its constituent tokens.
+ */
+export interface CandidateTranscript {
+    tokens: TokenMetadata[];
+    confidence: number;
+}
+
+/**
+ * An array of CandidateTranscript objects computed by the model.
+ */
+export interface Metadata {
+    transcripts: CandidateTranscript[];
+}
+
+/**
+ * An object providing an interface to a trained DeepSpeech model.
+ *
+ * @param aModelPath The path to the frozen model graph.
+ *
+ * @throws on error
+ */
+export class Model {
+constructor(aModelPath: string);
+
+/**
+ * Get beam width value used by the model. If :js:func:`Model.setBeamWidth` was
+ * not called before, will return the default value loaded from the model file.
+ *
+ * @return Beam width value used by the model.
+ */
+beamWidth(): number;
+
+/**
+ * Set beam width value used by the model.
+ *
+ * @param aBeamWidth The beam width used by the model. A larger beam width value generates better results at the cost of decoding time.
+ *
+ * @return Zero on success, non-zero on failure.
+ */
+setBeamWidth(aBeamWidth: number): number;
+
+/**
+ * Return the sample rate expected by the model.
+ *
+ * @return Sample rate.
+ */
+sampleRate(): number;
+
+/**
+ * Enable decoding using an external scorer.
+ *
+ * @param aScorerPath The path to the external scorer file.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments).
+ */
+enableExternalScorer(aScorerPath: string): number;
+
+/**
+ * Disable decoding using an external scorer.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments).
+ */
+disableExternalScorer(): number;
+
+/**
+ * Set hyperparameters alpha and beta of the external scorer.
+ *
+ * @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight.
+ * @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments).
+ */
+setScorerAlphaBeta(aLMAlpha: number, aLMBeta: number): number;
+
+/**
+ * Use the DeepSpeech model to perform Speech-To-Text.
+ *
+ * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
+ *
+ * @return The STT result. Returns undefined on error.
+ */
+stt(aBuffer: object): string;
+
+/**
+ * Use the DeepSpeech model to perform Speech-To-Text and output metadata
+ * about the results.
+ *
+ * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
+ * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this.
+ * Default value is 1 if not specified.
+ *
+ * @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information.
+ * The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
+ */
+sttWithMetadata(aBuffer: object, aNumResults: number): Metadata;
+
+/**
+ * Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object.
+ *
+ * @return a :js:func:`Stream` object that represents the streaming state.
+ *
+ * @throws on error
+ */
+createStream(): object;
+}
+
+/**
+ * @class
+ * Provides an interface to a DeepSpeech stream. The constructor cannot be called
+ * directly, use :js:func:`Model.createStream`.
+ */
+declare class Stream {
+/**
+ * Feed audio samples to an ongoing streaming inference.
+ *
+ * @param aBuffer An array of 16-bit, mono raw audio samples at the
+ * appropriate sample rate (matching what the model was trained on).
+ */
+feedAudioContent(aBuffer: object): void;
+
+/**
+ * Compute the intermediate decoding of an ongoing streaming inference.
+ *
+ * @return The STT intermediate result.
+ */
+intermediateDecode(aSctx: object): string;
+
+/**
+ * Compute the intermediate decoding of an ongoing streaming inference, return results including metadata.
+ *
+ * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
+ *
+ * @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
+ */
+intermediateDecodeWithMetadata(aNumResults: number): Metadata;
+
+/**
+ * Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference.
+ *
+ * @return The STT result.
+ *
+ * This method will free the stream; it must not be used after this method is called.
+ */
+finishStream(): string;
+
+/**
+ * Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference.
+ *
+ * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
+ *
+ * @return Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`.
+ *
+ * This method will free the stream; it must not be used after this method is called.
+ */
+finishStreamWithMetadata(aNumResults: number): Metadata;
+}
+
+/**
+ * Frees associated resources and destroys model object.
+ *
+ * @param model A model pointer returned by :js:func:`Model`
+ *
+ */
+export function FreeModel(model: Model): void;
+
+/**
+ * Free memory allocated for metadata information.
+ *
+ * @param metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata` or :js:func:`Model.finishStreamWithMetadata`
+ */
+export function FreeMetadata(metadata: Metadata): void;
+
+/**
+ * Destroy a streaming state without decoding the computed logits. This
+ * can be used if you no longer need the result of an ongoing streaming
+ * inference and don't want to perform a costly decode operation.
+ *
+ * @param stream A streaming state pointer returned by :js:func:`Model.createStream`.
+ */
+export function FreeStream(stream: object): void;
+
+/**
+ * Print version of this library and of the linked TensorFlow library on standard output.
+ */
+export function Version(): void;
diff --git a/native_client/javascript/package.json.in b/native_client/javascript/package.json.in
index bd80f438..22c3363c 100644
--- a/native_client/javascript/package.json.in
+++ b/native_client/javascript/package.json.in
@@ -2,7 +2,8 @@
   "name" : "$(PROJECT_NAME)",
   "version" : "$(PROJECT_VERSION)",
   "description" : "DeepSpeech NodeJS bindings",
-  "main" : "./index",
+  "main" : "./index.js",
+  "types": "./index.d.ts",
   "bin": {
     "deepspeech": "./client.js"
   },
@@ -13,6 +14,7 @@
     "README.md",
     "client.js",
     "index.js",
+    "index.d.ts",
     "lib/*"
   ],
   "bugs": {
@@ -37,6 +39,11 @@
     "node-wav": "0.0.2"
   },
   "devDependencies": {
+    "electron": "^1.7.9",
+    "node-gyp": "4.x - 5.x",
+    "typescript": "3.6.x",
+    "@types/argparse": "1.0.x",
+    "@types/node": "13.9.x"
   },
   "scripts": {
     "test": "node index.js"
diff --git a/native_client/javascript/tsconfig.json b/native_client/javascript/tsconfig.json
new file mode 100644
index 00000000..d67d7bce
--- /dev/null
+++ b/native_client/javascript/tsconfig.json
@@ -0,0 +1,18 @@
+{
+  "compilerOptions": {
+    "baseUrl": ".",
+    "target": "es6",
+    "module": "commonjs",
+    "moduleResolution": "node",
+    "esModuleInterop": true,
+    "noImplicitAny": true,
+    "noImplicitThis": true,
+    "strictFunctionTypes": true,
+    "strictNullChecks": true,
+    "forceConsistentCasingInFileNames": true
+  },
+  "files": [
+    "index.d.ts",
+    "client.ts"
+  ]
+}
diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh
index 0a9b633b..26234a4f 100755
--- a/taskcluster/tc-build-utils.sh
+++ b/taskcluster/tc-build-utils.sh
@@ -105,16 +105,10 @@ do_deepspeech_nodejs_build()
   # Python 2.7 is required for node-pre-gyp, it is only required to force it on
   # Windows
   if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
-    NPM_ROOT=$(cygpath -u "$(npm root)")
-    PYTHON27=":/c/Python27"
-    # node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection
-    npm install node-gyp@4.x node-pre-gyp
-  else
-    NPM_ROOT="$(npm root)"
-    npm install node-gyp@5.x node-pre-gyp
+    PYTHON27="/c/Python27"
   fi

-  export PATH="$NPM_ROOT/.bin/${PYTHON27}:$PATH"
+  export PATH="${PYTHON27}:$PATH"

   for node in ${SUPPORTED_NODEJS_VERSIONS}; do
     EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" make -C native_client/javascript \
@@ -157,16 +151,10 @@ do_deepspeech_npm_package()
   # Python 2.7 is required for node-pre-gyp, it is only required to force it on
   # Windows
   if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
-    NPM_ROOT=$(cygpath -u "$(npm root)")
-    PYTHON27=":/c/Python27"
-    # node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection
-    npm install node-gyp@4.x node-pre-gyp
-  else
-    NPM_ROOT="$(npm root)"
-    npm install node-gyp@5.x node-pre-gyp
+    PYTHON27="/c/Python27"
   fi

-  export PATH="$NPM_ROOT/.bin/$PYTHON27:$PATH"
+  export PATH="${NPM_BIN}${PYTHON27}:$PATH"

   all_tasks="$(curl -s https://community-tc.services.mozilla.com/api/queue/v1/task/${TASK_ID} | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')"
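For reference, the typed surface this change declares in `index.d.ts` can be consumed from application code roughly as follows. This is a minimal sketch, not part of the diff: it assumes the published `deepspeech` npm package, placeholder model/scorer/audio paths, and audio that is already 16-bit mono PCM at the model's sample rate (the shipped `client.ts` uses Sox to perform that conversion).

```typescript
import * as Fs from "fs";
// Named exports (Model, FreeMetadata, ...) as declared in index.d.ts.
import * as Ds from "deepspeech";

// The Model constructor throws on error; the paths below are placeholders.
const model = new Ds.Model("deepspeech.pbmm");
model.enableExternalScorer("deepspeech.scorer"); // returns non-zero on failure

console.error("Model expects %d Hz audio", model.sampleRate());

// Raw 16-bit mono PCM samples at model.sampleRate(), e.g. produced by Sox.
const audio: Buffer = Fs.readFileSync("audio.raw");

// Plain transcript.
console.log(model.stt(audio));

// Transcript with per-token metadata; the caller must free it via FreeMetadata().
const meta = model.sttWithMetadata(audio, 1);
console.log(meta.transcripts[0].confidence);
Ds.FreeMetadata(meta);
```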