Merge pull request #2882 from lissyx/PR2876

Pr2876
2020-04-06 23:52:45 +02:00 · 2020-04-06 23:52:45 +02:00 · b5a805056f
commit b5a805056f
parent 510e29fe65 88ac227ebe
12 changed files with 315 additions and 70 deletions
--- a/.gitignore
+++ b/.gitignore
@ -19,6 +19,7 @@
 /native_client/python/model_wrap.cpp
 /native_client/python/utils_wrap.cpp
 /native_client/javascript/build
 /native_client/javascript/client.js
 /native_client/javascript/deepspeech_wrap.cxx
 /doc/.build/
 /doc/xml-c/
--- a/doc/NodeJS-API.rst
+++ b/doc/NodeJS-API.rst
@ -1,6 +1,8 @@
 JavaScript (NodeJS / ElectronJS)
 ================================
 Support for TypeScript is :download:`provided in index.d.ts<../native_client/javascript/index.d.ts>`
 Model
 -----
--- a/doc/NodeJS-Examples.rst
+++ b/doc/NodeJS-Examples.rst
@ -4,20 +4,20 @@ JavaScript API Usage example
 Creating a model instance and loading model
 -------------------------------------------
-.. literalinclude:: ../native_client/javascript/client.js
+.. literalinclude:: ../native_client/javascript/client.ts
   :language: javascript
   :linenos:
-   :lines: 56,69
+   :lines: 49,54
 Performing inference
 --------------------
-.. literalinclude:: ../native_client/javascript/client.js
+.. literalinclude:: ../native_client/javascript/client.ts
   :language: javascript
   :linenos:
-   :lines: 122
+   :lines: 114,118
 Full source code
 ----------------
-See :download:`Full source code<../native_client/javascript/client.js>`.
+See :download:`Full source code<../native_client/javascript/client.ts>`.
--- a/doc/USING.rst
+++ b/doc/USING.rst
@ -125,6 +125,8 @@ Please note that as of now, we support:
 - Node.JS versions 4 to 13.
 - Electron.JS versions 1.6 to 7.1
 TypeScript support is also provided.
 Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can install the GPU specific package as follows:
 .. code-block:: bash
@ -133,7 +135,7 @@ Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can in
 See the `release notes <https://github.com/mozilla/DeepSpeech/releases>`_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_.
-See :github:`client.js <native_client/javascript/client.js>` for an example of how to use the bindings.
+See :github:`client.ts <native_client/javascript/client.ts>` for an example of how to use the bindings.
 Using the Command-Line client
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/doc/index.rst
+++ b/doc/index.rst
@ -14,6 +14,8 @@ Welcome to DeepSpeech's documentation!
   TRAINING
   Decoder
 .. toctree::
   :maxdepth: 2
   :caption: DeepSpeech Model
--- a/native_client/javascript/Makefile
+++ b/native_client/javascript/Makefile
@ -1,26 +1,37 @@
 NODE_BUILD_TOOL    ?= node-pre-gyp
 NODE_ABI_TARGET    ?= 
 NODE_BUILD_VERBOSE ?= --verbose
 NPM_TOOL           ?= npm
 PROJECT_NAME       ?= deepspeech
 PROJECT_VERSION    ?= $(shell cat ../../VERSION | tr -d '\n')
 NPM_ROOT           ?= $(shell npm root)
 NODE_MODULES_BIN   ?= $(NPM_ROOT)/.bin/
 ifeq ($(findstring _NT,$(OS)),_NT)
 # On Windows, we seem to need both in PATH for node-pre-gyp as well as tsc
 # they do not get installed the same way.
 NODE_MODULES_BIN   := $(shell cygpath -u $(NPM_ROOT)/.bin/):$(shell cygpath -u `dirname "$(NPM_ROOT)"`)
 endif
 include ../definitions.mk
-ifeq ($(TARGET),host-win)
+ifeq ($(findstring _NT,$(OS)),_NT)
 LIBS := '$(shell cygpath -w $(subst .lib,,$(LIBS)))'
 endif
 .PHONY: npm-dev
 default: build
 clean:
-	rm -f deepspeech_wrap.cxx package.json
+	rm -f deepspeech_wrap.cxx package.json package-lock.json
 	rm -rf ./build/
 clean-npm-pack:
 	rm -fr ./node_modules/
 	rm -fr ./deepspeech-*.tgz
 really-clean: clean clean-npm-pack
 	rm -fr ./node_modules/
 	rm -fr ./lib/
 package.json: package.json.in
@ -29,21 +40,27 @@ package.json: package.json.in
 		-e 's/$$(PROJECT_VERSION)/$(PROJECT_VERSION)/' \
 		package.json.in > package.json && cat package.json
-configure: deepspeech_wrap.cxx package.json
+npm-dev: package.json
-	$(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE)
+ifeq ($(findstring _NT,$(OS)),_NT)
 	# node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection
 	$(NPM_TOOL) install node-gyp@4.x
 endif
 	$(NPM_TOOL) install --prefix=$(NPM_ROOT)/../ --ignore-scripts --force --verbose --production=false .
 configure: deepspeech_wrap.cxx package.json npm-dev
 	PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE)
 build: configure deepspeech_wrap.cxx
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) rebuild $(NODE_BUILD_VERBOSE)
+	PATH="$(NODE_MODULES_BIN):${PATH}" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE)
 copy-deps: build
 	$(call copy_missing_libs,lib/binding/*/*/*/deepspeech.node,lib/binding/*/*/)
 node-wrapper: copy-deps build
-	$(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) package $(NODE_BUILD_VERBOSE)
+	PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DIST_URL) --no-color package $(NODE_BUILD_VERBOSE)
-npm-pack: clean package.json index.js
+npm-pack: clean package.json index.js npm-dev
-	npm install node-pre-gyp@0.14.x
+	PATH="$(NODE_MODULES_BIN):${PATH}" tsc && $(NPM_TOOL) pack $(NODE_BUILD_VERBOSE)
 	npm pack $(NODE_BUILD_VERBOSE)
 deepspeech_wrap.cxx: deepspeech.i
 	swig -version
--- a/native_client/javascript/README.md
+++ b/native_client/javascript/README.md
@ -1 +1,18 @@
 Full project description and documentation on GitHub: [https://github.com/mozilla/DeepSpeech](https://github.com/mozilla/DeepSpeech).
 ## Generating TypeScript Type Definitions
 You can generate the TypeScript type declaration file using `dts-gen`.
 This requires a compiled/installed version of the DeepSpeech NodeJS client.
 When the API changes, a new `index.d.ts` type declaration file must be
 generated by running:
 ```sh
 npm install -g dts-gen
 dts-gen --module deepspeech --file index.d.ts
 ```
 ### Example usage
 See `client.ts`
--- a/native_client/javascript/client.ts
+++ b/native_client/javascript/client.ts
@ -1,48 +1,42 @@
 #!/usr/bin/env node
 'use strict';
-const Fs = require('fs');
+// This is required for process.versions.electron below
-const Sox = require('sox-stream');
+/// <reference types="electron" />
 const Ds = require('./index.js');
 const argparse = require('argparse');
 const MemoryStream = require('memory-stream');
 const Wav = require('node-wav');
 const Duplex = require('stream').Duplex;
 const util = require('util');
-var VersionAction = function VersionAction(options) {
+import Ds from "./index";
-  options = options || {};
+import * as Fs from "fs";
-  options.nargs = 0;
+import Sox from "sox-stream";
-  argparse.Action.call(this, options);
+import * as argparse from "argparse";
 }
 util.inherits(VersionAction, argparse.Action);
-VersionAction.prototype.call = function(parser) {
+const MemoryStream = require("memory-stream");
-  console.log('DeepSpeech ' + Ds.Version());
+const Wav = require("node-wav");
-  let runtime = 'Node';
+const Duplex = require("stream").Duplex;
-  if (process.versions.electron) {
+
-    runtime = 'Electron';
+class VersionAction extends argparse.Action {
  call(parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: string | string[], optionString: string | null) {
    console.log('DeepSpeech ' + Ds.Version());
    let runtime = 'Node';
    if (process.versions.electron) {
      runtime = 'Electron';
    }
    console.error('Runtime: ' + runtime);
    process.exit(0);
  }
  console.error('Runtime: ' + runtime);
  process.exit(0);
 }
-var parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
+let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
 parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
 parser.addArgument(['--scorer'], {help: 'Path to the external scorer file'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
-parser.addArgument(['--beam_width'], {help: 'Beam width for the CTC decoder', type: 'int'});
+parser.addArgument(['--version'], {action: VersionAction, nargs: 0, help: 'Print version and exits'});
 parser.addArgument(['--lm_alpha'], {help: 'Language model weight (lm_alpha). If not specified, use default from the scorer package.', type: 'float'});
 parser.addArgument(['--lm_beta'], {help: 'Word insertion bonus (lm_beta). If not specified, use default from the scorer package.', type: 'float'});
 parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
 parser.addArgument(['--extended'], {action: 'storeTrue', help: 'Output string from extended metadata'});
-var args = parser.parseArgs();
+let args = parser.parseArgs();
-function totalTime(hrtimeValue) {
+function totalTime(hrtimeValue: number[]): string {
  return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
 }
-function candidateTranscriptToString(transcript) {
+function candidateTranscriptToString(transcript: Ds.CandidateTranscript): string {
  var retval = ""
  for (var i = 0; i < transcript.tokens.length; ++i) {
    retval += transcript.tokens[i].text;
@ -52,7 +46,7 @@ function candidateTranscriptToString(transcript) {
 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-var model = new Ds.Model(args['model']);
+let model = new Ds.Model(args['model']);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
@ -60,7 +54,7 @@ if (args['beam_width']) {
  model.setBeamWidth(args['beam_width']);
 }
-var desired_sample_rate = model.sampleRate();
+let desired_sample_rate = model.sampleRate();
 if (args['scorer']) {
  console.error('Loading scorer from file %s', args['scorer']);
@ -78,23 +72,24 @@ const buffer = Fs.readFileSync(args['audio']);
 const result = Wav.decode(buffer);
 if (result.sampleRate < desired_sample_rate) {
+  // The fragments below are concatenated into a single message, so each one
+  // must carry its own trailing space.
-  console.error('Warning: original sample rate (' + result.sampleRate + ') ' +
+  console.error(`Warning: original sample rate (${result.sampleRate}) ` +
-                'is lower than ' + desired_sample_rate + 'Hz. ' +
+                `is lower than ${desired_sample_rate} Hz. ` +
-                'Up-sampling might produce erratic speech recognition.');
+                `Up-sampling might produce erratic speech recognition.`);
 }
-function bufferToStream(buffer) {
+function bufferToStream(buffer: Buffer) {
  var stream = new Duplex();
  stream.push(buffer);
  stream.push(null);
  return stream;
 }
-var audioStream = new MemoryStream();
+let audioStream = new MemoryStream();
 bufferToStream(buffer).
  pipe(Sox({
    global: {
      'no-dither': true,
      'replay-gain': 'off',
    },
    output: {
      bits: 16,
--- a/native_client/javascript/index.d.ts
+++ b/native_client/javascript/index.d.ts
@ -0,0 +1,196 @@
 /**
 * Stores text of an individual token, along with its timing information.
 */
 export interface TokenMetadata {
     /** Text of this token. */
     text: string;
     /** Timing information for this token. NOTE(review): the unit of a timestep is not visible here - confirm against the native API. */
     timestep: number;
     /** Timing information for this token. NOTE(review): presumably the start offset of the token in the audio; unit not visible here - confirm. */
     start_time: number;
 }
 /**
 * A single transcript computed by the model, including a confidence value and
 * the metadata for its constituent tokens.
 */
 export interface CandidateTranscript {
     /** Per-token metadata (text and timing) for the tokens that make up this transcript. */
     tokens: TokenMetadata[];
     /** Confidence value of this transcript. NOTE(review): scale/range is not visible here - confirm. */
     confidence: number;
 }
 /**
 * Holds the array of CandidateTranscript objects computed by the model.
 */
 export interface Metadata {
     /** Candidate transcripts returned for one decoding request. */
     transcripts: CandidateTranscript[];
 }
 /**
 * An object providing an interface to a trained DeepSpeech model.
 *
 * @param aModelPath The path to the frozen model graph.
 *
 * @throws on error
 */
 export class Model {
 /**
 * Create a model object from the given model file (see the class-level
 * `@param` / `@throws` tags above).
 */
 constructor(aModelPath: string)
 /**
 * Get beam width value used by the model. If :js:func:`Model.setBeamWidth` was
 * not called before, will return the default value loaded from the model file.
 *
 * @return Beam width value used by the model.
 */
 beamWidth(): number;
 /**
 * Set beam width value used by the model.
 *
 * @param aBeamWidth The beam width used by the model. A larger beam width value generates better results at the cost of decoding time.
 *
 * @return Zero on success, non-zero on failure.
 */
 setBeamWidth(aBeamWidth: number): number;
 /**
 * Return the sample rate expected by the model.
 *
 * @return Sample rate.
 */
 sampleRate(): number;
 /**
 * Enable decoding using an external scorer.
 *
 * @param aScorerPath The path to the external scorer file.
 *
 * @return Zero on success, non-zero on failure (invalid arguments).
 */
 enableExternalScorer(aScorerPath: string): number;
 /**
 * Disable decoding using an external scorer.
 *
 * @return Zero on success, non-zero on failure (invalid arguments).
 */
 disableExternalScorer(): number;
 /**
 * Set hyperparameters alpha and beta of the external scorer.
 *
 * @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight.
 * @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight.
 *
 * @return Zero on success, non-zero on failure (invalid arguments).
 */
 setScorerAlphaBeta(aLMAlpha: number, aLMBeta: number): number;
 /**
 * Use the DeepSpeech model to perform Speech-To-Text.
 *
 * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
 *
 * @return The STT result. Returns undefined on error.
 */
 stt(aBuffer: object): string;
 /**
 * Use the DeepSpeech model to perform Speech-To-Text and output metadata
 * about the results.
 *
 * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
 * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this.
 * Default value is 1 if not specified.
 * NOTE(review): the declaration below makes aNumResults mandatory, so the
 * documented default cannot be relied on from TypeScript - confirm whether
 * the parameter should be optional.
 *
 * @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information.
 * The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
 */
 sttWithMetadata(aBuffer: object, aNumResults: number): Metadata;
 /**
 * Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object.
 *
 * @return a :js:func:`Stream` object that represents the streaming state.
 * NOTE(review): the declared return type is `object`, not `Stream`, so the
 * Stream methods are not visible to the type checker without a cast - confirm
 * this is intended.
 *
 * @throws on error
 */
 createStream(): object;
 }
 /**
 * @class
 * Provides an interface to a DeepSpeech stream. The constructor cannot be called
 * directly, use :js:func:`Model.createStream`.
 *
 * NOTE(review): declared with `declare` rather than `export`, unlike Model -
 * confirm that keeping this type out of the module's exports is intended.
 */
 declare class Stream {
 /**
 * Feed audio samples to an ongoing streaming inference.
 *
 * @param aBuffer An array of 16-bit, mono raw audio samples at the
 *                 appropriate sample rate (matching what the model was trained on).
 */
 feedAudioContent(aBuffer: object): void;
 /**
 * Compute the intermediate decoding of an ongoing streaming inference.
 *
 * @param aSctx NOTE(review): not documented in the original comment; the other
 *              Stream methods take no stream argument - confirm whether this
 *              parameter is actually required.
 *
 * @return The STT intermediate result.
 */
 intermediateDecode(aSctx: object): string;
 /**
 * Compute the intermediate decoding of an ongoing streaming inference, return results including metadata.
 *
 * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
 *
 * @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
 */
 intermediateDecodeWithMetadata (aNumResults: number): Metadata;
 /**
 * Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference.
 *
 * @return The STT result.
 *
 * This method will free the stream, it must not be used after this method is called.
 */
 finishStream(): string;
 /**
 * Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference.
 *
 * @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified.
 *
 * @return Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`.
 *
 * This method will free the stream, it must not be used after this method is called.
 */
 finishStreamWithMetadata(aNumResults: number): Metadata;
 }
 /**
 * Frees associated resources and destroys model object.
 *
 * @param model A model object created with :js:func:`Model`.
 */
 export function FreeModel(model: Model): void;
 /**
 * Free memory allocated for metadata information.
 *
 * @param metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata`, :js:func:`Stream.intermediateDecodeWithMetadata` or :js:func:`Stream.finishStreamWithMetadata`
 */
 export function FreeMetadata(metadata: Metadata): void;
 /**
 * Destroy a streaming state without decoding the computed logits. This
 * can be used if you no longer need the result of an ongoing streaming
 * inference and don't want to perform a costly decode operation.
 *
 * @param stream A streaming state object returned by :js:func:`Model.createStream`.
 * NOTE(review): typed as `object`, matching the declared return type of
 * `Model.createStream` - confirm whether `Stream` was intended.
 */
 export function FreeStream(stream: object): void;
 /**
 * Print version of this library and of the linked TensorFlow library on standard output.
 *
 * NOTE(review): client.ts concatenates the return value of this function into
 * a log message, which suggests a version string is returned rather than
 * printed - confirm the `void` return type and this description against the
 * implementation.
 */
 export function Version(): void;
--- a/native_client/javascript/package.json.in
+++ b/native_client/javascript/package.json.in
@ -2,7 +2,8 @@
    "name" : "$(PROJECT_NAME)",
    "version" : "$(PROJECT_VERSION)",
    "description" : "DeepSpeech NodeJS bindings",
-    "main" : "./index",
+    "main" : "./index.js",
    "types": "./index.d.ts",
    "bin": {
       "deepspeech": "./client.js"
    },
@ -13,6 +14,7 @@
       "README.md",
       "client.js",
       "index.js",
       "index.d.ts",
       "lib/*"
    ],
    "bugs": {
@ -37,6 +39,11 @@
      "node-wav": "0.0.2"
    },
    "devDependencies": {
      "electron": "^1.7.9",
      "node-gyp": "4.x - 5.x",
      "typescript": "3.6.x",
      "@types/argparse": "1.0.x",
      "@types/node": "13.9.x"
    },
    "scripts": {
        "test": "node index.js"
--- a/native_client/javascript/tsconfig.json
+++ b/native_client/javascript/tsconfig.json
@ -0,0 +1,18 @@
 {
    "compilerOptions": {
        "baseUrl": ".",
        "target": "es6",
        "module": "commonjs",
        "moduleResolution": "node",
        "esModuleInterop": true,
        "noImplicitAny": true,
        "noImplicitThis": true,
        "strictFunctionTypes": true,
        "strictNullChecks": true,
        "forceConsistentCasingInFileNames": true
    },
    "files": [
        "index.d.ts",
        "client.ts"
    ]
 }
--- a/taskcluster/tc-build-utils.sh
+++ b/taskcluster/tc-build-utils.sh
@ -105,16 +105,10 @@ do_deepspeech_nodejs_build()
  # Python 2.7 is required for node-pre-gyp, it is only required to force it on
  # Windows
  if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
-    NPM_ROOT=$(cygpath -u "$(npm root)")
+    PYTHON27="/c/Python27"
    PYTHON27=":/c/Python27"
    # node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection
    npm install node-gyp@4.x node-pre-gyp
  else
    NPM_ROOT="$(npm root)"
    npm install node-gyp@5.x node-pre-gyp
  fi
-  export PATH="$NPM_ROOT/.bin/${PYTHON27}:$PATH"
+  export PATH="${PYTHON27}:$PATH"
  for node in ${SUPPORTED_NODEJS_VERSIONS}; do
    EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" make -C native_client/javascript \
@ -157,16 +151,10 @@ do_deepspeech_npm_package()
  # Python 2.7 is required for node-pre-gyp, it is only required to force it on
  # Windows
  if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
-    NPM_ROOT=$(cygpath -u "$(npm root)")
+    PYTHON27="/c/Python27"
    PYTHON27=":/c/Python27"
    # node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection
    npm install node-gyp@4.x node-pre-gyp
  else
    NPM_ROOT="$(npm root)"
    npm install node-gyp@5.x node-pre-gyp
  fi
-  export PATH="$NPM_ROOT/.bin/$PYTHON27:$PATH"
+  export PATH="${NPM_BIN}${PYTHON27}:$PATH"
  all_tasks="$(curl -s https://community-tc.services.mozilla.com/api/queue/v1/task/${TASK_ID} | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')"