commit
5196fa6e9b
3
.gitignore
vendored
3
.gitignore
vendored
@ -20,3 +20,6 @@
|
||||
/native_client/python/utils_wrap.cpp
|
||||
/native_client/javascript/build
|
||||
/native_client/javascript/deepspeech_wrap.cxx
|
||||
/doc/.build/
|
||||
/doc/xml-c/
|
||||
/doc/xml-java/
|
||||
|
17
.readthedocs.yml
Normal file
17
.readthedocs.yml
Normal file
@ -0,0 +1,17 @@
|
||||
# .readthedocs.yml
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Build documentation in the doc/ directory with Sphinx
|
||||
sphinx:
|
||||
builder: html
|
||||
configuration: doc/conf.py
|
||||
|
||||
# Optionally set the version of Python and requirements required to build your docs
|
||||
python:
|
||||
version: 3.7
|
||||
install:
|
||||
- requirements: taskcluster/docs-requirements.txt
|
44
doc/C-API.rst
Normal file
44
doc/C-API.rst
Normal file
@ -0,0 +1,44 @@
|
||||
C
|
||||
=
|
||||
|
||||
.. doxygenfunction:: DS_CreateModel
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeModel
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_EnableDecoderWithLM
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_SpeechToText
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_SpeechToTextWithMetadata
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_CreateStream
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_FeedAudioContent
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_IntermediateDecode
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_FinishStream
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_FinishStreamWithMetadata
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeStream
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeMetadata
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeString
|
||||
:project: deepspeech-c
|
||||
|
||||
.. doxygenfunction:: DS_PrintVersions
|
||||
:project: deepspeech-c
|
5
doc/Error-Codes.rst
Normal file
5
doc/Error-Codes.rst
Normal file
@ -0,0 +1,5 @@
|
||||
Error codes
|
||||
===========
|
||||
|
||||
.. doxygenenum:: DeepSpeech_Error_Codes
|
||||
:project: deepspeech-c
|
23
doc/Java-API.rst
Normal file
23
doc/Java-API.rst
Normal file
@ -0,0 +1,23 @@
|
||||
Java
|
||||
====
|
||||
|
||||
DeepSpeechModel
|
||||
---------------
|
||||
|
||||
.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::DeepSpeechModel
|
||||
:project: deepspeech-java
|
||||
:members:
|
||||
|
||||
Metadata
|
||||
--------
|
||||
|
||||
.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::Metadata
|
||||
:project: deepspeech-java
|
||||
:members: getItems, getNum_items, getProbability, getItem
|
||||
|
||||
MetadataItem
|
||||
------------
|
||||
|
||||
.. doxygenclass:: org::mozilla::deepspeech::libdeepspeech::MetadataItem
|
||||
:project: deepspeech-java
|
||||
:members: getCharacter, getTimestep, getStart_time
|
24
doc/Makefile
24
doc/Makefile
@ -12,9 +12,27 @@ BUILDDIR = .build
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
.PHONY: help pip3 npm Makefile doxygen-c doxygen-java
|
||||
|
||||
doxygen-c:
|
||||
cd ../ && doxygen doc/doxygen-c.conf
|
||||
|
||||
doxygen-java:
|
||||
cd ../ && doxygen doc/doxygen-java.conf
|
||||
|
||||
pip3:
|
||||
pip3 install --user -r ../taskcluster/docs-requirements.txt
|
||||
|
||||
npm:
|
||||
npm install jsdoc@3.6.3
|
||||
|
||||
dist: html
|
||||
cd $(BUILDDIR)/html/ && zip -r9 ../../html.zip *
|
||||
|
||||
dist: html
|
||||
@cd $(BUILDDIR)/html/ && zip -r9 ../../html.zip *
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
%: Makefile pip3 npm doxygen-c doxygen-java
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
31
doc/NodeJS-API.rst
Normal file
31
doc/NodeJS-API.rst
Normal file
@ -0,0 +1,31 @@
|
||||
JavaScript (NodeJS / ElectronJS)
|
||||
================================
|
||||
|
||||
Model
|
||||
-----
|
||||
|
||||
.. js:autoclass:: Model
|
||||
:members:
|
||||
|
||||
Module exported methods
|
||||
-----------------------
|
||||
|
||||
.. js:autofunction:: FreeModel
|
||||
|
||||
.. js:autofunction:: FreeStream
|
||||
|
||||
.. js:autofunction:: FreeMetadata
|
||||
|
||||
.. js:autofunction:: printVersions
|
||||
|
||||
Metadata
|
||||
--------
|
||||
|
||||
.. js:autoclass:: Metadata
|
||||
:members:
|
||||
|
||||
MetadataItem
|
||||
------------
|
||||
|
||||
.. js:autoclass:: MetadataItem
|
||||
:members:
|
22
doc/Python-API.rst
Normal file
22
doc/Python-API.rst
Normal file
@ -0,0 +1,22 @@
|
||||
Python
|
||||
======
|
||||
|
||||
.. automodule:: native_client.python
|
||||
|
||||
Model
|
||||
-----
|
||||
|
||||
.. autoclass:: Model
|
||||
:members:
|
||||
|
||||
Metadata
|
||||
--------
|
||||
|
||||
.. autoclass:: Metadata
|
||||
:members:
|
||||
|
||||
MetadataItem
|
||||
------------
|
||||
|
||||
.. autoclass:: MetadataItem
|
||||
:members:
|
16
doc/Structs.rst
Normal file
16
doc/Structs.rst
Normal file
@ -0,0 +1,16 @@
|
||||
Data structures
|
||||
===============
|
||||
|
||||
Metadata
|
||||
--------
|
||||
|
||||
.. doxygenstruct:: Metadata
|
||||
:project: deepspeech-c
|
||||
:members:
|
||||
|
||||
MetadataItem
|
||||
------------
|
||||
|
||||
.. doxygenstruct:: MetadataItem
|
||||
:project: deepspeech-c
|
||||
:members:
|
72
doc/conf.py
72
doc/conf.py
@ -16,13 +16,44 @@
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
|
||||
# pylint: skip-file
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.abspath('..'))
|
||||
|
||||
sys.path.insert(0, os.path.abspath('../'))
|
||||
|
||||
autodoc_mock_imports = ['deepspeech']
|
||||
|
||||
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
|
||||
if read_the_docs_build:
|
||||
import subprocess
|
||||
subprocess.call('cd ../ && doxygen doc/doxygen-c.conf', shell=True)
|
||||
subprocess.call('cd ../ && doxygen doc/doxygen-java.conf', shell=True)
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
import semver
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = u'DeepSpeech'
|
||||
copyright = '2019, Mozilla Corporation'
|
||||
author = 'Mozilla Corporation'
|
||||
|
||||
with open('../VERSION', 'r') as ver:
|
||||
v = ver.read().strip()
|
||||
vv = semver.parse(v)
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
# The short X.Y version
|
||||
version = '{}.{}'.format(vv['major'], vv['minor'])
|
||||
# The full version, including alpha/beta/rc tags
|
||||
release = v
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
#
|
||||
# needs_sphinx = '1.0'
|
||||
@ -30,10 +61,23 @@ sys.path.insert(0, os.path.abspath('..'))
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = ['sphinx.ext.autodoc',
|
||||
'sphinx.ext.intersphinx',
|
||||
'sphinx.ext.mathjax',
|
||||
'sphinx.ext.viewcode']
|
||||
extensions = [
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.intersphinx',
|
||||
'sphinx.ext.mathjax',
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx_rtd_theme',
|
||||
'sphinx_js',
|
||||
'breathe'
|
||||
]
|
||||
|
||||
|
||||
breathe_projects = {
|
||||
"deepspeech-c": "xml-c/",
|
||||
"deepspeech-java": "xml-java/",
|
||||
}
|
||||
|
||||
js_source_path = "../native_client/javascript"
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['.templates']
|
||||
@ -47,20 +91,6 @@ source_suffix = '.rst'
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'DeepSpeech'
|
||||
copyright = u'2017, Mozilla Research'
|
||||
author = u'Mozilla Research'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = u''
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = u''
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#
|
||||
@ -79,13 +109,15 @@ pygments_style = 'sphinx'
|
||||
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
||||
todo_include_todos = False
|
||||
|
||||
add_module_names = False
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'classic'
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
|
2494
doc/doxygen-c.conf
Normal file
2494
doc/doxygen-c.conf
Normal file
File diff suppressed because it is too large
Load Diff
2494
doc/doxygen-java.conf
Normal file
2494
doc/doxygen-java.conf
Normal file
File diff suppressed because it is too large
Load Diff
@ -8,42 +8,36 @@ Welcome to DeepSpeech's documentation!
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
:caption: Introduction
|
||||
|
||||
DeepSpeech
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: DeepSpeech Model
|
||||
|
||||
Geometry
|
||||
ParallelOptimization
|
||||
|
||||
.. automodule:: DeepSpeech
|
||||
:members:
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Enums and structs
|
||||
|
||||
.. automodule:: util.audio
|
||||
:members:
|
||||
Error-Codes
|
||||
|
||||
.. automodule:: util.text
|
||||
:members:
|
||||
Structs
|
||||
|
||||
.. automodule:: util.gpu
|
||||
:members:
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: API Reference
|
||||
|
||||
.. automodule:: util.stm
|
||||
:members:
|
||||
C-API
|
||||
|
||||
.. automodule:: util.importers.ldc93s1
|
||||
:members:
|
||||
NodeJS-API
|
||||
|
||||
.. automodule:: util.importers.ted
|
||||
:members:
|
||||
|
||||
.. automodule:: util.importers.librivox
|
||||
:members:
|
||||
|
||||
.. automodule:: util.importers.LDC97S62
|
||||
:members:
|
||||
|
||||
.. automodule:: util.importers.fisher
|
||||
:members:
|
||||
Java-API
|
||||
|
||||
Python-API
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
@ -8,9 +8,9 @@ extern "C" {
|
||||
#ifndef SWIG
|
||||
#if defined _MSC_VER
|
||||
#define DEEPSPEECH_EXPORT __declspec(dllexport)
|
||||
#else /*End of _MSC_VER*/
|
||||
#else
|
||||
#define DEEPSPEECH_EXPORT __attribute__ ((visibility("default")))
|
||||
#endif /*End of SWIG*/
|
||||
#endif /*End of _MSC_VER*/
|
||||
#else
|
||||
#define DEEPSPEECH_EXPORT
|
||||
#endif
|
||||
@ -19,20 +19,32 @@ typedef struct ModelState ModelState;
|
||||
|
||||
typedef struct StreamingState StreamingState;
|
||||
|
||||
// Stores each individual character, along with its timing information
|
||||
/**
|
||||
* @brief Stores each individual character, along with its timing information
|
||||
*/
|
||||
typedef struct MetadataItem {
|
||||
/** The character generated for transcription */
|
||||
char* character;
|
||||
int timestep; // Position of the character in units of 20ms
|
||||
float start_time; // Position of the character in seconds
|
||||
|
||||
/** Position of the character in units of 20ms */
|
||||
int timestep;
|
||||
|
||||
/** Position of the character in seconds */
|
||||
float start_time;
|
||||
} MetadataItem;
|
||||
|
||||
// Stores the entire CTC output as an array of character metadata objects
|
||||
/**
|
||||
* @brief Stores the entire CTC output as an array of character metadata objects
|
||||
*/
|
||||
typedef struct Metadata {
|
||||
/** List of items */
|
||||
MetadataItem* items;
|
||||
/** Size of the list of items */
|
||||
int num_items;
|
||||
// Approximated confidence value for this transcription. This is roughly the
|
||||
// sum of the acoustic model logit values for each timestep/character that
|
||||
// contributed to the creation of this transcription.
|
||||
/** Approximated confidence value for this transcription. This is roughly the
|
||||
* sum of the acoustic model logit values for each timestep/character that
|
||||
* contributed to the creation of this transcription.
|
||||
*/
|
||||
double confidence;
|
||||
} Metadata;
|
||||
|
||||
@ -90,8 +102,6 @@ void DS_FreeModel(ModelState* ctx);
|
||||
* @brief Enable decoding using beam scoring with a KenLM language model.
|
||||
*
|
||||
* @param aCtx The ModelState pointer for the model being changed.
|
||||
* @param aAlphabetConfigPath The path to the configuration file specifying
|
||||
* the alphabet used by the network. See alphabet.h.
|
||||
* @param aLMPath The path to the language model binary file.
|
||||
* @param aTriePath The path to the trie file built from the same vocabu-
|
||||
* lary as the language model binary.
|
||||
|
@ -21,6 +21,13 @@
|
||||
%array_functions(struct MetadataItem, metadataItem_array);
|
||||
|
||||
%extend struct Metadata {
|
||||
/**
|
||||
* Retrieve one MetadataItem element
|
||||
*
|
||||
* @param i Array index of the MetadataItem to get
|
||||
*
|
||||
* @return The MetadataItem requested or null
|
||||
*/
|
||||
MetadataItem getItem(int i) {
|
||||
return metadataItem_array_getitem(self->items, i);
|
||||
}
|
||||
|
@ -43,6 +43,11 @@ android {
|
||||
timeOutInMs 15 * 60 * 1000 // 15 minutes
|
||||
installOptions "-d","-t"
|
||||
}
|
||||
|
||||
// Avoid scanning libdeepspeech_doc
|
||||
sourceSets {
|
||||
main.java.srcDirs = [ 'src/main/java/org/mozilla/deepspeech/libdeepspeech/' ]
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
|
@ -1,5 +1,8 @@
|
||||
package org.mozilla.deepspeech.libdeepspeech;
|
||||
|
||||
/**
|
||||
* @brief Exposes a DeepSpeech model in Java
|
||||
**/
|
||||
public class DeepSpeechModel {
|
||||
|
||||
static {
|
||||
@ -11,47 +14,139 @@ public class DeepSpeechModel {
|
||||
SWIGTYPE_p_p_ModelState _mspp;
|
||||
SWIGTYPE_p_ModelState _msp;
|
||||
|
||||
/**
|
||||
* @brief An object providing an interface to a trained DeepSpeech model.
|
||||
*
|
||||
* @constructor
|
||||
*
|
||||
* @param modelPath The path to the frozen model graph.
|
||||
* @param alphabetPath The path to the configuration file specifying
|
||||
* the alphabet used by the network. See alphabet.h.
|
||||
* @param beam_width The beam width used by the decoder. A larger beam
|
||||
* width generates better results at the cost of decoding
|
||||
* time.
|
||||
*/
|
||||
public DeepSpeechModel(String modelPath, String alphabetPath, int beam_width) {
|
||||
this._mspp = impl.new_modelstatep();
|
||||
impl.CreateModel(modelPath, alphabetPath, beam_width, this._mspp);
|
||||
this._msp = impl.modelstatep_value(this._mspp);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Frees associated resources and destroys model object.
|
||||
*/
|
||||
public void freeModel() {
|
||||
impl.FreeModel(this._msp);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Enable decoding using beam scoring with a KenLM language model.
|
||||
*
|
||||
* @param lm The path to the language model binary file.
|
||||
* @param trie The path to the trie file built from the same vocabulary as the language model binary.
|
||||
* @param lm_alpha The alpha hyperparameter of the CTC decoder. Language Model weight.
|
||||
* @param lm_beta The beta hyperparameter of the CTC decoder. Word insertion weight.
|
||||
*
|
||||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
public void enableDecoderWihLM(String lm, String trie, float lm_alpha, float lm_beta) {
|
||||
impl.EnableDecoderWithLM(this._msp, lm, trie, lm_alpha, lm_beta);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Use the DeepSpeech model to perform Speech-To-Text.
|
||||
*
|
||||
* @param buffer A 16-bit, mono raw audio signal at the appropriate
|
||||
* sample rate.
|
||||
* @param buffer_size The number of samples in the audio signal.
|
||||
* @param sample_rate The sample-rate of the audio signal.
|
||||
*
|
||||
* @return The STT result.
|
||||
*/
|
||||
public String stt(short[] buffer, int buffer_size, int sample_rate) {
|
||||
return impl.SpeechToText(this._msp, buffer, buffer_size, sample_rate);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Use the DeepSpeech model to perform Speech-To-Text and output metadata
|
||||
* about the results.
|
||||
*
|
||||
* @param buffer A 16-bit, mono raw audio signal at the appropriate
|
||||
* sample rate.
|
||||
* @param buffer_size The number of samples in the audio signal.
|
||||
* @param sample_rate The sample-rate of the audio signal.
|
||||
*
|
||||
* @return Outputs a Metadata object of individual letters along with their timing information.
|
||||
*/
|
||||
public Metadata sttWithMetadata(short[] buffer, int buffer_size, int sample_rate) {
|
||||
return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Create a new streaming inference state. The streaming state returned
|
||||
* by this function can then be passed to feedAudioContent()
|
||||
* and finishStream().
|
||||
*
|
||||
* @param sample_rate The sample-rate of the audio signal.
|
||||
* @return An opaque object that represents the streaming state.
|
||||
*/
|
||||
public DeepSpeechStreamingState createStream(int sample_rate) {
|
||||
SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
|
||||
impl.CreateStream(this._msp, sample_rate, ssp);
|
||||
return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Feed audio samples to an ongoing streaming inference.
|
||||
*
|
||||
* @param ctx A streaming state pointer returned by createStream().
|
||||
* @param buffer An array of 16-bit, mono raw audio samples at the
|
||||
* appropriate sample rate.
|
||||
* @param buffer_size The number of samples in @p buffer.
|
||||
*/
|
||||
public void feedAudioContent(DeepSpeechStreamingState ctx, short[] buffer, int buffer_size) {
|
||||
impl.FeedAudioContent(ctx.get(), buffer, buffer_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Compute the intermediate decoding of an ongoing streaming inference.
|
||||
* This is an expensive process as the decoder implementation isn't
|
||||
* currently capable of streaming, so it always starts from the beginning
|
||||
* of the audio.
|
||||
*
|
||||
* @param ctx A streaming state pointer returned by createStream().
|
||||
*
|
||||
* @return The STT intermediate result.
|
||||
*/
|
||||
public String intermediateDecode(DeepSpeechStreamingState ctx) {
|
||||
return impl.IntermediateDecode(ctx.get());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Signal the end of an audio signal to an ongoing streaming
|
||||
* inference, returns the STT result over the whole audio signal.
|
||||
*
|
||||
* @param ctx A streaming state pointer returned by createStream().
|
||||
*
|
||||
* @return The STT result.
|
||||
*
|
||||
* @note This method will free the state pointer (@p ctx).
|
||||
*/
|
||||
public String finishStream(DeepSpeechStreamingState ctx) {
|
||||
return impl.FinishStream(ctx.get());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Signal the end of an audio signal to an ongoing streaming
|
||||
* inference, returns per-letter metadata.
|
||||
*
|
||||
* @param ctx A streaming state pointer returned by createStream().
|
||||
*
|
||||
* @return Outputs a Metadata object of individual letters along with their timing information.
|
||||
*
|
||||
* @note This method will free the state pointer (@p ctx).
|
||||
*/
|
||||
public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx) {
|
||||
return impl.FinishStreamWithMetadata(ctx.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,100 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
* This file was automatically generated by SWIG (http://www.swig.org).
|
||||
* Version 4.0.2
|
||||
*
|
||||
* Do not make changes to this file unless you know what you are doing--modify
|
||||
* the SWIG interface file instead.
|
||||
* ----------------------------------------------------------------------------- */
|
||||
|
||||
package org.mozilla.deepspeech.libdeepspeech;
|
||||
|
||||
/**
|
||||
* Stores the entire CTC output as an array of character metadata objects
|
||||
*/
|
||||
public class Metadata {
|
||||
private transient long swigCPtr;
|
||||
protected transient boolean swigCMemOwn;
|
||||
|
||||
protected Metadata(long cPtr, boolean cMemoryOwn) {
|
||||
swigCMemOwn = cMemoryOwn;
|
||||
swigCPtr = cPtr;
|
||||
}
|
||||
|
||||
protected static long getCPtr(Metadata obj) {
|
||||
return (obj == null) ? 0 : obj.swigCPtr;
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
protected void finalize() {
|
||||
delete();
|
||||
}
|
||||
|
||||
public synchronized void delete() {
|
||||
if (swigCPtr != 0) {
|
||||
if (swigCMemOwn) {
|
||||
swigCMemOwn = false;
|
||||
implJNI.delete_Metadata(swigCPtr);
|
||||
}
|
||||
swigCPtr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List of items
|
||||
*/
|
||||
public void setItems(MetadataItem value) {
|
||||
implJNI.Metadata_items_set(swigCPtr, this, MetadataItem.getCPtr(value), value);
|
||||
}
|
||||
|
||||
/**
|
||||
* List of items
|
||||
*/
|
||||
public MetadataItem getItems() {
|
||||
long cPtr = implJNI.Metadata_items_get(swigCPtr, this);
|
||||
return (cPtr == 0) ? null : new MetadataItem(cPtr, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Size of the list of items
|
||||
*/
|
||||
public void setNum_items(int value) {
|
||||
implJNI.Metadata_num_items_set(swigCPtr, this, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Size of the list of items
|
||||
*/
|
||||
public int getNum_items() {
|
||||
return implJNI.Metadata_num_items_get(swigCPtr, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Approximated confidence value for this transcription. This is roughly the<br>
|
||||
* sum of the acoustic model logit values for each timestep/character that<br>
|
||||
* contributed to the creation of this transcription.
|
||||
*/
|
||||
public void setConfidence(double value) {
|
||||
implJNI.Metadata_confidence_set(swigCPtr, this, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Approximated confidence value for this transcription. This is roughly the<br>
|
||||
* sum of the acoustic model logit values for each timestep/character that<br>
|
||||
* contributed to the creation of this transcription.
|
||||
*/
|
||||
public double getConfidence() {
|
||||
return implJNI.Metadata_confidence_get(swigCPtr, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve one MetadataItem element<br>
|
||||
* <br>
|
||||
* @param i Array index of the MetadataItem to get<br>
|
||||
* <br>
|
||||
* @return The MetadataItem requested or null
|
||||
*/
|
||||
public MetadataItem getItem(int i) {
|
||||
return new MetadataItem(implJNI.Metadata_getItem(swigCPtr, this, i), true);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,79 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
* This file was automatically generated by SWIG (http://www.swig.org).
|
||||
* Version 4.0.2
|
||||
*
|
||||
* Do not make changes to this file unless you know what you are doing--modify
|
||||
* the SWIG interface file instead.
|
||||
* ----------------------------------------------------------------------------- */
|
||||
|
||||
package org.mozilla.deepspeech.libdeepspeech;
|
||||
|
||||
/**
|
||||
* Stores each individual character, along with its timing information
|
||||
*/
|
||||
public class MetadataItem {
|
||||
private transient long swigCPtr;
|
||||
protected transient boolean swigCMemOwn;
|
||||
|
||||
protected MetadataItem(long cPtr, boolean cMemoryOwn) {
|
||||
swigCMemOwn = cMemoryOwn;
|
||||
swigCPtr = cPtr;
|
||||
}
|
||||
|
||||
protected static long getCPtr(MetadataItem obj) {
|
||||
return (obj == null) ? 0 : obj.swigCPtr;
|
||||
}
|
||||
|
||||
public synchronized void delete() {
|
||||
if (swigCPtr != 0) {
|
||||
if (swigCMemOwn) {
|
||||
swigCMemOwn = false;
|
||||
throw new UnsupportedOperationException("C++ destructor does not have public access");
|
||||
}
|
||||
swigCPtr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The character generated for transcription
|
||||
*/
|
||||
public void setCharacter(String value) {
|
||||
implJNI.MetadataItem_character_set(swigCPtr, this, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* The character generated for transcription
|
||||
*/
|
||||
public String getCharacter() {
|
||||
return implJNI.MetadataItem_character_get(swigCPtr, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Position of the character in units of 20ms
|
||||
*/
|
||||
public void setTimestep(int value) {
|
||||
implJNI.MetadataItem_timestep_set(swigCPtr, this, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Position of the character in units of 20ms
|
||||
*/
|
||||
public int getTimestep() {
|
||||
return implJNI.MetadataItem_timestep_get(swigCPtr, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Position of the character in seconds
|
||||
*/
|
||||
public void setStart_time(float value) {
|
||||
implJNI.MetadataItem_start_time_set(swigCPtr, this, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Position of the character in seconds
|
||||
*/
|
||||
public float getStart_time() {
|
||||
return implJNI.MetadataItem_start_time_get(swigCPtr, this);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
Javadoc for Sphinx
|
||||
==================
|
||||
|
||||
This code is only here for reference for documentation generation.
|
||||
|
||||
To update, please build SWIG (4.0 at least) and then run from native_client/java:
|
||||
```
|
||||
swig -c++ -java -doxygen -package org.mozilla.deepspeech.libdeepspeech -outdir libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech_doc -o jni/deepspeech_wrap.cpp jni/deepspeech.i
|
||||
```
|
@ -20,6 +20,18 @@ if (process.platform === 'win32') {
|
||||
process.env['PATH'] = oldPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* @class
|
||||
* An object providing an interface to a trained DeepSpeech model.
|
||||
*
|
||||
* @param {string} aModelPath The path to the frozen model graph.
|
||||
* @param {number} aNCep The number of cepstrum the model was trained with.
|
||||
* @param {number} aNContext The context window the model was trained with.
|
||||
* @param {string} aAlphabetConfigPath The path to the configuration file specifying the alphabet used by the network. See alphabet.h.
|
||||
* @param {number} aBeamWidth The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
|
||||
*
|
||||
* @throws on error
|
||||
*/
|
||||
function Model() {
|
||||
this._impl = null;
|
||||
|
||||
@ -33,21 +45,59 @@ function Model() {
|
||||
this._impl = impl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable decoding using beam scoring with a KenLM language model.
|
||||
*
|
||||
* @param {string} aAlphabetConfigPath The path to the configuration file specifying the alphabet used by the network. See alphabet.h.
|
||||
* @param {string} aLMPath The path to the language model binary file.
|
||||
* @param {string} aTriePath The path to the trie file built from the same vocabulary as the language model binary.
|
||||
* @param {float} aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight.
|
||||
* @param {float} aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight.
|
||||
*
|
||||
* @return {number} Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
Model.prototype.enableDecoderWithLM = function() {
|
||||
const args = [this._impl].concat(Array.prototype.slice.call(arguments));
|
||||
return binding.EnableDecoderWithLM.apply(null, args);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the DeepSpeech model to perform Speech-To-Text.
|
||||
*
|
||||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate.
|
||||
* @param {number} aBufferSize The number of samples in the audio signal.
|
||||
* @param {number} aSampleRate The sample-rate of the audio signal.
|
||||
*
|
||||
* @return {string} The STT result. Returns undefined on error.
|
||||
*/
|
||||
Model.prototype.stt = function() {
|
||||
const args = [this._impl].concat(Array.prototype.slice.call(arguments));
|
||||
return binding.SpeechToText.apply(null, args);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the DeepSpeech model to perform Speech-To-Text and output metadata
|
||||
* about the results.
|
||||
*
|
||||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate.
|
||||
* @param {number} aBufferSize The number of samples in the audio signal.
|
||||
* @param {number} aSampleRate The sample-rate of the audio signal.
|
||||
*
|
||||
* @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error.
|
||||
*/
|
||||
Model.prototype.sttWithMetadata = function() {
|
||||
const args = [this._impl].concat(Array.prototype.slice.call(arguments));
|
||||
return binding.SpeechToTextWithMetadata.apply(null, args);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new streaming inference state. The streaming state returned by this function can then be passed to :js:func:`Model.feedAudioContent` and :js:func:`Model.finishStream`.
|
||||
*
|
||||
* @param {number} aSampleRate The sample-rate of the audio signal.
|
||||
* @return {object} an opaque object that represents the streaming state.
|
||||
*
|
||||
* @throws on error
|
||||
*/
|
||||
Model.prototype.createStream = function() {
|
||||
const args = [this._impl].concat(Array.prototype.slice.call(arguments));
|
||||
const rets = binding.CreateStream.apply(null, args);
|
||||
@ -59,30 +109,159 @@ Model.prototype.createStream = function() {
|
||||
return ctx;
|
||||
}
|
||||
|
||||
/**
|
||||
* Feed audio samples to an ongoing streaming inference.
|
||||
*
|
||||
* @param {object} aSctx A streaming state returned by :js:func:`Model.createStream`.
|
||||
* @param {buffer} aBuffer An array of 16-bit, mono raw audio samples at the
|
||||
* appropriate sample rate.
|
||||
* @param {number} aBufferSize The number of samples in @param aBuffer.
|
||||
*/
|
||||
Model.prototype.feedAudioContent = function() {
|
||||
binding.FeedAudioContent.apply(null, arguments);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the intermediate decoding of an ongoing streaming inference. This is an expensive process as the decoder implementation isn't currently capable of streaming, so it always starts from the beginning of the audio.
|
||||
*
|
||||
* @param {object} aSctx A streaming state returned by :js:func:`Model.createStream`.
|
||||
*
|
||||
* @return {string} The STT intermediate result.
|
||||
*/
|
||||
Model.prototype.intermediateDecode = function() {
|
||||
return binding.IntermediateDecode.apply(null, arguments);
|
||||
}
|
||||
|
||||
/**
|
||||
* Signal the end of an audio signal to an ongoing streaming inference, returns the STT result over the whole audio signal.
|
||||
*
|
||||
* @param {object} aSctx A streaming state returned by :js:func:`Model.createStream`.
|
||||
*
|
||||
* @return {string} The STT result.
|
||||
*
|
||||
* This method will free the state (@param aSctx).
|
||||
*/
|
||||
Model.prototype.finishStream = function() {
|
||||
return binding.FinishStream.apply(null, arguments);
|
||||
}
|
||||
|
||||
/**
|
||||
* Signal the end of an audio signal to an ongoing streaming inference, returns per-letter metadata.
|
||||
*
|
||||
* @param {object} aSctx A streaming state pointer returned by :js:func:`Model.createStream`.
|
||||
*
|
||||
* @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`.
|
||||
*
|
||||
* This method will free the state pointer (@param aSctx).
|
||||
*/
|
||||
Model.prototype.finishStreamWithMetadata = function() {
|
||||
return binding.FinishStreamWithMetadata.apply(null, arguments);
|
||||
}
|
||||
|
||||
/**
|
||||
* Frees associated resources and destroys model object.
|
||||
*
|
||||
* @param {object} model A model pointer returned by :js:func:`Model`
|
||||
*
|
||||
*/
|
||||
function FreeModel(model) {
|
||||
return binding.FreeModel(model._impl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Free memory allocated for metadata information.
|
||||
*
|
||||
* @param {object} metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata` or :js:func:`Model.finishStreamWithMetadata`
|
||||
*/
|
||||
function FreeMetadata(metadata) {
|
||||
return binding.FreeMetadata(metadata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy a streaming state without decoding the computed logits. This
|
||||
* can be used if you no longer need the result of an ongoing streaming
|
||||
* inference and don't want to perform a costly decode operation.
|
||||
*
|
||||
* @param {Object} stream A streaming state pointer returned by :js:func:`Model.createStream`.
|
||||
*/
|
||||
function FreeStream(stream) {
|
||||
return binding.FreeStream(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print version of this library and of the linked TensorFlow library on standard output.
|
||||
*/
|
||||
function printVersions() {
|
||||
return binding.PrintVersions();
|
||||
}
|
||||
|
||||
|
||||
//// Metadata and MetadataItem are here only for documentation purposes
|
||||
|
||||
/**
|
||||
* @class
|
||||
*
|
||||
* Stores each individual character, along with its timing information
|
||||
*/
|
||||
function MetadataItem() {}
|
||||
|
||||
/**
|
||||
* The character generated for transcription
|
||||
*
|
||||
* @return {string} The character generated
|
||||
*/
|
||||
MetadataItem.prototype.character = function() {}
|
||||
|
||||
/**
|
||||
* Position of the character in units of 20ms
|
||||
*
|
||||
* @return {int} The position of the character
|
||||
*/
|
||||
MetadataItem.prototype.timestep = function() {};
|
||||
|
||||
/**
|
||||
* Position of the character in seconds
|
||||
*
|
||||
* @return {float} The position of the character
|
||||
*/
|
||||
MetadataItem.prototype.start_time = function() {};
|
||||
|
||||
/**
|
||||
* @class
|
||||
*
|
||||
* Stores the entire CTC output as an array of character metadata objects
|
||||
*/
|
||||
function Metadata () {}
|
||||
|
||||
/**
|
||||
* List of items
|
||||
*
|
||||
* @return {array} List of :js:func:`MetadataItem`
|
||||
*/
|
||||
Metadata.prototype.items = function() {}
|
||||
|
||||
/**
|
||||
* Size of the list of items
|
||||
*
|
||||
* @return {int} Number of items
|
||||
*/
|
||||
Metadata.prototype.num_items = function() {}
|
||||
|
||||
/**
|
||||
* Approximated confidence value for this transcription. This is roughly the
|
||||
* sum of the acoustic model logit values for each timestep/character that
|
||||
* contributed to the creation of this transcription.
|
||||
*
|
||||
* @return {float} Confidence value
|
||||
*/
|
||||
Metadata.prototype.confidence = function() {}
|
||||
|
||||
module.exports = {
|
||||
Model: Model,
|
||||
printVersions: binding.PrintVersions,
|
||||
Metadata: Metadata,
|
||||
MetadataItem: MetadataItem,
|
||||
printVersions: printVersions,
|
||||
FreeModel: FreeModel,
|
||||
FreeStream: binding.FreeStream,
|
||||
FreeMetadata: binding.FreeMetadata
|
||||
FreeStream: FreeStream,
|
||||
FreeMetadata: FreeMetadata
|
||||
};
|
||||
|
@ -18,7 +18,19 @@ from deepspeech.impl import PrintVersions as printVersions
|
||||
from deepspeech.impl import FreeStream as freeStream
|
||||
|
||||
class Model(object):
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
Class holding a DeepSpeech model
|
||||
|
||||
:param aModelPath: Path to model file to load
|
||||
:type aModelPath: str
|
||||
|
||||
:param aAlphabetConfigPath: Path to alphabet file to load
|
||||
:type aAlphabetConfigPath: str
|
||||
|
||||
:param aBeamWidth: Decoder beam width
|
||||
:type aBeamWidth: int
|
||||
"""
|
||||
def __init__(self, *args, **kwargs):
|
||||
# make sure the attribute is there if CreateModel fails
|
||||
self._impl = None
|
||||
|
||||
@ -33,29 +45,198 @@ class Model(object):
|
||||
self._impl = None
|
||||
|
||||
def enableDecoderWithLM(self, *args, **kwargs):
|
||||
"""
|
||||
Enable decoding using beam scoring with a KenLM language model.
|
||||
|
||||
:param aLMPath: The path to the language model binary file.
|
||||
:type aLMPath: str
|
||||
|
||||
:param aTriePath: The path to the trie file built from the same vocabulary as the language model binary.
|
||||
:type aTriePath: str
|
||||
|
||||
:param aLMAlpha: The alpha hyperparameter of the CTC decoder. Language Model weight.
|
||||
:type aLMAlpha: float
|
||||
|
||||
:param aLMBeta: The beta hyperparameter of the CTC decoder. Word insertion weight.
|
||||
:type aLMBeta: float
|
||||
|
||||
:return: Zero on success, non-zero on failure (invalid arguments).
|
||||
:rtype: int
|
||||
"""
|
||||
return deepspeech.impl.EnableDecoderWithLM(self._impl, *args, **kwargs)
|
||||
|
||||
def stt(self, *args, **kwargs):
|
||||
"""
|
||||
Use the DeepSpeech model to perform Speech-To-Text.
|
||||
|
||||
:param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate.
|
||||
:type aBuffer: int array
|
||||
|
||||
:param aBufferSize: The number of samples in the audio signal.
|
||||
:type aBufferSize: int
|
||||
|
||||
:param aSampleRate: The sample-rate of the audio signal.
|
||||
:type aSampleRate: int
|
||||
|
||||
:return: The STT result.
|
||||
:rtype: str
|
||||
"""
|
||||
return deepspeech.impl.SpeechToText(self._impl, *args, **kwargs)
|
||||
|
||||
def sttWithMetadata(self, *args, **kwargs):
|
||||
"""
|
||||
Use the DeepSpeech model to perform Speech-To-Text and output metadata about the results.
|
||||
|
||||
:param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate.
|
||||
:type aBuffer: int array
|
||||
|
||||
:param aBufferSize: The number of samples in the audio signal.
|
||||
:type aBufferSize: int
|
||||
|
||||
:param aSampleRate: The sample-rate of the audio signal.
|
||||
:type aSampleRate: int
|
||||
|
||||
:return: Outputs a struct of individual letters along with their timing information.
|
||||
:rtype: :func:`Metadata`
|
||||
"""
|
||||
return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)
|
||||
|
||||
def createStream(self, sample_rate=16000):
|
||||
"""
|
||||
Create a new streaming inference state. The streaming state returned
|
||||
by this function can then be passed to :func:`feedAudioContent()` and :func:`finishStream()`.
|
||||
|
||||
:param sample_rate: The sample-rate of the audio signal.
|
||||
:type sample_rate: int
|
||||
|
||||
:return: Object holding the stream
|
||||
|
||||
:raises RuntimeError: on error
|
||||
"""
|
||||
status, ctx = deepspeech.impl.CreateStream(self._impl,
|
||||
aSampleRate=sample_rate)
|
||||
if status != 0:
|
||||
raise RuntimeError("CreateStream failed with error code {}".format(status))
|
||||
return ctx
|
||||
|
||||
# pylint: disable=no-self-use
|
||||
def feedAudioContent(self, *args, **kwargs):
|
||||
"""
|
||||
Feed audio samples to an ongoing streaming inference.
|
||||
|
||||
:param aSctx: A streaming state pointer returned by :func:`createStream()`.
|
||||
:type aSctx: object
|
||||
|
||||
:param aBuffer: An array of 16-bit, mono raw audio samples at the appropriate sample rate.
|
||||
:type aBuffer: int array
|
||||
|
||||
:param aBufferSize: The number of samples in ``aBuffer``.
|
||||
:type aBufferSize: int
|
||||
"""
|
||||
deepspeech.impl.FeedAudioContent(*args, **kwargs)
|
||||
|
||||
# pylint: disable=no-self-use
|
||||
def intermediateDecode(self, *args, **kwargs):
|
||||
"""
|
||||
Compute the intermediate decoding of an ongoing streaming inference.
|
||||
This is an expensive process as the decoder implementation isn't
|
||||
currently capable of streaming, so it always starts from the beginning
|
||||
of the audio.
|
||||
|
||||
:param aSctx: A streaming state pointer returned by :func:`createStream()`.
|
||||
:type aSctx: object
|
||||
|
||||
:return: The STT intermediate result.
|
||||
:rtype: str
|
||||
"""
|
||||
return deepspeech.impl.IntermediateDecode(*args, **kwargs)
|
||||
|
||||
# pylint: disable=no-self-use
|
||||
def finishStream(self, *args, **kwargs):
|
||||
"""
|
||||
Signal the end of an audio signal to an ongoing streaming
|
||||
inference, returns the STT result over the whole audio signal.
|
||||
|
||||
:param aSctx: A streaming state pointer returned by :func:`createStream()`.
|
||||
:type aSctx: object
|
||||
|
||||
:return: The STT result.
|
||||
:rtype: str
|
||||
"""
|
||||
return deepspeech.impl.FinishStream(*args, **kwargs)
|
||||
|
||||
# pylint: disable=no-self-use
|
||||
def finishStreamWithMetadata(self, *args, **kwargs):
|
||||
"""
|
||||
Signal the end of an audio signal to an ongoing streaming
|
||||
inference, returns per-letter metadata.
|
||||
|
||||
:param aSctx: A streaming state pointer returned by :func:`createStream()`.
|
||||
:type aSctx: object
|
||||
|
||||
:return: Outputs a struct of individual letters along with their timing information.
|
||||
:rtype: :func:`Metadata`
|
||||
"""
|
||||
return deepspeech.impl.FinishStreamWithMetadata(*args, **kwargs)
|
||||
|
||||
# This is only for documentation purposes
|
||||
# Metadata and MetadataItem should be in sync with native_client/deepspeech.h
|
||||
class MetadataItem(object):
|
||||
"""
|
||||
Stores each individual character, along with its timing information
|
||||
"""
|
||||
|
||||
def character(self):
|
||||
"""
|
||||
The character generated for transcription
|
||||
"""
|
||||
# pylint: disable=unnecessary-pass
|
||||
pass
|
||||
|
||||
def timestep(self):
|
||||
"""
|
||||
Position of the character in units of 20ms
|
||||
"""
|
||||
# pylint: disable=unnecessary-pass
|
||||
pass
|
||||
|
||||
def start_time(self):
|
||||
"""
|
||||
Position of the character in seconds
|
||||
"""
|
||||
# pylint: disable=unnecessary-pass
|
||||
pass
|
||||
|
||||
|
||||
class Metadata(object):
|
||||
"""
|
||||
Stores the entire CTC output as an array of character metadata objects
|
||||
"""
|
||||
def items(self):
|
||||
"""
|
||||
List of items
|
||||
|
||||
:return: A list of :func:`MetadataItem` elements
|
||||
:rtype: list
|
||||
"""
|
||||
# pylint: disable=unnecessary-pass
|
||||
pass
|
||||
|
||||
def num_items(self):
|
||||
"""
|
||||
Size of the list of items
|
||||
|
||||
:return: Size of the list of items
|
||||
:rtype: int
|
||||
"""
|
||||
# pylint: disable=unnecessary-pass
|
||||
pass
|
||||
|
||||
def confidence(self):
|
||||
"""
|
||||
Approximated confidence value for this transcription. This is roughly the
|
||||
sum of the acoustic model logit values for each timestep/character that
|
||||
contributed to the creation of this transcription.
|
||||
"""
|
||||
# pylint: disable=unnecessary-pass
|
||||
pass
|
||||
|
@ -6,6 +6,8 @@ python:
|
||||
brew:
|
||||
setup: 'install_local_homebrew "python-ds-test" && install_pkg_local_homebrew "sox" && install_pkg_local_homebrew "readline" && install_pkg_local_homebrew "openssl" && install_pkg_local_homebrew "pkg-config"'
|
||||
env: 'export EXTRA_ENV="PATH=$TASKCLUSTER_TASK_DIR/python-ds-test.brew/bin/:$PATH"'
|
||||
packages_docs_bionic:
|
||||
apt: 'python3 python3-pip zip doxygen'
|
||||
electronjs:
|
||||
packages_xenial:
|
||||
apt: 'libatk1.0-0 libatk-bridge2.0-0 libcairo2 libcups2 libdbus-1-3 libgdk-pixbuf2.0-0 libgtk-3-0 libnspr4 libnss3 libpango-1.0-0 libpangocairo-1.0-0 libx11-xcb1 libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 xvfb'
|
||||
@ -25,6 +27,10 @@ nodejs:
|
||||
prep_10: 'echo "deb http://deb.nodesource.com/node_10.x xenial main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -'
|
||||
prep_11: 'echo "deb http://deb.nodesource.com/node_11.x xenial main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -'
|
||||
prep_12: 'echo "deb http://deb.nodesource.com/node_12.x xenial main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -'
|
||||
packages_docs_bionic:
|
||||
apt: 'nodejs'
|
||||
apt_pinning: '(echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > /etc/apt/preferences'
|
||||
prep_12: 'echo "deb http://deb.nodesource.com/node_12.x bionic main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -'
|
||||
packages_buster:
|
||||
apt: 'nodejs sox'
|
||||
apt_pinning: '(echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > /etc/apt/preferences'
|
||||
|
10
taskcluster/docs-build.sh
Normal file
10
taskcluster/docs-build.sh
Normal file
@ -0,0 +1,10 @@
|
||||
#!/bin/bash
# Runs the `html` and `dist` targets of doc/Makefile from the repository root.

set -xe

# Resolve the script directory to an absolute path. It is embedded in PATH
# below, and PATH is still consulted after `make -C doc/` has changed the
# working directory; with a relative $0 (e.g. `bash taskcluster/docs-build.sh`)
# the entry would then point at a non-existent location.
THIS=$(cd "$(dirname "$0")" && pwd)

pushd "${THIS}/../"
  # Prepend the user-local bin directory and doc/node_modules/.bin/ so that
  # tools installed there are found by the documentation build.
  export PATH="$HOME/.local/bin:${THIS}/../doc/node_modules/.bin/:$PATH"
  make -C doc/ html dist
popd
|
9
taskcluster/docs-package.sh
Normal file
9
taskcluster/docs-package.sh
Normal file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Copies the documentation archive doc/html.zip into the task artifacts
# directory under the name doc-html.zip.

set -xe

# tc-tests-utils.sh is sourced from the directory containing this script.
# NOTE(review): TASKCLUSTER_ARTIFACTS and DS_DSDIR are not set in this script
# and are presumably defined by tc-tests-utils.sh — confirm.
# Quoted so that a checkout path containing spaces is not word-split.
source "$(dirname "$0")/tc-tests-utils.sh"

# Best-effort creation of the artifacts directory; a failure here is
# deliberately ignored (the copy below will fail under `set -e` if the
# directory is really unusable).
mkdir -p "${TASKCLUSTER_ARTIFACTS}" || true

cp "${DS_DSDIR}/doc/html.zip" "${TASKCLUSTER_ARTIFACTS}/doc-html.zip"
|
5
taskcluster/docs-requirements.txt
Normal file
5
taskcluster/docs-requirements.txt
Normal file
@ -0,0 +1,5 @@
|
||||
breathe==4.13.1
|
||||
semver==2.8.1
|
||||
sphinx==2.2.0
|
||||
sphinx-js==2.8
|
||||
sphinx-rtd-theme==0.4.3
|
64
taskcluster/docs.tyml
Normal file
64
taskcluster/docs.tyml
Normal file
@ -0,0 +1,64 @@
|
||||
$if: 'event.event in build.allowed'
|
||||
then:
|
||||
taskId: ${taskcluster.taskId}
|
||||
provisionerId: ${taskcluster.docker.provisionerId}
|
||||
workerType: ${taskcluster.docker.workerType}
|
||||
taskGroupId: ${taskcluster.taskGroupId}
|
||||
schedulerId: ${taskcluster.schedulerId}
|
||||
dependencies:
|
||||
$map: { $eval: build.dependencies }
|
||||
each(b):
|
||||
$eval: as_slugid(b)
|
||||
created: { $fromNow: '0 sec' }
|
||||
deadline: { $fromNow: '1 day' }
|
||||
expires:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then: { $fromNow: '6 months' }
|
||||
else: { $fromNow: '7 days' }
|
||||
|
||||
extra:
|
||||
nc_asset_name: { $eval: build.nc_asset_name }
|
||||
github:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then: { $eval: taskcluster.github_events.merge }
|
||||
else: { $eval: taskcluster.github_events.pull_request }
|
||||
|
||||
routes:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then:
|
||||
{ $eval: build.routes }
|
||||
|
||||
payload:
|
||||
maxRunTime: { $eval: to_int(build.maxRunTime) }
|
||||
image: "ubuntu:18.04"
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
- "--login"
|
||||
- "-cxe"
|
||||
- $let:
|
||||
extraSystemSetup: { $eval: strip(str(build.system_setup)) }
|
||||
extraSystemConfig: { $eval: strip(str(build.system_config)) }
|
||||
in: >
|
||||
apt-get -qq update && apt-get -qq -y install git wget gnupg sudo && ${extraSystemSetup} &&
|
||||
adduser --system --home ${system.homedir.linux} ${system.username} &&
|
||||
cd ${system.homedir.linux}/ &&
|
||||
echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
|
||||
sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} &&
|
||||
sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.build} &&
|
||||
sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.package}
|
||||
|
||||
artifacts:
|
||||
"public":
|
||||
type: "directory"
|
||||
path: "/tmp/artifacts/"
|
||||
expires:
|
||||
$if: '(event.event == "push") || (event.event == "tag")'
|
||||
then: { $fromNow: '6 months' }
|
||||
else: { $fromNow: '7 days' }
|
||||
|
||||
metadata:
|
||||
name: ${build.metadata.name}
|
||||
description: ${build.metadata.description}
|
||||
owner: ${event.head.user.email}
|
||||
source: ${event.head.repo.url}
|
18
taskcluster/docs.yml
Normal file
18
taskcluster/docs.yml
Normal file
@ -0,0 +1,18 @@
|
||||
build:
|
||||
template_file: docs.tyml
|
||||
routes:
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.docs"
|
||||
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.docs"
|
||||
- "index.project.deepspeech.deepspeech.native_client.docs.${event.head.sha}"
|
||||
- "notify.irc-channel.${notifications.irc}.on-exception"
|
||||
- "notify.irc-channel.${notifications.irc}.on-failed"
|
||||
system_setup:
|
||||
>
|
||||
${nodejs.packages_docs_bionic.prep_12} && ${nodejs.packages_docs_bionic.apt_pinning}
|
||||
&& apt-get -qq update && apt-get -qq -y install ${nodejs.packages_docs_bionic.apt} ${python.packages_docs_bionic.apt}
|
||||
scripts:
|
||||
build: "taskcluster/docs-build.sh"
|
||||
package: "taskcluster/docs-package.sh"
|
||||
metadata:
|
||||
name: "DeepSpeech API Documentation"
|
||||
description: "Building DeepSpeech API Documentation"
|
Loading…
Reference in New Issue
Block a user