Address review comments and update docs
This commit is contained in:
parent
efbed73d5c
commit
1d3b3a31a1
3
.gitattributes
vendored
3
.gitattributes
vendored
@ -1,4 +1 @@
|
|||||||
*.binary filter=lfs diff=lfs merge=lfs -crlf
|
|
||||||
data/lm/trie filter=lfs diff=lfs merge=lfs -crlf
|
|
||||||
data/lm/vocab.txt filter=lfs diff=lfs merge=lfs -text
|
|
||||||
data/lm/kenlm.scorer filter=lfs diff=lfs merge=lfs -text
|
data/lm/kenlm.scorer filter=lfs diff=lfs merge=lfs -text
|
||||||
|
@ -36,7 +36,7 @@ To install and use deepspeech all you have to do is:
|
|||||||
tar xvf audio-0.6.1.tar.gz
|
tar xvf audio-0.6.1.tar.gz
|
||||||
|
|
||||||
# Transcribe an audio file
|
# Transcribe an audio file
|
||||||
deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --lm deepspeech-0.6.1-models/lm.binary --trie deepspeech-0.6.1-models/trie --audio audio/2830-3980-0043.wav
|
deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --scorer deepspeech-0.6.1-models/kenlm.scorer --audio audio/2830-3980-0043.wav
|
||||||
|
|
||||||
A pre-trained English model is available for use and can be downloaded using `the instructions below <doc/USING.rst#using-a-pre-trained-model>`_. A package with some example audio files is available for download in our `release notes <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
|
A pre-trained English model is available for use and can be downloaded using `the instructions below <doc/USING.rst#using-a-pre-trained-model>`_. A package with some example audio files is available for download in our `release notes <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
|
||||||
|
|
||||||
@ -52,7 +52,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
|
|||||||
pip3 install deepspeech-gpu
|
pip3 install deepspeech-gpu
|
||||||
|
|
||||||
# Transcribe an audio file.
|
# Transcribe an audio file.
|
||||||
deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --lm deepspeech-0.6.1-models/lm.binary --trie deepspeech-0.6.1-models/trie --audio audio/2830-3980-0043.wav
|
deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --scorer deepspeech-0.6.1-models/kenlm.scorer --audio audio/2830-3980-0043.wav
|
||||||
|
|
||||||
Please ensure you have the required `CUDA dependencies <doc/USING.rst#cuda-dependency>`_.
|
Please ensure you have the required `CUDA dependencies <doc/USING.rst#cuda-dependency>`_.
|
||||||
|
|
||||||
|
@ -5,9 +5,7 @@ This directory contains language-specific data files. Most importantly, you will
|
|||||||
|
|
||||||
1. A list of unique characters for the target language (e.g. English) in `data/alphabet.txt`
|
1. A list of unique characters for the target language (e.g. English) in `data/alphabet.txt`
|
||||||
|
|
||||||
2. A binary n-gram language model compiled by `kenlm` in `data/lm/lm.binary`
|
2. A scorer package (`data/lm/kenlm.scorer`) generated with `data/lm/generate_package.py`, which includes a binary n-gram language model generated with `data/lm/generate_lm.py`.
|
||||||
|
|
||||||
3. A trie model compiled by `generate_trie <https://github.com/mozilla/DeepSpeech#using-the-command-line-client>`_ in `data/lm/trie`
|
|
||||||
|
|
||||||
For more information on how to build these resources from scratch, see `data/lm/README.md`
|
For more information on how to build these resources from scratch, see `data/lm/README.md`
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
lm.binary was generated from the LibriSpeech normalized LM training text, available `here <http://www.openslr.org/11>`_\ , using the `generate_lm.py` script (will generate lm.binary in the folder it is run from). `KenLM <https://github.com/kpu/kenlm>`_'s built binaries must be in your PATH (lmplz, build_binary, filter).
|
The LM binary was generated from the LibriSpeech normalized LM training text, available `here <http://www.openslr.org/11>`_\ , using the `generate_lm.py` script (which writes lm.binary to the folder it is run from). The binaries built by `KenLM <https://github.com/kpu/kenlm>`_ (lmplz, build_binary, filter) must be in your PATH.
|
||||||
|
|
||||||
The trie was then generated from the vocabulary of the language model:
|
The scorer package was then built using the `generate_package.py` script:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
./generate_trie ../data/alphabet.txt lm.binary trie
|
python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab librispeech-vocab-500k.txt --default_alpha 0.75 --default_beta 1.85 --package kenlm.scorer
|
||||||
|
@ -39,10 +39,13 @@ def main():
|
|||||||
'--prune', '0', '0', '1'
|
'--prune', '0', '0', '1'
|
||||||
])
|
])
|
||||||
|
|
||||||
# Filter LM using vocabulary of top 500k words
|
|
||||||
filtered_path = os.path.join(tmp, 'lm_filtered.arpa')
|
|
||||||
vocab_str = '\n'.join(word for word, count in counter.most_common(500000))
|
vocab_str = '\n'.join(word for word, count in counter.most_common(500000))
|
||||||
|
with open('librispeech-vocab-500k.txt', 'w') as fout:
|
||||||
|
fout.write(vocab_str)
|
||||||
|
|
||||||
|
# Filter LM using vocabulary of top 500k words
|
||||||
print('Filtering ARPA file...')
|
print('Filtering ARPA file...')
|
||||||
|
filtered_path = os.path.join(tmp, 'lm_filtered.arpa')
|
||||||
subprocess.run(['filter', 'single', 'model:{}'.format(lm_path), filtered_path], input=vocab_str.encode('utf-8'), check=True)
|
subprocess.run(['filter', 'single', 'model:{}'.format(lm_path), filtered_path], input=vocab_str.encode('utf-8'), check=True)
|
||||||
|
|
||||||
# Quantize and produce trie binary.
|
# Quantize and produce trie binary.
|
||||||
|
@ -41,6 +41,7 @@ def create_bundle(
|
|||||||
|
|
||||||
if force_utf8 != None: # pylint: disable=singleton-comparison
|
if force_utf8 != None: # pylint: disable=singleton-comparison
|
||||||
use_utf8 = force_utf8.value
|
use_utf8 = force_utf8.value
|
||||||
|
print("Forcing UTF-8 mode = {}".format(use_utf8))
|
||||||
else:
|
else:
|
||||||
use_utf8 = vocab_looks_char_based
|
use_utf8 = vocab_looks_char_based
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ Creating a model instance and loading model
|
|||||||
.. literalinclude:: ../native_client/client.cc
|
.. literalinclude:: ../native_client/client.cc
|
||||||
:language: c
|
:language: c
|
||||||
:linenos:
|
:linenos:
|
||||||
:lines: 370-388
|
:lines: 370-390
|
||||||
|
|
||||||
Performing inference
|
Performing inference
|
||||||
--------------------
|
--------------------
|
||||||
|
@ -7,6 +7,12 @@ Model
|
|||||||
.. js:autoclass:: Model
|
.. js:autoclass:: Model
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
Stream
|
||||||
|
------
|
||||||
|
|
||||||
|
.. js:autoclass:: Stream
|
||||||
|
:members:
|
||||||
|
|
||||||
Module exported methods
|
Module exported methods
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ Creating a model instance and loading model
|
|||||||
.. literalinclude:: ../native_client/javascript/client.js
|
.. literalinclude:: ../native_client/javascript/client.js
|
||||||
:language: javascript
|
:language: javascript
|
||||||
:linenos:
|
:linenos:
|
||||||
:lines: 57-66
|
:lines: 54-72
|
||||||
|
|
||||||
Performing inference
|
Performing inference
|
||||||
--------------------
|
--------------------
|
||||||
@ -15,7 +15,7 @@ Performing inference
|
|||||||
.. literalinclude:: ../native_client/javascript/client.js
|
.. literalinclude:: ../native_client/javascript/client.js
|
||||||
:language: javascript
|
:language: javascript
|
||||||
:linenos:
|
:linenos:
|
||||||
:lines: 115-117
|
:lines: 117-121
|
||||||
|
|
||||||
Full source code
|
Full source code
|
||||||
----------------
|
----------------
|
||||||
|
@ -9,6 +9,12 @@ Model
|
|||||||
.. autoclass:: Model
|
.. autoclass:: Model
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
Stream
|
||||||
|
------
|
||||||
|
|
||||||
|
.. autoclass:: Stream
|
||||||
|
:members:
|
||||||
|
|
||||||
Metadata
|
Metadata
|
||||||
--------
|
--------
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ Creating a model instance and loading model
|
|||||||
.. literalinclude:: ../native_client/python/client.py
|
.. literalinclude:: ../native_client/python/client.py
|
||||||
:language: python
|
:language: python
|
||||||
:linenos:
|
:linenos:
|
||||||
:lines: 69, 78
|
:lines: 111, 120
|
||||||
|
|
||||||
Performing inference
|
Performing inference
|
||||||
--------------------
|
--------------------
|
||||||
@ -15,7 +15,7 @@ Performing inference
|
|||||||
.. literalinclude:: ../native_client/python/client.py
|
.. literalinclude:: ../native_client/python/client.py
|
||||||
:language: python
|
:language: python
|
||||||
:linenos:
|
:linenos:
|
||||||
:lines: 95-98
|
:lines: 140-145
|
||||||
|
|
||||||
Full source code
|
Full source code
|
||||||
----------------
|
----------------
|
||||||
|
@ -106,9 +106,9 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav
|
deepspeech --model models/output_graph.pbmm --scorer models/kenlm.scorer --audio my_audio_file.wav
|
||||||
|
|
||||||
The arguments ``--lm`` and ``--trie`` are optional, and represent a language model.
|
The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio.
|
||||||
|
|
||||||
See :github:`client.py <native_client/python/client.py>` for an example of how to use the package programmatically.
|
See :github:`client.py <native_client/python/client.py>` for an example of how to use the package programmatically.
|
||||||
|
|
||||||
@ -162,7 +162,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav
|
./deepspeech --model models/output_graph.pbmm --scorer models/kenlm.scorer --audio audio_input.wav
|
||||||
|
|
||||||
See the help output with ``./deepspeech -h`` and the :github:`native client README <native_client/README.rst>` for more details.
|
See the help output with ``./deepspeech -h`` and the :github:`native client README <native_client/README.rst>` for more details.
|
||||||
|
|
||||||
|
@ -59,11 +59,11 @@ void PrintHelp(const char* bin)
|
|||||||
|
|
||||||
bool ProcessArgs(int argc, char** argv)
|
bool ProcessArgs(int argc, char** argv)
|
||||||
{
|
{
|
||||||
const char* const short_opts = "m:a:s:r:w:c:d:b:tehv";
|
const char* const short_opts = "m:l:a:b:c:d:tejs:vh";
|
||||||
const option long_opts[] = {
|
const option long_opts[] = {
|
||||||
{"model", required_argument, nullptr, 'm'},
|
{"model", required_argument, nullptr, 'm'},
|
||||||
{"scorer", required_argument, nullptr, 'l'},
|
{"scorer", required_argument, nullptr, 'l'},
|
||||||
{"audio", required_argument, nullptr, 'w'},
|
{"audio", required_argument, nullptr, 'a'},
|
||||||
{"beam_width", required_argument, nullptr, 'b'},
|
{"beam_width", required_argument, nullptr, 'b'},
|
||||||
{"lm_alpha", required_argument, nullptr, 'c'},
|
{"lm_alpha", required_argument, nullptr, 'c'},
|
||||||
{"lm_beta", required_argument, nullptr, 'd'},
|
{"lm_beta", required_argument, nullptr, 'd'},
|
||||||
@ -71,8 +71,8 @@ bool ProcessArgs(int argc, char** argv)
|
|||||||
{"extended", no_argument, nullptr, 'e'},
|
{"extended", no_argument, nullptr, 'e'},
|
||||||
{"json", no_argument, nullptr, 'j'},
|
{"json", no_argument, nullptr, 'j'},
|
||||||
{"stream", required_argument, nullptr, 's'},
|
{"stream", required_argument, nullptr, 's'},
|
||||||
{"help", no_argument, nullptr, 'h'},
|
|
||||||
{"version", no_argument, nullptr, 'v'},
|
{"version", no_argument, nullptr, 'v'},
|
||||||
|
{"help", no_argument, nullptr, 'h'},
|
||||||
{nullptr, no_argument, nullptr, 0}
|
{nullptr, no_argument, nullptr, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -93,7 +93,7 @@ bool ProcessArgs(int argc, char** argv)
|
|||||||
scorer = optarg;
|
scorer = optarg;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'w':
|
case 'a':
|
||||||
audio = optarg;
|
audio = optarg;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -115,10 +115,6 @@ bool ProcessArgs(int argc, char** argv)
|
|||||||
show_times = true;
|
show_times = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'v':
|
|
||||||
has_versions = true;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'e':
|
case 'e':
|
||||||
extended_metadata = true;
|
extended_metadata = true;
|
||||||
break;
|
break;
|
||||||
@ -131,6 +127,10 @@ bool ProcessArgs(int argc, char** argv)
|
|||||||
stream_size = atoi(optarg);
|
stream_size = atoi(optarg);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'v':
|
||||||
|
has_versions = true;
|
||||||
|
break;
|
||||||
|
|
||||||
case 'h': // -h or --help
|
case 'h': // -h or --help
|
||||||
case '?': // Unrecognized option
|
case '?': // Unrecognized option
|
||||||
default:
|
default:
|
||||||
|
@ -12,11 +12,11 @@ class Scorer(swigwrapper.Scorer):
|
|||||||
:type alpha: float
|
:type alpha: float
|
||||||
:param beta: Word insertion bonus.
|
:param beta: Word insertion bonus.
|
||||||
:type beta: float
|
:type beta: float
|
||||||
:model_path: Path to load scorer.
|
:scorer_path: Path to load scorer from.
|
||||||
:alphabet: Alphabet
|
:alphabet: Alphabet
|
||||||
:type model_path: basestring
|
:type scorer_path: basestring
|
||||||
"""
|
"""
|
||||||
def __init__(self, alpha=None, beta=None, model_path=None, alphabet=None):
|
def __init__(self, alpha=None, beta=None, scorer_path=None, alphabet=None):
|
||||||
super(Scorer, self).__init__()
|
super(Scorer, self).__init__()
|
||||||
# Allow bare initialization
|
# Allow bare initialization
|
||||||
if alphabet:
|
if alphabet:
|
||||||
@ -26,7 +26,7 @@ class Scorer(swigwrapper.Scorer):
|
|||||||
if err != 0:
|
if err != 0:
|
||||||
raise ValueError("Error when deserializing alphabet.")
|
raise ValueError("Error when deserializing alphabet.")
|
||||||
|
|
||||||
err = self.init(model_path.encode('utf-8'),
|
err = self.init(scorer_path.encode('utf-8'),
|
||||||
native_alphabet)
|
native_alphabet)
|
||||||
if err != 0:
|
if err != 0:
|
||||||
raise ValueError("Scorer initialization failed with error code {}".format(err), err)
|
raise ValueError("Scorer initialization failed with error code {}".format(err), err)
|
||||||
|
@ -36,7 +36,7 @@ DecoderState::init(const Alphabet& alphabet,
|
|||||||
prefix_root_.reset(root);
|
prefix_root_.reset(root);
|
||||||
prefixes_.push_back(root);
|
prefixes_.push_back(root);
|
||||||
|
|
||||||
if (ext_scorer != nullptr && (bool)ext_scorer_->dictionary) {
|
if (ext_scorer != nullptr && (bool)(ext_scorer_->dictionary)) {
|
||||||
// no need for std::make_shared<>() since Copy() does 'new' behind the doors
|
// no need for std::make_shared<>() since Copy() does 'new' behind the doors
|
||||||
auto dict_ptr = std::shared_ptr<PathTrie::FstType>(ext_scorer->dictionary->Copy(true));
|
auto dict_ptr = std::shared_ptr<PathTrie::FstType>(ext_scorer->dictionary->Copy(true));
|
||||||
root->set_dictionary(dict_ptr);
|
root->set_dictionary(dict_ptr);
|
||||||
|
@ -51,7 +51,7 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:
|
|||||||
|
|
||||||
|
|
||||||
* ``output_graph.tflite`` which is the TF Lite model
|
* ``output_graph.tflite`` which is the TF Lite model
|
||||||
* ``lm.binary`` and ``trie`` files, if you want to use the language model ; please
|
* ``kenlm.scorer``, if you want to use the language model ; please
|
||||||
be aware that too large a language model will make the device run out of memory
|
be aware that too large a language model will make the device run out of memory
|
||||||
|
|
||||||
Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ :
|
Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ :
|
||||||
|
@ -123,6 +123,11 @@ Model.prototype.createStream = function() {
|
|||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @class
|
||||||
|
* Provides an interface to a DeepSpeech stream. The constructor cannot be called
|
||||||
|
* directly, use :js:func:`Model.createStream`.
|
||||||
|
*/
|
||||||
function Stream(nativeStream) {
|
function Stream(nativeStream) {
|
||||||
this._impl = nativeStream;
|
this._impl = nativeStream;
|
||||||
}
|
}
|
||||||
|
@ -131,6 +131,10 @@ class Model(object):
|
|||||||
|
|
||||||
|
|
||||||
class Stream(object):
|
class Stream(object):
|
||||||
|
"""
|
||||||
|
Class wrapping a DeepSpeech stream. The constructor cannot be called directly.
|
||||||
|
Use :func:`Model.createStream()`
|
||||||
|
"""
|
||||||
def __init__(self, native_stream):
|
def __init__(self, native_stream):
|
||||||
self._impl = native_stream
|
self._impl = native_stream
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ then:
|
|||||||
DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz"
|
DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz"
|
||||||
PIP_DEFAULT_TIMEOUT: "60"
|
PIP_DEFAULT_TIMEOUT: "60"
|
||||||
EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples"
|
EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples"
|
||||||
EXAMPLES_CHECKOUT_TARGET: "f3dee7910d1642e14b1e3877568f8342c1c22e05"
|
EXAMPLES_CHECKOUT_TARGET: "4b97ac41d03ca0d23fa92526433db72a90f47d4a"
|
||||||
|
|
||||||
command:
|
command:
|
||||||
- "/bin/bash"
|
- "/bin/bash"
|
||||||
|
@ -44,7 +44,7 @@ payload:
|
|||||||
MSYS: 'winsymlinks:nativestrict'
|
MSYS: 'winsymlinks:nativestrict'
|
||||||
TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow}
|
TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow}
|
||||||
EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples"
|
EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples"
|
||||||
EXAMPLES_CHECKOUT_TARGET: "f3dee7910d1642e14b1e3877568f8342c1c22e05"
|
EXAMPLES_CHECKOUT_TARGET: "4b97ac41d03ca0d23fa92526433db72a90f47d4a"
|
||||||
|
|
||||||
command:
|
command:
|
||||||
- >-
|
- >-
|
||||||
|
Loading…
Reference in New Issue
Block a user