From 4c7d5fb0e1b3e4ed4d929ed4fbb5dbbae7d1d8d6 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Wed, 8 Jan 2020 10:02:46 +0100 Subject: [PATCH] Publish README/USING/TRAINING to readthedocs Fixes #2581 --- README.rst | 48 +++++++++++++++---------------- doc/DeepSpeech.rst | 4 +-- TRAINING.rst => doc/TRAINING.rst | 10 +++---- USING.rst => doc/USING.rst | 20 +++++++------ doc/conf.py | 4 +++ doc/index.rst | 7 ++++- native_client/tflitemodelstate.cc | 3 +- native_client/tfmodelstate.cc | 3 +- 8 files changed, 56 insertions(+), 43 deletions(-) rename TRAINING.rst => doc/TRAINING.rst (94%) rename USING.rst => doc/USING.rst (91%) diff --git a/README.rst b/README.rst index bf3ae382..77a42e08 100644 --- a/README.rst +++ b/README.rst @@ -38,7 +38,7 @@ To install and use deepspeech all you have to do is: # Transcribe an audio file deepspeech --model deepspeech-0.6.0-models/output_graph.pbmm --lm deepspeech-0.6.0-models/lm.binary --trie deepspeech-0.6.0-models/trie --audio audio/2830-3980-0043.wav -A pre-trained English model is available for use and can be downloaded using `the instructions below `_. A package with some example audio files is available for download in our `release notes `_. +A pre-trained English model is available for use and can be downloaded using `the instructions below `_. A package with some example audio files is available for download in our `release notes `_. Quicker inference can be performed using a supported NVIDIA GPU on Linux. See the `release notes `_ to find which GPUs are supported. To run ``deepspeech`` on a GPU, install the GPU specific package: @@ -54,7 +54,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th # Transcribe an audio file. deepspeech --model deepspeech-0.6.0-models/output_graph.pbmm --lm deepspeech-0.6.0-models/lm.binary --trie deepspeech-0.6.0-models/trie --audio audio/2830-3980-0043.wav -Please ensure you have the required `CUDA dependencies `_. +Please ensure you have the required `CUDA dependencies `_. See the output of ``deepspeech -h`` for more information on the use of ``deepspeech``. (If you experience problems running ``deepspeech``\ , please check `required runtime dependencies `_\ ). 
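For readers who prefer the Python API over the CLI shown above, the following is a minimal programmatic sketch of the same transcription. It assumes the v0.6-era binding (``Model(model_path, beam_width)`` and ``stt(audio_buffer)``) and a 16 kHz, 16-bit mono WAV file; the beam width of 500 is an illustrative value, not a mandated default.

.. code-block:: python

   import wave

   import numpy as np
   from deepspeech import Model

   # Load the acoustic model; 500 is an illustrative beam width.
   model = Model('deepspeech-0.6.0-models/output_graph.pbmm', 500)

   # Read the example WAV file into a 16-bit PCM buffer and transcribe it.
   with wave.open('audio/2830-3980-0043.wav', 'rb') as wav:
       audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

   print(model.stt(audio))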
@@ -62,34 +62,34 @@ See the output of ``deepspeech -h`` for more information on the use of ``deepspe **Table of Contents** -* `Using a Pre-trained Model `_ +* `Using a Pre-trained Model `_ - * `CUDA dependency `_ - * `Getting the pre-trained model `_ - * `Model compatibility `_ - * `Using the Python package `_ - * `Using the Node.JS package `_ - * `Using the Command Line client `_ - * `Installing bindings from source `_ - * `Third party bindings `_ + * `CUDA dependency `_ + * `Getting the pre-trained model `_ + * `Model compatibility `_ + * `Using the Python package `_ + * `Using the Node.JS package `_ + * `Using the Command Line client `_ + * `Installing bindings from source `_ + * `Third party bindings `_ * `Trying out DeepSpeech with examples `_ -* `Training your own Model `_ +* `Training your own Model `_ - * `Prerequisites for training a model `_ - * `Getting the training code `_ - * `Installing Python dependencies `_ - * `Recommendations `_ - * `Common Voice training data `_ - * `Training a model `_ - * `Checkpointing `_ - * `Exporting a model for inference `_ - * `Exporting a model for TFLite `_ - * `Making a mmap-able model for inference `_ - * `Continuing training from a release model `_ - * `Training with Augmentation `_ + * `Prerequisites for training a model `_ + * `Getting the training code `_ + * `Installing Python dependencies `_ + * `Recommendations `_ + * `Common Voice training data `_ + * `Training a model `_ + * `Checkpointing `_ + * `Exporting a model for inference `_ + * `Exporting a model for TFLite `_ + * `Making a mmap-able model for inference `_ + * `Continuing training from a release model `_ + * `Training with Augmentation `_ * `Contribution guidelines `_ * `Contact/Getting Help `_ diff --git a/doc/DeepSpeech.rst b/doc/DeepSpeech.rst index cd6838f1..3d74d22e 100644 --- a/doc/DeepSpeech.rst +++ b/doc/DeepSpeech.rst @@ -1,5 +1,5 @@ -Introduction -============ +DeepSpeech Model +================ The aim of this project is to create a simple, open, and ubiquitous speech recognition engine. Simple, in that the engine should not require server-class diff --git a/TRAINING.rst b/doc/TRAINING.rst similarity index 94% rename from TRAINING.rst rename to doc/TRAINING.rst index 4f58e960..c38e34ff 100644 --- a/TRAINING.rst +++ b/doc/TRAINING.rst @@ -54,7 +54,7 @@ You'll also need to install the ``ds_ctcdecoder`` Python package. ``ds_ctcdecode pip3 install $(python3 util/taskcluster.py --decoder) -This command will download and install the ``ds_ctcdecoder`` package. You can override the platform with ``--arch`` if you want the package for ARM7 (\ ``--arch arm``\ ) or ARM64 (\ ``--arch arm64``\ ). If you prefer building the ``ds_ctcdecoder`` package from source, see the `native_client README file `_. +This command will download and install the ``ds_ctcdecoder`` package. You can override the platform with ``--arch`` if you want the package for ARM7 (\ ``--arch arm``\ ) or ARM64 (\ ``--arch arm64``\ ). If you prefer building the ``ds_ctcdecoder`` package from source, see the :github:`native_client README file `. Recommendations ^^^^^^^^^^^^^^^ @@ -124,9 +124,9 @@ The central (Python) script is ``DeepSpeech.py`` in the project's root directory ./DeepSpeech.py --helpfull -To get the output of this in a slightly better-formatted way, you can also look up the option definitions in `\ ``util/flags.py`` `_. +To get the output of this in a slightly better-formatted way, you can also look up the option definitions in :github:`util/flags.py `. 
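As background for the ``util/flags.py`` pointer above: training options are defined with ``absl``-style flag definitions (which is also why ``--helpfull`` works). The sketch below shows the general shape of such definitions; the flag names match ones referenced in this document, but the defaults and help strings are illustrative rather than a verbatim excerpt of the file.

.. code-block:: python

   # Illustrative absl-style flag definitions; defaults and help texts here
   # are examples only, not copied from util/flags.py.
   from absl import flags

   FLAGS = flags.FLAGS

   def create_flags():
       flags.DEFINE_integer('train_batch_size', 1, 'samples per training batch')
       flags.DEFINE_integer('dev_batch_size', 1, 'samples per validation batch')
       flags.DEFINE_integer('test_batch_size', 1, 'samples per test batch')
       flags.DEFINE_string('export_dir', '', 'directory to export the model to')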
-For executing pre-configured training scenarios, there is a collection of convenience scripts in the ``bin`` folder. Most of them are named after the corpora they are configured for. Keep in mind that most speech corpora are *very large*\ , on the order of tens of gigabytes, and some aren't free. Downloading and preprocessing them can take a very long time, and training on them without a fast GPU (GTX 10 series or newer recommended) takes even longer. +For executing pre-configured training scenarios, there is a collection of convenience scripts in the ``bin`` folder. Most of them are named after the corpora they are configured for. Keep in mind that most speech corpora are *very large*, on the order of tens of gigabytes, and some aren't free. Downloading and preprocessing them can take a very long time, and training on them without a fast GPU (GTX 10 series or newer recommended) takes even longer. **If you experience GPU OOM errors while training, try reducing the batch size with the ``--train_batch_size``\ , ``--dev_batch_size`` and ``--test_batch_size`` parameters.** @@ -136,7 +136,7 @@ As a simple first example you can open a terminal, change to the directory of th ./bin/run-ldc93s1.sh -This script will train on a small sample dataset composed of just a single audio file, the sample file for the `TIMIT Acoustic-Phonetic Continuous Speech Corpus `_\ , which can be overfitted on a GPU in a few minutes for demonstration purposes. From here, you can alter any variables with regards to what dataset is used, how many training iterations are run and the default values of the network parameters. +This script will train on a small sample dataset composed of just a single audio file, the sample file for the `TIMIT Acoustic-Phonetic Continuous Speech Corpus `_, which can be overfitted on a GPU in a few minutes for demonstration purposes. From here, you can alter any variables with regards to what dataset is used, how many training iterations are run and the default values of the network parameters. Feel also free to pass additional (or overriding) ``DeepSpeech.py`` parameters to these scripts. Then, just run the script to train the modified network. @@ -168,7 +168,7 @@ Exporting a model for inference ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If the ``--export_dir`` parameter is provided, a model will have been exported to this directory during training. -Refer to the corresponding `README.rst `_ for information on building and running a client that can use the exported model. +Refer to the corresponding :github:`README.rst ` for information on building and running a client that can use the exported model. Exporting a model for TFLite ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/USING.rst b/doc/USING.rst similarity index 91% rename from USING.rst rename to doc/USING.rst index 781ee67f..9a2f6888 100644 --- a/USING.rst +++ b/doc/USING.rst @@ -7,7 +7,7 @@ Inference using a DeepSpeech pre-trained model can be done with a client/languag * `The Python package/language binding <#using-the-python-package>`_ * `The Node.JS package/language binding <#using-the-nodejs-package>`_ * `The Command-Line client <#using-the-command-line-client>`_ -* `The .NET client/language binding `_ +* :github:`The .NET client/language binding ` Running ``deepspeech`` might, see below, require some runtime dependencies to be already installed on your system: @@ -110,18 +110,20 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett The arguments ``--lm`` and ``--trie`` are optional, and represent a language model. 
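The same optionality holds when calling the binding directly: the acoustic model decodes on its own unless a language model is attached. A short hedged sketch, again assuming the v0.6-era Python API; the alpha/beta weights are example values to be tuned, not canonical ones.

.. code-block:: python

   from deepspeech import Model

   # Acoustic model alone: valid, but transcripts are usually less accurate.
   model = Model('models/output_graph.pbmm', 500)

   # Programmatic equivalent of passing --lm and --trie on the command line.
   model.enableDecoderWithLM('models/lm.binary', 'models/trie', 0.75, 1.85)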
-See `client.py `_ for an example of how to use the package programatically. +See :github:`client.py ` for an example of how to use the package programmatically. -Using the Node.JS package -^^^^^^^^^^^^^^^^^^^^^^^^^ +Using the Node.JS / Electron.JS package +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can download the Node.JS bindings using ``npm``\ : +You can download the JS bindings using ``npm``\ : .. code-block:: bash npm install deepspeech -Please note that as of now, we only support Node.JS versions 4, 5 and 6. Once `SWIG has support `_ we can build for newer versions. +Please note that as of now, we support: + - Node.JS versions 4 to 13. + - Electron.JS versions 1.6 to 7.1. Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can install the GPU specific package as follows: @@ -131,7 +133,7 @@ Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can in See the `release notes `_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_. -See `client.js `_ for an example of how to use the bindings. Or download the `wav example `_. +See :github:`client.js ` for an example of how to use the bindings. Using the Command-Line client ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -162,12 +164,12 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett ./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav -See the help output with ``./deepspeech -h`` and the `native client README `_ for more details. +See the help output with ``./deepspeech -h`` and the :github:`native client README ` for more details. Installing bindings from source ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If pre-built binaries aren't available for your system, you'll need to install them from scratch. Follow these `\ ``native_client`` installation instructions `_. +If pre-built binaries aren't available for your system, you'll need to install them from scratch. Follow these :github:`native client installation instructions `. Third party bindings ^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/conf.py b/doc/conf.py index 76103123..1ee683e6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -64,6 +64,7 @@ release = v # ones. extensions = [ 'sphinx.ext.autodoc', + 'sphinx.ext.extlinks', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', @@ -194,3 +195,6 @@ texinfo_documents = [ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'https://docs.python.org/': None} + +extlinks = {'github': ('https://github.com/mozilla/DeepSpeech/blob/v{}/%s'.format(release), + '%s')} diff --git a/doc/index.rst b/doc/index.rst index a408677a..a905d903 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -10,13 +10,18 @@ Welcome to DeepSpeech's documentation! :maxdepth: 2 :caption: Introduction - DeepSpeech + USING + + TRAINING .. toctree:: :maxdepth: 2 :caption: DeepSpeech Model + DeepSpeech + Geometry + ParallelOptimization .. toctree:: diff --git a/native_client/tflitemodelstate.cc b/native_client/tflitemodelstate.cc index b8d491ee..2135a571 100644 --- a/native_client/tflitemodelstate.cc +++ b/native_client/tflitemodelstate.cc @@ -177,7 +177,8 @@ TFLiteModelState::init(const char* model_path, std::cerr << "Specified model file version (" << *graph_version << ") is " << "incompatible with minimum version supported by this client (" << ds_graph_version() << "). 
See " - << "https://github.com/mozilla/DeepSpeech/blob/master/USING.rst#model-compatibility " + << "https://github.com/mozilla/DeepSpeech/blob/" + << ds_git_version() << "/doc/USING.rst#model-compatibility " << "for more information" << std::endl; return DS_ERR_MODEL_INCOMPATIBLE; } diff --git a/native_client/tfmodelstate.cc b/native_client/tfmodelstate.cc index e9b45755..5cb1a0c3 100644 --- a/native_client/tfmodelstate.cc +++ b/native_client/tfmodelstate.cc @@ -91,7 +91,8 @@ TFModelState::init(const char* model_path, std::cerr << "Specified model file version (" << graph_version << ") is " << "incompatible with minimum version supported by this client (" << ds_graph_version() << "). See " - << "https://github.com/mozilla/DeepSpeech/blob/master/USING.rst#model-compatibility " + << "https://github.com/mozilla/DeepSpeech/blob/" + << ds_git_version() << "/doc/USING.rst#model-compatibility " << "for more information" << std::endl; return DS_ERR_MODEL_INCOMPATIBLE; }