From 186bb63b573c8b3fa420ad5a2d26caf20b9639d8 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Fri, 27 Aug 2021 14:13:26 +0200
Subject: [PATCH 01/11] Documentation cleanup pass to match recent changes

---
 doc/AUGMENTATION.rst                       | 68 ++++++++++---------
 doc/BYTE_OUTPUT_MODE.rst                   |  8 ---
 doc/COMMON_VOICE_DATA.rst                  | 10 +--
 doc/DEPLOYMENT.rst                         | 77 +++++++---------------
 doc/EXPORTING_MODELS.rst                   | 49 ++------------
 doc/LANGUAGE_MODEL.rst                     |  2 +-
 doc/MIXED_PRECISION.rst                    | 10 +--
 doc/TRAINING_ADVANCED.rst                  | 32 +++++----
 doc/TRAINING_FLAGS.rst                     |  4 +-
 doc/TRAINING_INTRO.rst                     | 38 +++++------
 doc/TRANSFER_LEARNING.rst                  | 16 ++---
 doc/conf.py                                |  6 +-
 doc/index.rst                              |  4 +-
 doc/static/custom.css                      |  3 +
 train.py                                   |  4 ++
 training/coqui_stt_training/export.py      |  2 +-
 training/coqui_stt_training/train.py       | 16 +++--
 training/coqui_stt_training/util/config.py |  8 +--
 18 files changed, 152 insertions(+), 205 deletions(-)
 delete mode 100644 doc/BYTE_OUTPUT_MODE.rst
 create mode 100644 doc/static/custom.css

diff --git a/doc/AUGMENTATION.rst b/doc/AUGMENTATION.rst
index 0b168f7d..68549a4a 100644
--- a/doc/AUGMENTATION.rst
+++ b/doc/AUGMENTATION.rst
@@ -21,7 +21,7 @@ For example, for the ``overlay`` augmentation:

 .. code-block::

-    python3 train.py --augment overlay[p=0.1,source=/path/to/audio.sdb,snr=20.0] ...
+    python -m coqui_stt_training.train --augment "overlay[p=0.1,source=/path/to/audio.sdb,snr=20.0]" ...

 In the documentation below, whenever a value is specified as ``<float-range>`` or ``<int-range>``, it supports one of the following formats:

@@ -55,7 +55,7 @@ Within a single domain, augmentations are applied in the same order as they appe
 Sample domain augmentations
 ---------------------------

-**Overlay augmentation** ``--augment overlay[p=<float>,source=<str>,snr=<float-range>,layers=<int-range>]``
+**Overlay augmentation** ``--augment "overlay[p=<float>,source=<str>,snr=<float-range>,layers=<int-range>]"``
   Layers another audio source (multiple times) onto augmented samples.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -67,7 +67,7 @@ Sample domain augmentations

   * **layers**: number of layers added onto the sample (e.g. 10 layers of speech to get "cocktail-party effect"). A layer is just a sample of the same duration as the sample to augment. It gets stitched together from as many source samples as required.
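
For intuition, the decibel-based mixing that ``overlay`` performs can be sketched in a few lines of NumPy. This is only an illustration of SNR-based mixing (the ``mix_at_snr`` helper and its RMS-based scaling are assumptions of the sketch, not the actual 🐸STT implementation):

.. code-block:: python

    import numpy as np

    def mix_at_snr(speech: np.ndarray, noise: np.ndarray, snr_db: float) -> np.ndarray:
        """Mix noise into speech so the result sits at roughly snr_db decibels
        of signal-to-noise ratio (positive values keep the speech dominant,
        negative values let the noise dominate)."""
        # Stitch the noise to the speech duration, mirroring how overlay layers
        # are "stitched together from as many source samples as required".
        reps = int(np.ceil(len(speech) / len(noise)))
        noise = np.tile(noise, reps)[: len(speech)]

        def rms(x):
            return np.sqrt(np.mean(np.square(x.astype(np.float64))) + 1e-12)

        # Choose a gain so that 20 * log10(rms(speech) / rms(gain * noise)) == snr_db.
        gain = rms(speech) / (rms(noise) * 10.0 ** (snr_db / 20.0))
        return speech + gain * noise
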
-**Reverb augmentation** ``--augment reverb[p=<float>,delay=<float-range>,decay=<float-range>]``
+**Reverb augmentation** ``--augment "reverb[p=<float>,delay=<float-range>,decay=<float-range>]"``
   Adds simplified (no all-pass filters) `Schroeder reverberation `_ to the augmented samples.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -77,7 +77,7 @@ Sample domain augmentations

   * **delay**: time delay in ms for the first signal reflection - higher values are widening the perceived "room"

   * **decay**: sound decay in dB per reflection - higher values will result in a less reflective perceived "room"

-**Resample augmentation** ``--augment resample[p=<float>,rate=<int-range>]``
+**Resample augmentation** ``--augment "resample[p=<float>,rate=<int-range>]"``
   Resamples augmented samples to another sample rate and then resamples back to the original sample rate.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -85,7 +85,7 @@ Sample domain augmentations

   * **rate**: sample-rate to re-sample to

-**Codec augmentation** ``--augment codec[p=<float>,bitrate=<int-range>]``
+**Codec augmentation** ``--augment "codec[p=<float>,bitrate=<int-range>]"``
   Compresses and then decompresses augmented samples using the lossy Opus audio codec.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -93,7 +93,7 @@ Sample domain augmentations

   * **bitrate**: bitrate used during compression

-**Volume augmentation** ``--augment volume[p=<float>,dbfs=<float-range>]``
+**Volume augmentation** ``--augment "volume[p=<float>,dbfs=<float-range>]"``
   Measures and levels augmented samples to a target dBFS value.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -103,7 +103,7 @@ Spectrogram domain augmentations
 --------------------------------

-**Pitch augmentation** ``--augment pitch[p=<float>,pitch=<float-range>]``
+**Pitch augmentation** ``--augment "pitch[p=<float>,pitch=<float-range>]"``
   Scales spectrogram on frequency axis and thus changes pitch.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -111,7 +111,7 @@ Spectrogram domain augmentations

   * **pitch**: pitch factor by which the frequency axis is scaled (e.g. a value of 2.0 will raise audio frequency by one octave)

-**Tempo augmentation** ``--augment tempo[p=<float>,factor=<float-range>]``
+**Tempo augmentation** ``--augment "tempo[p=<float>,factor=<float-range>]"``
   Scales spectrogram on time axis and thus changes playback tempo.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -119,7 +119,7 @@ Spectrogram domain augmentations

   * **factor**: speed factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)

-**Warp augmentation** ``--augment warp[p=<float>,nt=<int-range>,nf=<int-range>,wt=<float-range>,wf=<float-range>]``
+**Warp augmentation** ``--augment "warp[p=<float>,nt=<int-range>,nf=<int-range>,wt=<float-range>,wf=<float-range>]"``
   Applies a non-linear image warp to the spectrogram. This is achieved by randomly shifting a grid of equally distributed warp points along time and frequency axis.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -133,7 +133,7 @@ Spectrogram domain augmentations

   * **wf**: standard deviation of the random shift applied to warp points along frequency axis (0.0 = no warp, 1.0 = half the distance to the neighbour point)

-**Frequency mask augmentation** ``--augment frequency_mask[p=<float>,n=<int-range>,size=<int-range>]``
+**Frequency mask augmentation** ``--augment "frequency_mask[p=<float>,n=<int-range>,size=<int-range>]"``
   Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
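
The masking idea is easy to picture in NumPy. Below is a rough sketch of a frequency mask over a spectrogram shaped ``(time, frequency)``; the ``n`` and ``size`` parameters mirror the flag above, but this is an illustration of the SpecAugment idea, not 🐸STT's implementation:

.. code-block:: python

    import numpy as np

    def frequency_mask(spectrogram: np.ndarray, n: int, size: int) -> np.ndarray:
        """Zero out n random frequency bands of up to size bins each."""
        masked = spectrogram.copy()
        n_bins = masked.shape[1]
        for _ in range(n):
            width = np.random.randint(1, size + 1)
            start = np.random.randint(0, max(n_bins - width, 1))
            masked[:, start:start + width] = 0.0  # silence this band at every time step
        return masked
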
@@ -145,7 +145,7 @@ Multi domain augmentations
 --------------------------

-**Time mask augmentation** ``--augment time_mask[p=<float>,n=<int-range>,size=<float-range>,domain=<domain>]``
+**Time mask augmentation** ``--augment "time_mask[p=<float>,n=<int-range>,size=<float-range>,domain=<domain>]"``
   Sets time-intervals within the augmented samples to zero (silence) at random positions.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -157,7 +157,7 @@ Multi domain augmentations

   * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)

-**Dropout augmentation** ``--augment dropout[p=<float>,rate=<float-range>,domain=<domain>]``
+**Dropout augmentation** ``--augment "dropout[p=<float>,rate=<float-range>,domain=<domain>]"``
   Zeros random data points of the targeted data representation.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -167,7 +167,7 @@ Multi domain augmentations

   * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)

-**Add augmentation** ``--augment add[p=<float>,stddev=<float-range>,domain=<domain>]``
+**Add augmentation** ``--augment "add[p=<float>,stddev=<float-range>,domain=<domain>]"``
   Adds random values picked from a normal distribution (with a mean of 0.0) to all data points of the targeted data representation.

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -177,7 +177,7 @@ Multi domain augmentations

   * **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram"

-**Multiply augmentation** ``--augment multiply[p=<float>,stddev=<float-range>,domain=<domain>]``
+**Multiply augmentation** ``--augment "multiply[p=<float>,stddev=<float-range>,domain=<domain>]"``
   Multiplies all data points of the targeted data representation with random values picked from a normal distribution (with a mean of 1.0).

   * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

@@ -191,24 +191,22 @@ Example training with all augmentations:

 .. code-block:: bash

-    python -u train.py \
+    python -m coqui_stt_training.train \
         --train_files "train.sdb" \
-        --feature_cache ./feature.cache \
-        --cache_for_epochs 10 \
         --epochs 100 \
-        --augment overlay[p=0.5,source=noise.sdb,layers=1,snr=50:20~10] \
-        --augment reverb[p=0.1,delay=50.0~30.0,decay=10.0:2.0~1.0] \
-        --augment resample[p=0.1,rate=12000:8000~4000] \
-        --augment codec[p=0.1,bitrate=48000:16000] \
-        --augment volume[p=0.1,dbfs=-10:-40] \
-        --augment pitch[p=0.1,pitch=1~0.2] \
-        --augment tempo[p=0.1,factor=1~0.5] \
-        --augment warp[p=0.1,nt=4,nf=1,wt=0.5:1.0,wf=0.1:0.2] \
-        --augment frequency_mask[p=0.1,n=1:3,size=1:5] \
-        --augment time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40] \
-        --augment dropout[p=0.1,rate=0.05] \
-        --augment add[p=0.1,domain=signal,stddev=0~0.5] \
-        --augment multiply[p=0.1,domain=features,stddev=0~0.5] \
+        --augment "overlay[p=0.5,source=noise.sdb,layers=1,snr=50:20~10]" \
+        --augment "reverb[p=0.1,delay=50.0~30.0,decay=10.0:2.0~1.0]" \
+        --augment "resample[p=0.1,rate=12000:8000~4000]" \
+        --augment "codec[p=0.1,bitrate=48000:16000]" \
+        --augment "volume[p=0.1,dbfs=-10:-40]" \
+        --augment "pitch[p=0.1,pitch=1~0.2]" \
+        --augment "tempo[p=0.1,factor=1~0.5]" \
+        --augment "warp[p=0.1,nt=4,nf=1,wt=0.5:1.0,wf=0.1:0.2]" \
+        --augment "frequency_mask[p=0.1,n=1:3,size=1:5]" \
+        --augment "time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40]" \
+        --augment "dropout[p=0.1,rate=0.05]" \
+        --augment "add[p=0.1,domain=signal,stddev=0~0.5]" \
+        --augment "multiply[p=0.1,domain=features,stddev=0~0.5]" \
         [...]

@@ -218,20 +216,20 @@ Example of playing all samples with reverberation and maximized volume:

 .. code-block:: bash

-  bin/play.py --augment reverb[p=0.1,delay=50.0,decay=2.0] --augment volume --random test.sdb
+  bin/play.py --augment "reverb[p=0.1,delay=50.0,decay=2.0]" --augment volume --random test.sdb
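
The ranged values used above (e.g. ``snr=50:20~10``) combine a start-to-end sweep over the course of training with a per-sample randomization radius. The following sketch shows how such a ``<start>:<end>~<radius>`` triple could be resolved; it is illustrative only, as 🐸STT's actual parsing lives in the training package:

.. code-block:: python

    import random

    def resolve_range(start: float, end: float, radius: float, clock: float) -> float:
        """clock runs from 0.0 (beginning of training) to 1.0 (end of training),
        like the --clock option of bin/play.py used below. The center value
        moves linearly from start to end, then a uniform random offset of up
        to +/- radius is applied."""
        center = start + (end - start) * clock
        return center + random.uniform(-radius, radius)

    # snr=50:20~10 at the very beginning vs. the very end of training:
    print(resolve_range(50, 20, 10, clock=0.0))  # uniformly within 40..60
    print(resolve_range(50, 20, 10, clock=1.0))  # uniformly within 10..30
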
Example simulation of the codec augmentation of a wav-file first at the beginning and then at the end of an epoch:

 .. code-block:: bash

-  bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 0.0 test.wav
-  bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 1.0 test.wav
+  bin/play.py --augment "codec[p=0.1,bitrate=48000:16000]" --clock 0.0 test.wav
+  bin/play.py --augment "codec[p=0.1,bitrate=48000:16000]" --clock 1.0 test.wav

 Example of creating a pre-augmented test set:

 .. code-block:: bash

   bin/data_set_tool.py \
-    --augment overlay[source=noise.sdb,layers=1,snr=20~10] \
-    --augment resample[rate=12000:8000~4000] \
+    --augment "overlay[source=noise.sdb,layers=1,snr=20~10]" \
+    --augment "resample[rate=12000:8000~4000]" \
     test.sdb test-augmented.sdb
diff --git a/doc/BYTE_OUTPUT_MODE.rst b/doc/BYTE_OUTPUT_MODE.rst
deleted file mode 100644
index 6b195c38..00000000
--- a/doc/BYTE_OUTPUT_MODE.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-.. _byte-output-mode:
-
-Training in byte output mode
-=============================
-
-🐸STT includes a ``byte output mode`` which can be useful when working with languages with very large alphabets, such as Mandarin Chinese.
-
-This training mode is experimental, and has only been used for Mandarin Chinese.
diff --git a/doc/COMMON_VOICE_DATA.rst b/doc/COMMON_VOICE_DATA.rst
index 110825fb..ffd23a8a 100644
--- a/doc/COMMON_VOICE_DATA.rst
+++ b/doc/COMMON_VOICE_DATA.rst
@@ -32,13 +32,13 @@ The CSV files contain the following fields:
 * ``wav_filesize`` - samples size given in bytes, used for sorting the data before training. Expects integer
 * ``transcript`` - transcription target for the sample

-To use Common Voice data for training, validation and testing, you should pass the ``CSV`` filenames to ``train.py`` via ``--train_files``, ``--dev_files``, ``--test_files``.
+To use Common Voice data for training, validation and testing, you should pass the ``CSV`` filenames via ``--train_files``, ``--dev_files``, ``--test_files``.

 For example, if you downloaded, extracted, and imported the French language data from Common Voice, you will have a new local directory named ``fr``. You can train STT with this new French data as such:

 .. code-block:: bash

-   $ python3 train.py \
-       --train_files fr/clips/train.csv \
-       --dev_files fr/clips/dev.csv \
-       --test_files fr/clips/test.csv
+   $ python -m coqui_stt_training.train \
+       --train_files fr/clips/train.csv \
+       --dev_files fr/clips/dev.csv \
+       --test_files fr/clips/test.csv
diff --git a/doc/DEPLOYMENT.rst b/doc/DEPLOYMENT.rst
index ab0f0176..5fa6eace 100644
--- a/doc/DEPLOYMENT.rst
+++ b/doc/DEPLOYMENT.rst
@@ -10,31 +10,26 @@ Introduction

 Deployment is the process of feeding audio (speech) into a trained 🐸STT model and receiving text (transcription) as output. In practice you probably want to use two models for deployment: an audio model and a text model. The audio model (a.k.a. the acoustic model) is a deep neural network which converts audio into text. The text model (a.k.a. the language model / scorer) returns the likelihood of a string of text. If the acoustic model makes spelling or grammatical mistakes, the language model can help correct them.

-You can deploy 🐸STT models either via a command-line client or a language binding. 🐸 provides three language bindings and one command line client. There also exist several community-maintained clients and language bindings, which are listed `further down in this README <#third-party-bindings>`_.
-
-*Note that 🐸STT currently only provides packages for CPU deployment with Python 3.5 or higher on Linux. 
We're working to get the rest of our usually supported packages back up and running as soon as possible.* +You can deploy 🐸STT models either via a command-line client or a language binding. * :ref:`The Python package + language binding ` +* :ref:`The Node.JS package + language binding ` * :ref:`The command-line client ` * :ref:`The native C API ` -* :ref:`The Node.JS package + language binding ` -* :ref:`The .NET client + language binding ` .. _download-models: Download trained Coqui STT models ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can find pre-trained models ready for deployment on the 🐸STT `releases page `_. You can also download the latest acoustic model (``.pbmm``) and language model (``.scorer``) from the command line as such: +You can find pre-trained models ready for deployment on the 🐸STT `releases page `_. You can also download the latest acoustic model (``.tflite``) and language model (``.scorer``) from the command line as such: .. code-block:: bash - wget https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.pbmm + wget https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.tflite wget https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.scorer -In every 🐸STT official release, there are several kinds of model files provided. For the acoustic model there are two file extensions: ``.pbmm`` and ``.tflite``. Files ending in ``.pbmm`` are compatible with clients and language bindings built against the standard TensorFlow runtime. ``.pbmm`` files are also compatible with CUDA enabled clients and language bindings. Files ending in ``.tflite``, on the other hand, are only compatible with clients and language bindings built against the `TensorFlow Lite runtime `_. TFLite models are optimized for size and performance on low-power devices. You can find a full list of supported platforms and TensorFlow runtimes at :ref:`supported-platforms-deployment`. - -For language models, there is only only file extension: ``.scorer``. Language models can run on any supported device, regardless of Tensorflow runtime. You can read more about language models with regard to :ref:`the decoding process ` and :ref:`how scorers are generated `. +In every 🐸STT official release, there are different model files provided. The acoustic model uses the ``.tflite`` extension. Language models use the extension ``.scorer``. You can read more about language models with regard to :ref:`the decoding process ` and :ref:`how scorers are generated `. .. _model-data-match: @@ -51,7 +46,7 @@ How well a 🐸STT model transcribes your audio will depend on a lot of things. If you take a 🐸STT model trained on English, and pass Spanish into it, you should expect the model to perform horribly. Imagine you have a friend who only speaks English, and you ask her to make Spanish subtitles for a Spanish film, you wouldn't expect to get good subtitles. This is an extreme example, but it helps to form an intuition for what to expect from 🐸STT models. Imagine that the 🐸STT models are like people who speak a certain language with a certain accent, and then think about what would happen if you asked that person to transcribe your audio. -An acoustic model (i.e. ``.pbmm`` or ``.tflite``) has "learned" how to transcribe a certain language, and the model probably understands some accents better than others. In addition to languages and accents, acoustic models are sensitive to the style of speech, the topic of speech, and the demographics of the person speaking. 
The language model (``.scorer``) has been trained on text alone. As such, the language model is sensitive to how well the topic and style of speech matches that of the text used in training. The 🐸STT `release notes `_ include detailed information on the data used to train the models. If the data used for training the off-the-shelf models does not align with your intended use case, it may be necessary to adapt or train new models in order to improve transcription on your data.
+An acoustic model (i.e. ``.tflite`` file) has "learned" how to transcribe a certain language, and the model probably understands some accents better than others. In addition to languages and accents, acoustic models are sensitive to the style of speech, the topic of speech, and the demographics of the person speaking. The language model (``.scorer``) has been trained on text alone. As such, the language model is sensitive to how well the topic and style of speech matches that of the text used in training. The 🐸STT `release notes `_ include detailed information on the data used to train the models. If the data used for training the off-the-shelf models does not align with your intended use case, it may be necessary to adapt or train new models in order to improve transcription on your data.

 Training your own language model is often a good way to improve transcription on your audio. The process and tools used to generate a language model are described in :ref:`language-model` and general information can be found in :ref:`decoder-docs`. Generating a scorer from a constrained topic dataset is a quick process and can bring significant accuracy improvements if your audio is from a specific topic.

@@ -67,7 +62,7 @@ Model compatibility

 Using the Python package
 ^^^^^^^^^^^^^^^^^^^^^^^^

-Pre-built binaries for deploying a trained model can be installed with ``pip``. It is highly recommended that you use Python 3.5 or higher in a virtual environment. Both `pip `_ and `venv `_ are included in normal Python 3 installations.
+Pre-built binaries for deploying a trained model can be installed with ``pip``. It is highly recommended that you use Python 3.6 or higher in a virtual environment. Both `pip `_ and `venv `_ are included in normal Python 3 installations.

 When you create a new Python virtual environment, you create a directory containing a ``python`` binary and everything needed to run 🐸STT. For the purpose of this documentation, we will use ``$HOME/coqui-stt-venv``, but you can use whatever directory you like.

@@ -87,7 +82,7 @@ After your environment has been activated, you can use ``pip`` to install ``stt`

 .. code-block::

-    (coqui-stt-venv)$ python3 -m pip install -U pip && python3 -m pip install stt
+    (coqui-stt-venv)$ python -m pip install -U pip && python -m pip install stt

 After installation has finished, you can call ``stt`` from the command-line.

@@ -95,7 +90,7 @@ The following command assumes you :ref:`downloaded the pre-trained models `:

 .. code-block::

-    (coqui-stt-venv)$ stt --model coqui-stt-0.9.3-models.pbmm --scorer coqui-stt-0.9.3-models.scorer --audio my_audio_file.wav
+    (coqui-stt-venv)$ stt --model coqui-stt-0.9.3-models.tflite --scorer coqui-stt-0.9.3-models.scorer --audio my_audio_file.wav

 See :ref:`the Python client ` for an example of how to use the package programmatically.

@@ -103,45 +98,10 @@ See :ref:`the Python client ` for an example of how to use the p

 .. code-block::

-    (coqui-stt-venv)$ python3 -m pip install -U pip && python3 -m pip install stt-gpu
+    (coqui-stt-venv)$ python -m pip install -U pip && python -m pip install stt-gpu

 See the `release notes `_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_.

-.. _cli-usage:
-
-Using the command-line client
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-To download the pre-built binaries for the ``stt`` command-line (compiled C++) client, use ``util/taskcluster.py``\ :
-
-.. code-block:: bash
-
-   python3 util/taskcluster.py --target .
-
-or if you're on macOS:
-
-.. code-block:: bash
-
-   python3 util/taskcluster.py --arch osx --target .
-
-also, if you need some binaries different than current main branch, like ``v0.2.0-alpha.6``\ , you can use ``--branch``\ :
-
-.. code-block:: bash
-
-   python3 util/taskcluster.py --branch "v0.2.0-alpha.6" --target "."
-
-The script ``taskcluster.py`` will download ``native_client.tar.xz`` (which includes the ``stt`` binary and associated libraries) and extract it into the current folder. ``taskcluster.py`` will download binaries for Linux/x86_64 by default, but you can override that behavior with the ``--arch`` parameter. See the help info with ``python3 util/taskcluster.py -h`` for more details. Specific branches of 🐸STT or TensorFlow can be specified as well.
-
-Alternatively you may manually download the ``native_client.tar.xz`` from the `releases page `_.
-
-Assuming you have :ref:`downloaded the pre-trained models `, you can use the client as such:
-
-.. code-block:: bash
-
-   ./stt --model coqui-stt-0.9.3-models.pbmm --scorer coqui-stt-0.9.3-models.scorer --audio audio_input.wav
-
-See the help output with ``./stt -h`` for more details.
-
 .. _nodejs-usage:

 Using the Node.JS / Electron.JS package
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -173,6 +133,20 @@ See the `release notes `_ to find whic

 See the :ref:`TypeScript client ` for an example of how to use the bindings programmatically.

+.. _cli-usage:
+
+Using the command-line client
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The pre-built binaries for the ``stt`` command-line (compiled C++) client are available in the ``native_client.tar.xz`` archive for your desired platform. You can download the archive from our `releases page `_.
+
+Assuming you have :ref:`downloaded the pre-trained models `, you can use the client as such:
+
+.. code-block:: bash
+
+   ./stt --model coqui-stt-0.9.3-models.tflite --scorer coqui-stt-0.9.3-models.scorer --audio audio_input.wav
+
+See the help output with ``./stt -h`` for more details.

 Installing bindings from source
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -209,11 +183,8 @@ CUDA Dependency

 The GPU capable builds (Python, NodeJS, C++, etc) depend on CUDA 10.1 and CuDNN v7.6.

-.. _cuda-inference-deps:
-
 .. toctree::
    :maxdepth: 1
-   :caption: Supported Platforms

    SUPPORTED_PLATFORMS
diff --git a/doc/EXPORTING_MODELS.rst b/doc/EXPORTING_MODELS.rst
index 8362ac65..f630cbde 100644
--- a/doc/EXPORTING_MODELS.rst
+++ b/doc/EXPORTING_MODELS.rst
@@ -3,54 +3,17 @@
 Exporting a model for deployment
 ================================

-After you train a STT model, your model will be stored on disk as a :ref:`checkpoint file `. Model checkpoints are useful for resuming training at a later date, but they are not the correct format for deploying a model into production. The best model format for deployment is a protobuf file.
+After you train a STT model, your model will be stored on disk as a :ref:`checkpoint file `. Model checkpoints are useful for resuming training at a later date, but they are not the correct format for deploying a model into production. The model format for deployment is a TFLite file.

-This document explains how to export model checkpoints as a protobuf file.
+This document explains how to export model checkpoints as a TFLite file. 
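
Once exported, the resulting ``.tflite`` file can be loaded with the ``stt`` deployment package for a quick sanity check. A minimal sketch, assuming a 16-bit mono WAV recorded at the model's sample rate and placeholder file names:

.. code-block:: python

    import wave

    import numpy as np
    from stt import Model

    model = Model("exported/model.tflite")  # placeholder path to the exported file

    with wave.open("my_audio_file.wav", "rb") as wav:
        audio = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)

    print(model.stt(audio))  # prints the transcription
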
How to export a model
---------------------

-The simplest way to export STT model checkpoints for deployment is via ``train.py`` and the ``--export_dir`` flag.
+You can export STT model checkpoints for deployment by using the export script and the ``--export_dir`` flag.

 .. code-block:: bash

-   $ python3 train.py \
-       --checkpoint_dir path/to/existing/model/checkpoints \
-       --export_dir where/to/export/new/protobuf
-
-However, you may want to export a model for small devices or for more efficient memory usage. In this case, follow the steps below.
-
-Exporting as memory-mapped
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-By default, the protobuf exported by ``train.py`` will be loaded in memory every time the model is deployed. This results in extra loading time and memory consumption. Creating a memory-mapped protobuf file will avoid these issues.
-
-First, export your checkpoints to a protobuf with ``train.py``:
-
-.. code-block:: bash
-
-   $ python3 train.py \
-       --checkpoint_dir path/to/existing/model/checkpoints \
-       --export_dir where/to/export/new/protobuf
-
-Second, convert the protobuf to a memory-mapped protobuf with ``convert_graphdef_memmapped_format``:
-
-.. code-block::
-
-   $ convert_graphdef_memmapped_format \
-       --in_graph=output_graph.pb \
-       --out_graph=output_graph.pbmm
-
-``convert_graphdef_memmapped_format`` is a dedicated tool to convert regular protobuf files to memory-mapped protobufs. You can find this tool pre-compiled on the STT `release page `_. You should download and decompress ``convert_graphdef_memmapped_format`` before use. Upon a sucessful conversion ``convert_graphdef_memmapped_format`` will report conversion of a non-zero number of nodes.
-
-Exporting for small devices
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-If you want to deploy a STT model on a small device, you might consider exporting the model with `Tensorflow Lite `_ support. Export STT model checkpoints for Tensorflow Lite via ``train.py`` and the ``--export_tflite`` flag.
-
-.. code-block:: bash
-
-   $ python3 train.py \
-       --checkpoint_dir path/to/existing/model/checkpoints \
-       --export_dir where/to/export/new/protobuf \
-       --export_tflite
+.. code-block:: bash
+
+   $ python3 -m coqui_stt_training.export \
+       --checkpoint_dir path/to/existing/model/checkpoints \
+       --export_dir where/to/export/model
diff --git a/doc/LANGUAGE_MODEL.rst b/doc/LANGUAGE_MODEL.rst
index fa03d485..e4c6187d 100644
--- a/doc/LANGUAGE_MODEL.rst
+++ b/doc/LANGUAGE_MODEL.rst
@@ -49,7 +49,7 @@ For more custom use cases, you might familiarize yourself with the `KenLM toolki

 .. code-block:: bash

-    python3 generate_lm.py \
+    python generate_lm.py \
       --input_txt librispeech-lm-norm.txt.gz \
       --output_dir . \
       --top_k 500000 \
diff --git a/doc/MIXED_PRECISION.rst b/doc/MIXED_PRECISION.rst
index c82b29fc..bf15e9b3 100644
--- a/doc/MIXED_PRECISION.rst
+++ b/doc/MIXED_PRECISION.rst
@@ -5,14 +5,14 @@ Automatic Mixed Precision

 Training with `automatic mixed precision `_ is available when training STT on a GPU.

-Mixed precision training makes use of both ``FP32`` and ``FP16`` precisions where appropriate. ``FP16`` operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput. Mixed precision training often allows larger batch sizes. Automatic mixed precision training can be enabled by including the flag `--automatic_mixed_precision` at training time:
+Mixed precision training makes use of both ``FP32`` and ``FP16`` precisions where appropriate. 
``FP16`` operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput. Mixed precision training often allows larger batch sizes. Automatic mixed precision training can be enabled by including the flag ``--automatic_mixed_precision true`` at training time: .. code-block:: bash - $ python3 train.py \ + $ python -m coqui_stt_training.train \ --train_files train.csv \ - --dev_files dev.csv \ - --test_files test.csv \ - --automatic_mixed_precision + --dev_files dev.csv \ + --test_files test.csv \ + --automatic_mixed_precision true On a Volta generation V100 GPU, automatic mixed precision can speed up 🐸STT training and evaluation by approximately 30% to 40%. diff --git a/doc/TRAINING_ADVANCED.rst b/doc/TRAINING_ADVANCED.rst index d49ce0ba..1fa3f1f8 100644 --- a/doc/TRAINING_ADVANCED.rst +++ b/doc/TRAINING_ADVANCED.rst @@ -5,15 +5,25 @@ Training: Advanced Topics This document contains more advanced topics with regard to training models with STT. If you'd prefer a lighter introduction, please refer to :ref:`Training: Quickstart`. +.. toctree:: + :maxdepth: 1 -1. :ref:`training-flags` -2. :ref:`transfer-learning` -3. :ref:`automatic-mixed-precision` -4. :ref:`checkpointing` -5. :ref:`common-voice-data` -6. :ref:`training-data-augmentation` -7. :ref:`exporting-checkpoints` -8. :ref:`model-geometry` -9. :ref:`parallel-training-optimization` -10. :ref:`data-importers` -11. :ref:`byte-output-mode` + TRAINING_FLAGS + + TRANSFER_LEARNING + + MIXED_PRECISION + + CHECKPOINTING + + COMMON_VOICE_DATA + + AUGMENTATION + + EXPORTING_MODELS + + Geometry + + PARALLLEL_OPTIMIZATION + + DATASET_IMPORTERS diff --git a/doc/TRAINING_FLAGS.rst b/doc/TRAINING_FLAGS.rst index a0f2b48a..44797fc4 100644 --- a/doc/TRAINING_FLAGS.rst +++ b/doc/TRAINING_FLAGS.rst @@ -3,14 +3,12 @@ Command-line flags for the training scripts =========================================== -Below you can find the definition of all command-line flags supported by the training scripts. This includes ``train.py``, ``evaluate.py``, ``evaluate_tflite.py``, ``transcribe.py`` and ``lm_optimizer.py``. +Below you can find the definition of all command-line flags supported by the training modules. This includes the modules ``coqui_stt_training.train``, ``coqui_stt_training.evaluate``, ``coqui_stt_training.export``, ``coqui_stt_training.training_graph_inference``, and the scripts ``evaluate_tflite.py``, ``transcribe.py`` and ``lm_optimizer.py``. Flags ----- .. literalinclude:: ../training/coqui_stt_training/util/config.py :language: python - :linenos: - :lineno-match: :start-after: sphinx-doc: training_ref_flags_start :end-before: sphinx-doc: training_ref_flags_end diff --git a/doc/TRAINING_INTRO.rst b/doc/TRAINING_INTRO.rst index f0312dfe..b43d7176 100644 --- a/doc/TRAINING_INTRO.rst +++ b/doc/TRAINING_INTRO.rst @@ -41,18 +41,18 @@ If you don't want to use our Dockerfile template, you will need to manually inst Prerequisites ^^^^^^^^^^^^^ -* `Python 3.6 `_ +* `Python 3.6, 3.7 or 3.8 `_ * Mac or Linux environment (training on Windows is *not* currently supported) * CUDA 10.0 and CuDNN v7.6 Download ^^^^^^^^ -We recommened that you clone the STT repo from the latest stable release branch on Github (e.g. ``v0.9.3``). You can find all 🐸STT releases `here `_). +Clone the STT repo from GitHub: .. 
code-block:: bash

    $ git clone https://github.com/coqui-ai/STT

 Installation
 ^^^^^^^^^^^^

@@ -86,23 +86,17 @@ Now that we have cloned the STT repo from Github and setup a virtual environment

 .. code-block:: bash

    $ cd STT
-   $ python3 -m pip install --upgrade pip wheel setuptools
-   $ python3 -m pip install --upgrade -e .
-
-The ``webrtcvad`` package may additionally require ``python3-dev``:
-
-.. code-block:: bash
-
-   $ sudo apt-get install python3-dev
+   $ python -m pip install --upgrade pip wheel setuptools
+   $ python -m pip install --upgrade -e .

 If you have an NVIDIA GPU, it is highly recommended to install TensorFlow with GPU support. Training will be significantly faster than using the CPU.

 .. code-block:: bash

-   $ python3 -m pip uninstall tensorflow
-   $ python3 -m pip install 'tensorflow-gpu==1.15.4'
+   $ python -m pip uninstall tensorflow
+   $ python -m pip install 'tensorflow-gpu==1.15.4'

-Please ensure you have the required `CUDA dependency `_ and :ref:`prerequisites `.
+Please ensure you have the required :ref:`prerequisites ` and a working CUDA installation with the versions listed above.

 Verify Install
 """"""""""""""
@@ -118,12 +112,12 @@ This script will train a model on a single audio file. If the script exits succe

 Training on your own Data
 -------------------------

-Whether you used our Dockerfile template or you set up your own environment, the central STT training script is ``train.py``. For a list of command line options, use the ``--helpfull`` flag:
+Whether you used our Dockerfile template or you set up your own environment, the central STT training module is ``python -m coqui_stt_training.train``. For a list of command line options, use the ``--help`` flag:

 .. code-block:: bash

    $ cd STT
-   $ python3 train.py --helpfull
+   $ python -m coqui_stt_training.train --help

 Training Data
 ^^^^^^^^^^^^^
@@ -143,12 +137,18 @@ Text transcripts should be formatted exactly as the transcripts you expect your

 CSV file format
 """""""""""""""

-The audio and transcripts used in training are passed to ``train.py`` via CSV files. You should supply CSV files for training (``train.csv``), development (``dev.csv``), and testing (``test.csv``). The CSV files should contain three columns:
+The audio and transcripts used in training are specified via CSV files. You should supply CSV files for training (``train.csv``), validation (``dev.csv``), and testing (``test.csv``). The CSV files should contain three columns:

 1. ``wav_filename`` - the path to a WAV file on your machine
 2. ``wav_filesize`` - the number of bytes in the WAV file
 3. ``transcript`` - the text transcript of the WAV file

+Alternatively, if you don't have pre-defined splits for training, validation and testing, you can use the ``--auto_input_dataset`` flag to automatically split a single CSV into subsets and generate an alphabet:
+
+.. code-block:: bash
+
+   $ python -m coqui_stt_training.train --auto_input_dataset samples.csv
+
 Start Training
 ^^^^^^^^^^^^^^

@@ -157,11 +157,11 @@ After you've successfully installed STT and have access to data, you can start a

 .. code-block:: bash

    $ cd STT
-   $ python3 train.py --train_files train.csv --dev_files dev.csv --test_files test.csv
+   $ python -m coqui_stt_training.train --train_files train.csv --dev_files dev.csv --test_files test.csv

 Next Steps
 ----------

-You will want to customize the settings of ``train.py`` to work better with your data and your hardware. 
You should review the :ref:`command-line training flags `, and experiment with different settings. +You will want to customize the training settings to work better with your data and your hardware. You should review the :ref:`command-line training flags `, and experiment with different settings. For more in-depth training documentation, you should refer to the :ref:`Advanced Training Topics ` section. diff --git a/doc/TRANSFER_LEARNING.rst b/doc/TRANSFER_LEARNING.rst index fb01145f..2ace6fb5 100644 --- a/doc/TRANSFER_LEARNING.rst +++ b/doc/TRANSFER_LEARNING.rst @@ -14,17 +14,17 @@ If your own data uses the *extact* same alphabet as the English release model (i Fine-Tuning (same alphabet) --------------------------- -You can fine-tune pre-trained model checkpoints by using the ``--checkpoint_dir`` flag in ``train.py``. Specify the path to the checkpoints, and training will resume from the pre-trained model. +You can fine-tune pre-trained model checkpoints by using the ``--checkpoint_dir`` flag. Specify the path to the checkpoints, and training will resume from the pre-trained model. For example, if you want to fine tune existing checkpoints to your own data in ``my-train.csv``, ``my-dev.csv``, and ``my-test.csv``, you can do the following: .. code-block:: bash - $ python3 train.py \ - --checkpoint_dir path/to/checkpoint/folder \ - --train_files my-train.csv \ - --dev_files my-dev.csv \ - --test_files my_test.csv + $ python -m coqui_stt_training.train \ + --checkpoint_dir path/to/checkpoint/folder \ + --train_files my-train.csv \ + --dev_files my-dev.csv \ + --test_files my_test.csv Transfer-Learning (new alphabet) -------------------------------- @@ -39,12 +39,12 @@ You need to specify the location of the pre-trained model with ``--load_checkpoi .. code-block:: bash - python3 train.py \ + python -m coqui_stt_training.train \ --drop_source_layers 1 \ --alphabet_config_path my-alphabet.txt \ --save_checkpoint_dir path/to/output-checkpoint/folder \ --load_checkpoint_dir path/to/input-checkpoint/folder \ - --train_files my-new-language-train.csv \ + --train_files my-new-language-train.csv \ --dev_files my-new-language-dev.csv \ --test_files my-new-language-test.csv diff --git a/doc/conf.py b/doc/conf.py index 594c81ea..9c668c74 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -136,10 +136,14 @@ add_module_names = False # html_theme = "furo" +html_css_files = [ + "custom.css", +] + # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = [".static"] +html_static_path = ["static"] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/doc/index.rst b/doc/index.rst index 9d8d1b23..7fbfe0a3 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -35,8 +35,8 @@ The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python $ source venv-stt/bin/activate # Install 🐸STT - $ python3 -m pip install -U pip - $ python3 -m pip install stt + $ python -m pip install -U pip + $ python -m pip install stt # Download 🐸's pre-trained English models $ curl -LO https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.pbmm diff --git a/doc/static/custom.css b/doc/static/custom.css new file mode 100644 index 00000000..a961604c --- /dev/null +++ b/doc/static/custom.css @@ -0,0 +1,3 @@ +#flags pre { + white-space: pre-wrap; +} diff --git a/train.py b/train.py index 7477ed7f..528af395 100755 --- a/train.py +++ b/train.py @@ -3,6 +3,10 @@ from __future__ import absolute_import, division, print_function if __name__ == "__main__": + print( + "Using the top level train.py script is deprecated and will be removed " + "in a future release. Instead use: python -m coqui_stt_training.train" + ) try: from coqui_stt_training import train as stt_train except ImportError: diff --git a/training/coqui_stt_training/export.py b/training/coqui_stt_training/export.py index 22c31ad6..b1fedbbc 100644 --- a/training/coqui_stt_training/export.py +++ b/training/coqui_stt_training/export.py @@ -191,7 +191,7 @@ def package_zip(): log_info("Exported packaged model {}".format(archive)) -def main(_): +def main(): initialize_globals_from_cli() if not Config.export_dir: diff --git a/training/coqui_stt_training/train.py b/training/coqui_stt_training/train.py index 38417d1d..98f9e407 100644 --- a/training/coqui_stt_training/train.py +++ b/training/coqui_stt_training/train.py @@ -662,20 +662,24 @@ def main(): def deprecated_msg(prefix): return ( - f"{prefix} Using the training script as a generic driver for all training " + f"{prefix} Using the training module as a generic driver for all training " "related functionality is deprecated and will be removed soon. Use " - "the specific scripts: train.py/evaluate.py/export.py/training_graph_inference.py." + "the specific modules: \n" + " python -m coqui_stt_training.train\n" + " python -m coqui_stt_training.evaluate\n" + " python -m coqui_stt_training.export\n" + " python -m coqui_stt_training.training_graph_inference" ) if Config.train_files: train() else: - log_warn(deprecated_msg("Calling training script without --train_files.")) + log_warn(deprecated_msg("Calling training module without --train_files.")) if Config.test_files: log_warn( deprecated_msg( - "Specifying --test_files when calling train.py script. Use evaluate.py." + "Specifying --test_files when calling train module. Use python -m coqui_stt_training.evaluate" ) ) evaluate.test() @@ -683,7 +687,7 @@ def main(): if Config.export_dir: log_warn( deprecated_msg( - "Specifying --export_dir when calling train.py script. Use export.py." + "Specifying --export_dir when calling train module. Use python -m coqui_stt_training.export" ) ) export.export() @@ -691,7 +695,7 @@ def main(): if Config.one_shot_infer: log_warn( deprecated_msg( - "Specifying --one_shot_infer when calling train.py script. Use training_graph_inference.py." + "Specifying --one_shot_infer when calling train module. 
Use python -m coqui_stt_training.training_graph_inference"
            )
        )
        traning_graph_inference.do_single_file_inference(Config.one_shot_infer)
diff --git a/training/coqui_stt_training/util/config.py b/training/coqui_stt_training/util/config.py
index 1352e002..85493d4b 100644
--- a/training/coqui_stt_training/util/config.py
+++ b/training/coqui_stt_training/util/config.py
@@ -201,10 +201,10 @@ class _SttConfig(Coqpit):
             self.alphabet = alphabet
         else:
             raise RuntimeError(
-                "Missing --alphabet_config_path flag. Couldn't find an alphabet file\n"
-                "alongside checkpoint, and input datasets are not fully specified\n"
-                "(--train_files, --dev_files, --test_files), so can't generate an alphabet.\n"
-                "Either specify an alphabet file or fully specify the dataset, so one will\n"
+                "Missing --alphabet_config_path flag. Couldn't find an alphabet file "
+                "alongside checkpoint, and input datasets are not fully specified "
+                "(--train_files, --dev_files, --test_files), so can't generate an alphabet. "
+                "Either specify an alphabet file or fully specify the dataset, so one will "
                 "be generated automatically."
             )

From d85187aa445a55111adc664310ac656751534095 Mon Sep 17 00:00:00 2001
From: Jeremiah Rose
Date: Tue, 7 Sep 2021 09:45:31 +1000
Subject: [PATCH 02/11] Use local source instead of redownloading in
 Dockerfile.build

---
 Dockerfile.build | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/Dockerfile.build b/Dockerfile.build
index e7d2e6b5..426d3e11 100644
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -3,9 +3,6 @@
 # Need devel version cause we need /usr/include/cudnn.h
 FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

-ARG STT_REPO=https://github.com/coqui-ai/STT.git
-ARG STT_SHA=origin/main
-
 # >> START Install base software

 # Get basic packages
@@ -112,12 +109,7 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
 # << END Configure Bazel

 WORKDIR /
-
-RUN git clone --recursive $STT_REPO STT
-WORKDIR /STT
-RUN git checkout $STT_SHA
-RUN git submodule sync tensorflow/
-RUN git submodule update --init tensorflow/
+COPY . /STT/

 # >> START Build and bind

From 4d7922d111ff47ff6da2ec0dc17f2e933f427ccf Mon Sep 17 00:00:00 2001
From: Jeremiah Rose
Date: Mon, 6 Sep 2021 11:21:11 +1000
Subject: [PATCH 03/11] Initial try

---
 native_client/coqui-stt.h        | 14 ++++++++++++++
 native_client/python/__init__.py | 16 ++++++++++++++++
 native_client/stt.cc             | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/native_client/coqui-stt.h b/native_client/coqui-stt.h
index 7794bc79..decd9444 100644
--- a/native_client/coqui-stt.h
+++ b/native_client/coqui-stt.h
@@ -326,6 +326,20 @@ Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
  * @note This method will free the state pointer (@p aSctx).
  */
 STT_EXPORT
+char* STT_IntermediateDecodeExpensive(const StreamingState* aSctx);
+
+/**
+ * @brief Compute the final decoding of an ongoing streaming inference and
+ *        return the result. Signals the end of an ongoing streaming inference.
+ *
+ * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
+ *
+ * @return The STT result. The user is responsible for freeing the string using
+ *         {@link STT_FreeString()}.
+ *
+ * @note This method will free the state pointer (@p aSctx). 
+ */
+STT_EXPORT
 char* STT_FinishStream(StreamingState* aSctx);

 /**
diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py
index e1d37a70..ee5685bd 100644
--- a/native_client/python/__init__.py
+++ b/native_client/python/__init__.py
@@ -281,6 +281,22 @@ class Stream(object):
         )
         return stt.impl.IntermediateDecodeWithMetadata(self._impl, num_results)

+    def intermediateDecodeExpensive(self):
+        """
+        Compute the intermediate decoding of an ongoing streaming inference, flushing buffers.
+        This ensures that all data that has been streamed so far are included in the result.
+
+        :return: The STT intermediate result.
+        :type: str
+
+        :throws: RuntimeError if the stream object is not valid
+        """
+        if not self._impl:
+            raise RuntimeError(
+                "Stream object is not valid. Trying to decode an already finished stream?"
+            )
+        return stt.impl.IntermediateDecodeExpensive(self._impl)
+
     def finishStream(self):
         """
         Compute the final decoding of an ongoing streaming inference and return
diff --git a/native_client/stt.cc b/native_client/stt.cc
index 28715ec5..0ea5bb02 100644
--- a/native_client/stt.cc
+++ b/native_client/stt.cc
@@ -79,6 +79,8 @@ struct StreamingState {
   void feedAudioContent(const short* buffer, unsigned int buffer_size);
   char* intermediateDecode() const;
   Metadata* intermediateDecodeWithMetadata(unsigned int num_results) const;
+  char* intermediateDecodeExpensive();
+  void flushBuffer();
   void finalizeStream();
   char* finishStream();
   Metadata* finishStreamWithMetadata(unsigned int num_results);
@@ -143,6 +145,13 @@ StreamingState::intermediateDecodeWithMetadata(unsigned int num_results) const
   return model_->decode_metadata(decoder_state_, num_results);
 }

+char*
+StreamingState::intermediateDecodeExpensive()
+{
+  flushBuffer();
+  return model_->decode(decoder_state_);
+}
+
 char*
 StreamingState::finishStream()
 {
@@ -167,6 +176,23 @@ StreamingState::processAudioWindow(const vector<float>& buf)
   pushMfccBuffer(mfcc);
 }

+void
+StreamingState::flushBuffer()
+{
+  // Flush audio buffer
+  processAudioWindow(audio_buffer_);
+
+  // Add empty mfcc vectors at end of sample
+  //for (int i = 0; i < model_->n_context_; ++i) {
+  //  addZeroMfccWindow();
+  //}
+
+  // Process final batch
+  if (batch_buffer_.size() > 0) {
+    processBatch(batch_buffer_, batch_buffer_.size()/model_->mfcc_feats_per_timestep_);
+  }
+}
+
 void
 StreamingState::finalizeStream()
 {
@@ -465,6 +491,12 @@ STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
   return aSctx->intermediateDecodeWithMetadata(aNumResults);
 }

+char*
+STT_IntermediateDecodeExpensive(StreamingState* aSctx)
+{
+  return aSctx->intermediateDecodeExpensive();
+}
+
 char*
 STT_FinishStream(StreamingState* aSctx)
 {

From 9335842b615565968490569fd30b669fc8ef4809 Mon Sep 17 00:00:00 2001
From: Jeremiah Rose
Date: Tue, 7 Sep 2021 14:30:44 +1000
Subject: [PATCH 04/11] Fix Python bindings

---
 native_client/coqui-stt.h   | 2 +-
 native_client/python/impl.i | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/native_client/coqui-stt.h b/native_client/coqui-stt.h
index decd9444..a08e3ab4 100644
--- a/native_client/coqui-stt.h
+++ b/native_client/coqui-stt.h
@@ -326,7 +326,7 @@ Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
  * @note This method will free the state pointer (@p aSctx). 
*/ STT_EXPORT -char* STT_IntermediateDecodeExpensive(const StreamingState* aSctx); +char* STT_IntermediateDecodeExpensive(StreamingState* aSctx); /** * @brief Compute the intermediate decoding of an ongoing streaming inference, diff --git a/native_client/python/impl.i b/native_client/python/impl.i index b9405238..142d2e8d 100644 --- a/native_client/python/impl.i +++ b/native_client/python/impl.i @@ -119,6 +119,7 @@ static PyObject *parent_reference() { %newobject STT_SpeechToText; %newobject STT_IntermediateDecode; +%newobject STT_IntermediateDecodeExpensive; %newobject STT_FinishStream; %newobject STT_Version; %newobject STT_ErrorCodeToErrorMessage; From 083a9e1eccae032fc98cc6a8226bbf4a13cb0319 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 7 Sep 2021 18:28:48 +0200 Subject: [PATCH 05/11] Add logo and wordmark to docs [skip ci] --- doc/conf.py | 6 ++++++ doc/index.rst | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 9c668c74..27fa54e2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -136,6 +136,12 @@ add_module_names = False # html_theme = "furo" +html_theme_options = { + "light_logo": "coqui-STT-circle.png", + "dark_logo": "coqui-STT-circle.png", + "sidebar_hide_name": True, +} + html_css_files = [ "custom.css", ] diff --git a/doc/index.rst b/doc/index.rst index 7fbfe0a3..0cfab4fc 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -3,8 +3,9 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Coqui STT -========= +.. image:: https://raw.githubusercontent.com/coqui-ai/STT/main/images/coqui-STT-logo-green.png + :alt: Coqui STT logo and wordmark + :height: 155 **Coqui STT** (🐸STT) is an open-source deep-learning toolkit for training and deploying speech-to-text models. 
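
As a usage note for the binding added in patches 03 and 04 above, here is a minimal sketch of driving ``intermediateDecodeExpensive`` from Python; the model, scorer and audio file names as well as the one-second chunking are assumptions:

.. code-block:: python

    import wave

    import numpy as np
    from stt import Model

    model = Model("coqui-stt-0.9.3-models.tflite")
    model.enableExternalScorer("coqui-stt-0.9.3-models.scorer")

    stream = model.createStream()
    with wave.open("audio_input.wav", "rb") as wav:
        chunk = wav.getframerate()  # roughly one second of audio per read
        while True:
            frames = wav.readframes(chunk)
            if not frames:
                break
            stream.feedAudioContent(np.frombuffer(frames, dtype=np.int16))
            # Unlike intermediateDecode(), this flushes buffered audio first, so
            # everything streamed so far is reflected in the partial result.
            print("partial:", stream.intermediateDecodeExpensive())

    print("final:", stream.finishStream())
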
From a51cc78a3bf32ab2fe16c3f03c572fcbd6df5eb8 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Tue, 7 Sep 2021 18:30:48 +0200
Subject: [PATCH 06/11] git add missing logo image [skip ci]

---
 doc/static/coqui-STT-circle.png | Bin 0 -> 38798 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 doc/static/coqui-STT-circle.png

diff --git a/doc/static/coqui-STT-circle.png b/doc/static/coqui-STT-circle.png
new file mode 100644
index 0000000000000000000000000000000000000000..158bd9bed13825bd98a703638e003d2ab49acef0
GIT binary patch
literal 38798
[38798 bytes of base85-encoded PNG data not shown]
zVNu|tf6uvwZqaw?;s$-N*rt4t3E@1o7RN?h0+BiSbJ?E*V6FRacGG{M)Pr3Aro>Z< zX660neSP!jGv&y}iFzjuM3BTHvXG`83mvas>!_~`yl&YXO{OF#yT06({{|^5S~Z?_ zyH1}uF1ayK^vMsfd2i$CyMTK)yR>$^(ZX*9EZ-Fg*Zoa2<%Z}tC*W=*`4r&$MQ=D_ zlJLBNW)CY?#-DigYXbeLBED)-8B){TfA0?hC;%Tc+*~&hl-yka|Gf$O$+=b6ymbHf zX^*$+;RU<@X-6}S4}^xfCNR$bb-{qDKh~*eNynqdbpx?kMOsz{yRI`6E4xcq{2mp& zW*^&L6a`W3u)q>G7OR_X0HUSN-|O^yx-Y9c>2k8d!y(Y9msa?DxTWXUN#Kw=4%JJQ zV*lRyje;6Z+Ipz*!d=0Y{c|YTy{K?tgrv-FgSXfAcrF3EObwn@X@?;(y{>` zJowJ>Qtm1&BfwpP?f6L?Ghm?#`44dTpauWn@b4qZ#U9(F>4!bgHA`QE;8XDpjJv)O z798Ajz6v_Ar5s0LE#KJi->y2aGgj6k2C_UG4X;^R*>8@Y(m_Sc)P9!-uzQ7O4qn}O zD`ypj$$o3syZ}{fwn+7M*c$H@m}b@EeoRkv`-X{z>)Z)Slks)+Eb>XOVSAuo2{ph1 z!w=E#N)ZiUa2TgMYY-9dA=pWsQ5cTdcDT<^dvIV_5hmyp5H*S;ZU!8LoKOnkp;o_gR zd*mA?Jz>%{D%_pt`FBlVvip{omU|lA_`~5z;-clWyY$_&R0Q0-s;2@S+d%>uWPR zeeN*p%&X1>k`{L_)Hizb+pM!gQvSadpvPD60syP-C8t!nmY0?5^$T>ew7c&nxYBlbDQPTee9-@E&= zHcfT-flVL5-kDV$rf{z$o5Kg}pC&%^(Y8(7weRP4W2Z1jOIGIHOCN=@n)Y`cU$sMsX;gU7q# zb9@Vtl9lP6!N4q~eY#po-=1N3QHhjM%O7jh^<8GkSffIdlH}RHkJ` z0Avxk(TYZ-QQz8f5?Ts9I2Xx)Xuz|H$c;1|xsb*vaKm*svU1U3#Y%C#)qf%#aq)qo z2dsCHR`%)aYR!EuXJfkFo5G5P66AX9O|cizdqgPivb##{>){Q{zCQ8y!Y%#s3|A0a z8T+j}zoIs?s`FWP1uixj$Of1e-61ek(}mBZIl5+YkCgqSw88ZGnlkJzf6kyc z*4ASPUwi$$)>+n?Zgy|CGk>hoAkM)U9~l49bjD~e{s>>u^VgwW*>Xi{c)t@nH8;J7 z33T!(?CW?u9`m3$gCtP!mN#j-Cn;auSdbYYG-3Gs{Qr23n4MDjj`>^zi_W;_Zr1hTiY2Q zUXP9TwD~%3T-esx2C_)Q6V&es+UL<4x1~uxu{b;LGfM5_$sil-V`X5Ehu$-kxo@hr zOh{DK+q`N|Z=Q=r@N%-7HsoN?pr#yN+Vo3MMn-A{Vo|dYTnyD9k>}Cr`SyjrR~(I< zmph^?t)nVJwx+|^&#ByR6I)FrPBnCO3x}hKs{T0BEntHR&ekwx642VfT_`#dX>Jcsi1yDvqcPC8JN%CDZTY9E&sN41h-6J zG-PngCC;O(jp84K9>~6joTIp;E`cW=8sGg<6`eFLM&}r8sj?&$ylHl3YKIjjPapX8 zs*O>!M*^`I&7rWG+2jg@rdebnvxelOmY^?Gj?N5g@yY^7VCb zPd}^MxNBC?;PGK_qLkkm)~y1S9f@d&I9)V#T)i91 zjXOt7slq?cxAYE_4`G@?;e74Op`)ZI2us^+WKoJe~=$J26DnDFzU^r$;VM1ACPX* zCEd(c^9k+Acu4*&pzE8JeY!SAzK1`3Q9(amxN5P=;)gyAmv_>jpe@N;MYuLfIEn;I zkI0JG)P{k~*b(M|R@DCBU0m9cRCV@PaVFkhL_u$M)>hznEhbpsC03xS(Z~i92VSMm z4YQ@I0P8y~xW44}t=vMidQJBVKWM@(JHjYl>> zX+Jn~qRHbxp*?~3OLotqj=8PiL)jkW(1T~2(fV52jpu<49*Z6|^)s{SdN?nq-r{kJ zHRhaFY}kP6#5*3_#}0O5S1?HV=fzHpuh6p&E;mfStE!sfrJL`2L*z%dyN-<7lstcz zR$}lWtdUrT1&41hu@PuAhb8pw8c_4I(p}G`HB7kr*4&`Wv>aR9x*}^1YeEgVP=Ywj zXgKLK2V2@F7^1r0526EZ7?#}ju_iNke-CnVwHGX;InYGCB&`bB%1HD@vfx^4@h%rz z*lrHDS2;~s_5B27(eMu4S=>-L}+E>Vp;B zE3Plk-aWTZQk<)D3KOQLz9^~8(sILugv#6*#bZ3JEnK<|SU znwcd^4VqyYl8HyXgNh^;QWhbRhU-<~9x=eO#^TIeEK}6a49hJsFTn&yATC%{6j2u_ z1c98mc4+P5mm~xy2^;jE8@ZUZlitvkWSFVJzCy>elpC=`k@zC_B3%4~6{w4c18njN zuADFv*7V_tvj?&<&=97IDUSU%ZR-h7%a_k>sQER*LwaANfqbmtuZlkQ7H$;>$X@Kn z?sI6fnG_!?Mjd$>L)6dEzOF@hNbCGwGXJr0Xq-Is{x#v?l$FcNE?t0Gu{rs3AoV+t zw#i|cWW~k|w~S24C?w|fTd6gNG+!QW1^Z8%d(d!1{GCi7E%nGqExoqe@NATc=D)e6 zd=LNSg6TFX!psszVYq&16WFH%YKfmEm+C<$YcdS0PhHhsRt=wIA(t`=NPo?0+8>iB zgo;L4(>eUYN4q)|lT5vL8AvOws6ak#%G|bWJ8V6G_2xdR09-pla@CgV@p-vdYKu&wd>6%n!?^;%AySnBrVX7z0>)Y8MW7<^W9LIQQ1Cp@mb_TnH!=-s7=w1RnyU@+&XW z(=myW*$84f+&n7!Qk4IU7A5FJmldiI%*v}>i9nk4HpU2zpq{lxaRsay3Nf#*CY$$nk+Os+OtX_ zTVLpGM0Hrzf`IC`=hFLMwKwn<$VOk$$-F$yH`DVjtQXSMkQt{PIJ0vr9S#rv`-kzWR(vN{B26yB2NVPDFZ)v0`08Kke`v~{1`*M=%EadV6$#td zwuF2m2BAQNsw=G6P`1gl)v-mEZm6uNPR1YH!>koe-(J3DPHwY?*oIb`Xuh7FjjKNK z{O7j(bFf*1vt{A81wdr$7CUswsFE`@y2;Thdf?;%%jf!)S{GI${)cznHC<8rl$Kwh z)OWJ#p`Bm`>R85(cRtcZbdlJt!70C>@I&@j!Q)ZZcplBb;K=06@y(L$WN2)nt_>)C zJ)S@B2iNE;!4YSzCJpxG5-BV5BWo(9;zdE>)=cpuz3!Us+ug zR}EG*R0%icU_-0E+|=Rj}c}a*ku-=+$1Wc)xfF0s|f2R-ej#K z=N!VJx)LdT|84zzSVo!>(o70S)KH*c;)2B$H+UMd)lU>22-Pc?66B5%h|9tj#weNq zF9Uadw}3Lb*|u_2HvM82vcHIVU;EUhjq+wC&~Qwl9PutGs+-Imn^8AL{6Zk@0B4QI znYZzVnh>v8(M+Ff^t@m5_F89*+41eU|Ml^EY^0B?Q8HL8-nsD6$1oi{mdpV(wI&$a 
zzW2pEe4GVAy&ArRKlZqDuw?WN^j)TU5E6k6Sb6oWFvt(CuFyG=p4^@`bL(}IIl3FZ z`?Wb`k`q&t7+GjhOrC>B6csNJp7`TuKn^^{89%VL!v^Tge3`&w@vcm+(k?*{ zZNFrDxjSx(HfENm31U0VBz`b|G`;W8MNX1SU~?ts#!7>y%4RwYX zV7-EmwcU-U$MZFibb_IZSnO#7xX+zC>fiN}5gEkF4Y3m&0*GZRSF7>tQGOSiz zZ3n_sRvT+YCz(CgMjo?AFd$)jz6I5_)74*5PP@BjTeH_OTn38|C`Ie*uYJ88!evWa zgF;2@8i}|<9h(ZA%{G^4bvCuCz?Y$}wKHW!i&-~_s5KoBX*lB%++?@bqC@_0@y1n= z+1RVZ9!=FG3&53x`nT|{T>_+fRsEaVB$Iek8KElrl`xD9-zvN#+H`Ekv7m^+?{PZJ z4eV8>gJur-?HukQ`QOQPBmWhX6yk4;cu~Xv)!Elc_lPV@99tD|wxHVOuJ( zjYUjux=Z?WS=kn`P)QBPbp>ay6%%1kn43xvf#s5S81pH}ft$u@nP1?(t<9VxtH+$2u*eLr5b-zgVc}tkLeWG(Qlu1@@k+l{KUd8yPqeK#>ZgJ& zeTuZG*VjQ_D8}&^TkhfUZ(sx^E)Gw^`I*#p-tp!!Rqp#wUHGi(musR`(5tSwWmzu+ zcH0*>l$RHFeB$J5Z@yU)v*q622VJAP{bVe(%ptrORixAc!oh<`Xj)c0{A+xT4~S89 zYCqQJanw#(=_sRhK8Tgk;vV;{6qnB*w!md%ON$g)=0>GM64fSJTVgZFtrk=2W}G%j z$+PpA8p?d>c5CmLr@T%HgoNwMc0VKzo5fYmEQ(sT@Txi;gUKy+G&1zZUUMz_`Uj*p zLw?&Ta)!t%I+UN2XqU;ZYyV_boi?A{o;0g8-Aib0(qT|Y%FG*?#khP63ywe9G5)^H zE=7S8#9IG=-0rP z`ER(A6F4`$Fendho)8~~AhCX3V{c~X8Fa|!Dbu{Q6Xw)UK$h;ge@!kauGqJ8dtY~b z2~eprnlBGh|B_czpZei_;vv(T&J645Ur5}&MYa-ZB)lbVeV8y z8mb?qP`YPL8~!OXuah(m|0@~KE{9Xl^Nsqx9!9UUX()4S!=~k<6vb4_ieW+RYUNFv zYki(+lXxx}ctcW#!`LuQJ`+P`P<&B*QU(nyjJd7V)+QKD)tSSgv?Fe2qVT~s7BN9O zitJq56ABIVb$wLJ>*j1+(Xoy8vg>#16}Gp>&fs3|FY`dw<$xAt>iQRuwEz+y^`O?F zymbHPj`|0W6Q|c$zvpf4bq3yHEA%2JodA`D=_BUUWVNoDFjlFsFiKM9 zpwrv2VgV$V$h)~l_Sx07?)Mu?ZYBNTMH>VeLgOd7g?%|waFfMYD%-egX8=$M^F~^= zV(aI=;F%>mNCtc9u2fC?96G={*{WVnM$Vqj5fq?uOLqSAm z^HY|}prWD7me~1GN0uNE3SF)>kCO|Ek5u+m~54GDyJ6MFcgUPnd$t?q( z6N~D#J7We&^@~WBE|4ZveCDO?XR2N=s&ti%{!aX|qvo?>niyBX@nU8O)RkPy+uPDn zVU!d20loI$4jgyvxk4S9-|UX}C<_j+osBdxGWC!PO=gac3Jb~t_A8On+B55(T8^r{ z%|-9{RN%Bc{DuAYbLhjd9gfPgK`~br;9Zko^@zHTf><-!_4>7}y@&!Xb;}%aY@e?| zdbB~wthkufge8x3OZFY58Fr`7P$LpwSa(<+X9l9TnP6PKidJ?ER$_e;8uYiFRnYG` z&aH4Xm=?kokqV3UlrvI>Tfnl?Seq;`T2{V~&nl;OWescZ_5F>^@-%#swbn4ATp@|d zH;D^8FjTNCuHN; z7`6yEy1#5q2mxZf;|VFM0!{Xa*whw%T)K-H^tQa9Xv&GxhP4hFqfgl#=Nu&-m}|M% z2uE|&1t4HDbn^tPuibz<@7*wdd^ce&rIWz+Hk#)LHVA*NQ+s1oN>-hBkX8n)=9ysG zGU$C!NG~6}XM7fzI)<$;_aAK%aPNQ@!1XP+p(e}D`UNpl1F}(DApXM6TZm#)qS5}- zngVXIyJxnUA^1R_iVQF3R zf{Ht5oTM|Gj%m6?tgnAiXH(OD3Z>Cz{g)tD7(v^^9ccC0#f#tD>4veGjo132+3%W# zV@#KVvVSR7-tGuNj7waBNZpV&uxFdPHdXDp^6ZyChso7VCgaW2bL%GscsvBW;DC}H z%JxRod=pI#EhCF?70gM8wpYBLnd8n}bXxqnY+fygEIyA3QB80mT2>iaC_DBsr%~i! 
zbjgg8p>SCP&v$53IjVspdqb+A>(1aEUjC3eaeovTefw}dt7LVj?YXYrWC!Em3VkMSZ|!^X-{h#hcgZ` zXNNIoBmN^LEw$a)EXL}3$lJ}PQ8V>EFb!dDiMK#7{G#pZ8}rm!7r z0C6NNAon0$1P%GSHQAPEv0C3@He}!HqOI=;qztDPsG_Gn-g)YYp%|l`H@w{KL(3_h zGhRcD|H-`@eircv=Az0V&xVXN7p1c2%<*dej*xuRM9`pGTN^0eHdWB&ql65fizqDo zb8+sOhKoiDiJP8VgQmb`E5-VI_nSEB3`!*tJ9R4T!P;+>T?^0j zy`2|Wy?+fna2okxg$=+EzDX3LMy=5~?AfWU{;Riia4Df7R#H;R2t?Ffuo`h|Uvil0 z?MaX)TOmoMz8*~pbhJNwj?(8Mg$yT}ejTkeGe?S>Hm_*5yLQ}%(exC;a>_lT$D>8y z%*PTEAsi0$VpPD|p`*#_H#;atUN+>@@V5IHJ>~P={4+AtbLrG{`~9Q(2D#2kf9g$u z+XVPr-f*8h%DC4@)qlnuZ<%w_Fy9D?(rd`dPpuWp)r7f|8SekE@mGS#Mw71T52m^t8c)E4clOtm4 z>v|z`UtfAil>oOLD=S&K1E&4+>$)SlI|q&{J?tbn#4+}thJkMeMy_f430qW@`Kx}U zdu^;RdA2irg$24f;a_B;uJha)+w?p8VT#mN*$o8JG0NkRY`-k|8WVO3Hb!%dF50PI z4^xje``)9b%91V(OFUfn?d|S@tTWz*3e_ws7^iPm2aazbC9=Ou8zseZtcGD z{_;0h);gj1h-z0M^^}${CPhebO2qUy%^U~#&fEFP%w@?@nI?-1QYA564&iu8;qaDI z(c@L@;#Hu1Aq5VD0IM4mP^X=^370b@@i1adSLwZqdfF@>GNaY_=yvFXJAP z{sPr9@;|@T=y~e=q$i|;YqQJA$8gFw{QzpkzjQsXTLPYHmE=YsiR{nFHRnQ z>&xIVDeiMqPvw?J!a_@Zbj6_w4bb9B1t8Y<>H`G7R(B-N1QZS#s zh!W$r3V(VAS<7_|zU-7Rc21LnP+-ylofCN7Xim6rhP-AO_l{rBU9C2I^E;fUFt7b^ z_2O&0oj`J(%PyE+#J_&I&b)e^FXbJQo=lLSLP)Y^7%kY?noQ0Hr}(U|^42>1u@3AW z09vU|`h?5g=n8(|Fu1>D||>1(TbyjY*H}lR7ObrNlEv%3~pbZ}B5xfH&XP#76?| za`zqy3@Qd(@%j`+U9QCOv!&=7w5wZ$vf|On5)9geFUEbx1iJMI=C6SKbDf*p)rfzi zdu>O8!WrqRjN@={IxoeT*;)oUmk1sk(HE#8Ya;uC)DCxG!xtEV{0h*j>*iAZ0yOSC zkLis;en*>1>hF7GqaZh}uCwQ1&GFre`TtsgKYt|WFRIGR3@hJm=tN(>khXIxK<%MyjNiY)&>cFKEbmoMAwqBA2h}n(@w~&EMkX8~W zmI)6u&5f4k7SO$MjG6Nm>K}V$FBOD z@D&VoY*7t&g5^ZCtbhiy!)wn=Edd2}onMR1Mj2Ne1~@R!IO>=77X7((4&LV7TB*@w z5rNj8$sAwzL$wO(1z_`??#$W#^_@d6xAJ%#W#U^~aY?ow;j_W@Q3d=1w#*KFX@3yR z_;yrkhh!&71X+vB^rDYDr}Y=afx9HF`lC;guw=BIp&P3 zclb|&$hlTSR^O6)Dk~{Mh&hq{{6WX)6!i9gGg{dY*ELl{Q`#J%_JOgv_3e>yX)L}@ z?!5Kx!SAOawq=lz)oN+cQ|r%38~*Hu>M3`+uLwab2no!$MG}8ZF2kBnQqp*{=JJg# zzW3OV6xuOQ*c&@3TT~!Jp}h zXgs^WM|{y}H>QL#N4vViIeq&30x$31x&^P_sn8wBlpqw6#yhLXNjB=TitT2%jor4! zAc`up+eRhCVFk|S33uDnmOxGMelC24=ART|Y1XO`;H4T-i;Biiqwapch3t%g6X1s^ zQ0i8pdv;3AJz`}tc!*_E_MNK@gO-(|+Tq+$c^g$qI$_0Fq%RZ(C~rksrkP-*uDJW! 
zg{af~taWi){E+s~>E@I?N`kM0wMMj$Jz7t{|Llme!ja)m(I}K|KpDO6Dfv!j7MGo& z@Ix;LkBvC47ANwJp6%J8w<>YrnX@ZXM;BCR=U+dH?En0tBpljSD{fV{kU=dt*=k}| za?>zv=&G_ZEfQb!l~K*T;Fri#deT4;Nk6)%t2YaEr3MS0!x%%#SRGtEb3&Z9QIB~7 zB4DLVzNgS>&DZPN`c8KWd3B)@SdmnIg90wEWQ9@L(7z|yuH4&GlEMm$j6c95;X372 zg_U_h6pFAwk+rQ!PNE3yXQUtze=w_zyhWreE*TDk@|uHjA0?{*X~^)?jykbFiqB6bVlfy`DQQ+yM6<(UgzX_s>Ir3;!wNFqjiVb-I-qPs18=r;01x}NvbszVRdYW-y zin68Yy5^DfqCg9W;=r(KHo0@xP77It`e{%&!~aJV`m1Nln+tUGVq)9@G>-olN?{Df zlA%V-h}>kP`KJTD30-2i3Kyst3I||_Z!lpJ1*$x4o#hZ+*kR$o15BZ92Z`$-ZuvME18J^U9dWY*JLhe?xt1>Y@CI&$Hta0Oe*_5M8t z@#PRX_Mfhpc0oV<-}4rDkc$&i{%x;8Tvb9TJaz5Wg}aa;U?xO?-Tp$k{+m{dZE$o6 zOK|O1Pu@B$&SA^TS~N5?#U&*k7umElH0Bl-IfaG6$H$fw@@ZmfY8XdHN8i!WW3#gr zU0peL1|mrX1+&Y`>2w$tWBklUK}(%}g6W)g`;naMzg8M-ez?fR#l>kf+Ts&FcEL71 z?SFK(^F~&d^38F*`^68bs!(9}6@RmlQHiC6f#dut&O{Wj%TSZJZ1}AX0aG-uN8EPn zKXIbp2*gAw7(GNYBYdGlL_d_uL0h74?Em zd@sfq2R=7n^-4vPxy#t9yW?LhUY_PV+`9Cpiu!6@!;4dUS#)ZyC3#Co&ynV@FT=AcSe zvk8rGOvZ*5my`Y4pd7_)frT`7tMXd2NzYS078Vu}ONaIjo8>xb%~i6mS82SfEd(Gs z*}U(DNccR)k=U#QRpFwlstN2y0|>N&f}J7ID4oe;k+&y{(Sd=`0?&t}m3r;R zGaVZ~54uv(XF*>v6Z#QB->Sfyu3#)90Z8}+1A88~b39F2`bk(u6${2e?B7FOzMsl#xZSf|W z)HLT^$yr(FH5XW3_oo&nra&ZEtZKZ!*s#kK@LA^Zx-b8!tZb;;?m6G$xcBF$mX;+t zI=bfP&z~3kp%0D6NyxgCWl9LyRTwGm`GR_vrViI^?3Lh_2c#STGgoLOxJDS*bzPTPR zRw>HKA;7`G-5yQ{#F9$JWoC}QKHa2p+GVW_viZGxqoSc5P3MKcX!Z#TP)&x9+*q(h zFo*n>A7NwCZFUR-9%*U+_$wT+)M!T-PpP;cMwvyWKD{b)wp@QSl>?=zsp)+^Oi4yT z(ekE8O--G_l|yM5`q)j;O30_e(U3U8&g63sHlx$1?%I0;wrOZ$f`Xnta%o8;f|)?D z;qua6tKPC7WHOq<<&dj=@k3R$ZzM4-i^Nyy+|Ru-VH|!BCuM&ub>~`Ot;NOYYP+A! zZhe(lk>=3>EGLMy4vNhTZl_>smBOYvMMFap zY;0_TCx=wp2xjNwx#0@4ay)Lwz&y!l%Pw(mvw*d)1f3vAjY4mz(e-vO-F35`K)AU| z-4(mR*CnIqtMk^2mbQ%=qd~S!l8zl^!X_;a*+jcvPk`QaJ>M?3dObG%VPAb$J0JTQ z57*mUWnVJ*z0{x>ws{%u{AEpyBA(LWDXULcn`a9a-HvAZQ3id0PQj~XtE%QYMR)fm z7@a;zaia(S#Y$@k4BCS(`@7?xAP`7rnUaTRnR##{Um8u%^QgeFvqLxl2FDp?!@e1I zl{#I#%fjT$JDEY(@#yZyj~}X4dTY#sb`d%1ReF02(v+`{ie%6k4PITXfCSVq7&LYV zt?pO3qb56S4h|0ag@x=57eTCZL7$<_x$ zjH$m*VBq4OTnkJJe(=3KT&TN~Y}ed%h?p3CG*Ui2JptwtmXU$?nTiC0 zu=S-ILQxlZc$SMxJ(I^}j)zLa^{CC^0|DD^Aef(Dnm zqoZTc{XYi)*BHU7SL*yoA;~$YZl8E*w2oj-_UFbyKCqn0hwFS;Hq^H#j)k z?&Zc|z0?I9Vns^gB7=BdnGY|()>v=;H_(^b)6?UAR=-H|%2fCHE5=lVjkfP~d^VHC z3>gra2lqyk(W|8{TYgntpzDY5T~Q3A7zrf}^f#ZkoG|Q9WS+!6@c2Blm6Vk5U3NhM zg2!fMO+?}Ia=D#n{D%R@pn3m%9sz3hRl>nEla-S*KAtNf@R>C(R zuF~U&K_(dcgHFrkd>Att&G!wZ({oX8k)Nce7nT$4v3_8vnS_J{i|H4{eCf|T{Z3!K zH!X2-alpafb~^L&@;tVJFwLiPKLfFv&3vkBk5?r;baL!=H{IiU&>YK%z;kgAA&z}N5WTQ?oT-3VMM^{SCqJJI2qnFMk5kjr&v%7#4Jv!_yP1NQ-isaJ z5yHa4PQkSy#G=8EdnZ*IHU9y&o5p4#GFvF0%Hu-g^>98+8dfPC@xJwymo{LbHhsZtuH!bsQp;4Q-36oe2h9 z?ZN?Cdn_Y!Fo>L1z0!C*jh&2~{OQiU!FEl?(UE0(dKv+X>31ft8_zjhE{X6M?Mb8h z2r!?HI;a1Eq~J3Ouo|PSo}k^`-OJ;$s^_9o23 zfFebOIH_0vr%VFmwLbZb0@Lv{zyadp;(Gqsr1vA!KMh@Oc8f_!Jnv%Z`P}^kOi1p> zk60M7d_&Wf6PI5}kv2GgVRN8sLPNjg=H(sDm7oD}eSfY*<#eqz`C;8FI*Z@S^Ug{! 
zjV%n=S|#A9@c-s8iSCtoPg9i~$xMMeH02%~Rr<5+`X>7$!*;FZsz~rHtfxl=R9pLR z-vJ;(@ZkwW$_PAeWn?!rhnwt}b_G~hu#O=1Um+u$G|Po@>6`uWqs1!1e=xYqA8IU( zJsdz-(LRyWd6Ko}u>#Q~+qs|gbrBx(0k047r^Zna3k@Zrpa>@5_pCPPg=w(cP>_-N zBe~uRL^M1;cO~NRgpKcBK1fl3D=G^`4X9O?sg|J9Yt{X_-W>)UkHvno3%>K9$zkVq zhb{Jc_-ar5_atB>P6Semk0X(6wyP<1b!-A}w<^nZ7CZMZ#-^qqrvru0G>%qB_bqrj zCPtV*d2r2uhVL)mZQpc-^m>5uKFDBdg{w!1b zw*dY3>tyezy<|uSBm)D3w%Zw5z_l0md<5SgjpRSY;(8PASJ~aaDRT^@#CJhQwz}a7 zc&BjKEHimOUS7O)lFKFm@oMt@?FDKd-fZfxFvCTo{gwznl;rLU#nOqLc|xQvmu8n2 zj@M+1`xOZQ1>2A2Wq4-En9;DP-FamN;AqF+S81?W071xd z?7V-x+JSV^17fJ-35}u{1eRS@V~!@rq9}yGl+GoBy1=#9 zz#JoQ%SW?CPS4K2UPn%s>#Z0r{-HwoFK{kKXLs`#JsIEOtF@f+49`1WY?JjmO-7S` zb1}LRL=}Q!zcnsPZ}jmN{yU_8v;9?a>hlqNJ5mJTP71RT6*aZPt4Ai^(Q`J;$UaO^EgTl`qPLEe6s%;ukE5b&kn@r9V0LnuU!Yc_t*+&KX#pH@j%7r9 z7LP>AEGo-D1k`(O^L7>h2$Iz*v;aJGvEKd!L^V7w1pvqV1Lq89H@CBjXaoe6z>|lJ z_dLf`21tlj2=-D=11?)zTenBEg9Sc+{``@WmoF$DN+tZV+F*k#n?&bek)1@ZZOI}< z#S3P#lON(jRXQ5z!@8iOgQm$bAazaKu%5`fyLaj*9i;Ez1{J0J}VZZ z#0u8q#HSwH+KPl(utI9C1P%?`Gfvl2jpv68MPXro{pil7Cg7``?g6*Xi`irXhJ9a3 z)hbG*iE@njkv=S9^8UeBc&yJEQ#y|Fy}^j7EGB=nUna8n(}BaK)?r7Y@jV5U+wuNv z1(1c_6z21{!)Cy8(;!e~6d>|W@03kc{CmK^Fr%8NxVcmN`otRV$45tZfP;vVojnmC za?iW&li7lkx#HpG3+31Vt8+QwABe#2)pc7d=uTE*8PTJ}r>FZC$2K7Md4|162Mrs0 zSljOBY2`MM1?cPRV=^B4Ch+mP2&nK!m&gnC2odp2Gs#WS}ZarZ0d$P%=a})qGkEL@)1Edw+$MI};IKEuJGb=|l zWTx6c)c3AT|L3E_JaF*l0l@^v84kVd%!Qdq40^;L$4Egti5=A(nr(rPwi7rcSO6yN z`Eahg^`KR2iU{oA(_rH4SH`w3QDpu;{)bi33a$Fkh=_aup9Y46IGruC%gD+CJSC^N zICN()8bF$Z0LA;bBFOq@0X(j!#ob9tHH26e5pXFXiW1PuEdwbwu3N7t#ko&v=c|YF zWtho~`h|WU?*v{Oxw#~b0`&V9($dmx*CVu$PJfI8Y?a%cX^Fp>ru&++*BhFC4P6<~I6F%3AZu$| znV67Z3<=p>P*|w7;rmiS3gYv;HE)h?1gwTAt_ zUJN=-zvQz8_&5KUp0749^oL{OC+@X*+>E%K0+bf+*`t82jbsH>&QB~X3dC-iqarRC`&WO3=Wo)3AVDS2222YYo2<(c#a-DN?u+b zJ9g|KD=VwQPxh=^w=P|~b|pSOo}c~fXEbWmh&%4MLt%@+4B!=GOtH$wxrK8lFdleL z^VNGUUAn}%bLTjF_AG^kg%lPRQc_aF#fujyEiI+Aw3Mq?uTrT}B`Q^_M3pL4s8OQ^ zHEPtLdiCnmu3ej2wQ4#3cQ7J;Fdul$7*h~TCl!?Uim)sz1{eo)(YzIr0J0nyXpG4V zwi62r#?i6|Y6P%Ob5}$H#5!O^FcwYFmW^FFyMW=qX3b#{ak$OEaC_YfN@2k~dd8To z_7%)zA`Ak;(4yykLJgj*yB0iK8YF9KNT{d>%bg&Djh33DA_`vl~ovjYVlQ*F}@|I<_ zwF{?}{vC*j7i}g)K6R$D`QgGzpzM!7pU}St5%Gd`l>Ncx)=&|zHs8~E1aj4hKT%&74{X)PDP=z z@l@e7u&;3XC=C$-rrTFEClrOs#v7$G#4epGN<$Qk6yj?iI27V7TRvIE9 zWj`>|81t^8P}%sZaQ@RSoDNDuBqXf1i)NdmP}%spaOwj83#2Fwk)W0iylIR%tteDB z{w$sU28I9)l!i!9IRT6^#!OWdDjTO1P8+*$+A0l^ps?O9nhlCVW#hEMsRz6c3{V;( z0c|oc+8A?6QK)R3Tsl4N(z#!0hy;{FcFE|rGb)>aDV&FZ!N4z-hDbp83oynQvs+Q9 zYyz=#2HK@lOKFHWQlVWk;}nI;CeRD#G0L8gqrb@!ahBCUnlWaRqEOicOW{-k2H9Jr z5)tvMB48{q));e1aj0y9v2OJ5sV@!^sP}ziX>GS|z0h%Zc5s&%;m|%>_QWPp1 zDJz`%K&rjnrbI+4Ug@+2`T|{*hJ=_!z}v={ z^@>7eBfcx0PCy@^v(k_tw-T6cj9H~9R5s$j(&+-cq|%WfEtyPX%yLDcvJq!p>AVR1 zm(q{`w-orJu4Gg;;_T9C1M~!btu!RSybH`U#%xd&DjNx$(uo6l*xQekh&a>*APdMc z#_UuaDjNyh(z%zik4rxf+^0muAr1p`fZ4{FV~Rs%Bf(QTw*lRO-vEy*5%H6cfqB4O zW6Wj6p|X+SE}quFbCf+x=XNC`UUr_cXUfdi6Eamc5}Kt`ALs@=3&blC@v!Z{pMeFs z)xXL{UH~81py9Au1d3MDhF-c#5*y;yk8ALe?hAZX>h87;`{z zh{{I1Sv>ax9f1zOlS(A$eF&@uJ}|}{QXHbP5q}g<9iTnX4tN4RRWukB0PBE{fHlUL zGm1o1HsZAMd6cr>?>r7Xszd_DCzSoc;g91 z+a_boH;P15HWDo5b0_ehKuddT0fZ|Vue%ED0RC!k+l(;H*O}LwmaixEqK98US?y;55zvCxCq5IPf>%sJ$IA#+=efi^@jC*=1B2s1MYm z>;|KE0JVWy_Ek}JdyAy(aSPRes`ggN-XbXbAVZk_@5TbIQTB`XE5IdtE3vm?%5JM) y1e~+CZ-GMK@4z>d-LUc$aMBoaQ6nY6?Ee9hV$0aX5B~4~0000 Date: Tue, 7 Sep 2021 22:03:09 +0200 Subject: [PATCH 07/11] Fix header logo scaling --- doc/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index 0cfab4fc..1707d2ac 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -5,7 +5,6 @@ .. 
 .. image:: https://raw.githubusercontent.com/coqui-ai/STT/main/images/coqui-STT-logo-green.png
    :alt: Coqui STT logo and wordmark
-   :height: 155
 
 **Coqui STT** (🐸STT) is an open-source deep-learning toolkit for training and
 deploying speech-to-text models.

From 09172068277d80d50fdbaf889e8f2fc24320cb60 Mon Sep 17 00:00:00 2001
From: Jeremiah Rose
Date: Wed, 8 Sep 2021 10:20:24 +1000
Subject: [PATCH 08/11] Update Dockerfile.build documentation in DEPLOYMENT.rst

---
 doc/DEPLOYMENT.rst | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/doc/DEPLOYMENT.rst b/doc/DEPLOYMENT.rst
index ab0f0176..a7499d19 100644
--- a/doc/DEPLOYMENT.rst
+++ b/doc/DEPLOYMENT.rst
@@ -184,13 +184,24 @@ Dockerfile for building from source
 We provide ``Dockerfile.build`` to automatically build ``libstt.so``, the C++
 native client, Python bindings, and KenLM.
 
-If you want to specify a different repository or branch, you can specify the ``STT_REPO`` or ``STT_SHA`` arguments:
+Before building, make sure that git submodules have been initialised:
 
 .. code-block:: bash
 
-   docker build . -f Dockerfile.build --build-arg STT_REPO=git://your/fork --build-arg STT_SHA=origin/your-branch
+   git submodule sync
+   git submodule update --init
+
+Then build with:
 
-.. _runtime-deps:
+.. code-block:: bash
+
+   docker build . -f Dockerfile.build -t stt-image
+
+You can then use stt inside the Docker container:
+
+.. code-block:: bash
+
+   docker run -it stt-image bash
 
 Runtime Dependencies
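As a quick smoke test of the image built by PATCH 08 above: the Python wheel installed during the Docker build provides the ``stt`` command-line client, so transcription can be exercised directly inside the container. A minimal sketch; ``model.tflite`` and ``audio.wav`` are placeholder files you would supply yourself, they do not ship with the image:

.. code-block:: bash

   # inside `docker run -it stt-image bash`; model.tflite and audio.wav
   # are hypothetical local files
   stt --model model.tflite --audio audio.wav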
From 738874fb6f7c7889699aee7ed3c7041a5422d31d Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Wed, 8 Sep 2021 12:00:11 +0200
Subject: [PATCH 09/11] Fix Dockerfile.build build after TFLite changes

---
 .dockerignore                        |  5 +++
 .github/workflows/build-and-test.yml | 48 ++++++++++++++++++++++++----
 Dockerfile.build                     | 16 +++++-----
 3 files changed, 54 insertions(+), 15 deletions(-)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..91db6201
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,5 @@
+.git/lfs
+native_client/ds-swig
+native_client/python/dist/*.whl
+native_client/ctcdecode/*.a
+native_client/javascript/build/
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index 8b7e0ff1..71576a1c 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -851,10 +851,35 @@ jobs:
           EOF
       - run: |
           twine upload --repository pypi *.whl
+  docker-build:
+    name: "Build Dockerfile.build image"
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+          submodules: 'recursive'
+      - name: Check VERSION matches Git tag and compute Docker tag
+        id: compute-tag
+        run: |
+          if [[ "${{ startsWith(github.ref, 'refs/tags/') }}" != "true" ]]; then
+            # PR build
+            echo "::set-output name=tag::dev"
+          else
+            VERSION="v$(cat VERSION)"
+            if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then
+              echo "Pushed tag does not match VERSION file. Aborting push."
+              exit 1
+            fi
+            echo "::set-output name=tag::${VERSION}"
+          fi
+      - name: Build
+        run: |
+          DOCKER_TAG="${{ steps.compute-tag.outputs.tag }}"
+          docker build -f Dockerfile.build . -t ghcr.io/coqui-ai/stt-build:latest -t "ghcr.io/coqui-ai/stt-build:${DOCKER_TAG}"
   docker-publish:
     name: "Build and publish Docker training image to GHCR"
     runs-on: ubuntu-20.04
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
     steps:
       - uses: actions/checkout@v2
         with:
@@ -869,18 +894,27 @@ jobs:
       - name: Check VERSION matches Git tag and compute Docker tag
         id: compute-tag
         run: |
-          VERSION="v$(cat VERSION)"
-          if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then
-            echo "Pushed tag does not match VERSION file. Aborting push."
-            exit 1
+          if [[ "${{ startsWith(github.ref, 'refs/tags/') }}" != "true" ]]; then
+            # PR build
+            echo "::set-output name=tag::dev"
+          else
+            VERSION="v$(cat VERSION)"
+            if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then
+              echo "Pushed tag does not match VERSION file. Aborting push."
+              exit 1
+            fi
+            echo "::set-output name=tag::${VERSION}"
           fi
-          echo "::set-output name=tag::${VERSION}"
-      - name: Build and push
+      - name: Build
        run: |
          DOCKER_TAG="${{ steps.compute-tag.outputs.tag }}"
          docker build -f Dockerfile.train . -t ghcr.io/coqui-ai/stt-train:latest -t "ghcr.io/coqui-ai/stt-train:${DOCKER_TAG}"
+      - name: Push
+        run: |
+          DOCKER_TAG="${{ steps.compute-tag.outputs.tag }}"
           docker push "ghcr.io/coqui-ai/stt-train:${DOCKER_TAG}"
           docker push ghcr.io/coqui-ai/stt-train:latest
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
   twine-upload-decoder:
     name: "Upload coqui_stt_ctcdecoder packages to PyPI"
     runs-on: ubuntu-20.04
diff --git a/Dockerfile.build b/Dockerfile.build
index 426d3e11..0a0c612e 100644
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -125,13 +125,11 @@ RUN ./configure
 # passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment
 
 # Build STT
+
 RUN bazel build \
+    --verbose_failures \
     --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
-    --config=monolithic \
-    --config=cuda \
     -c opt \
-    --copt=-O3 \
-    --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
     --copt=-mtune=generic \
     --copt=-march=x86-64 \
     --copt=-msse \
@@ -140,10 +138,11 @@ RUN bazel build \
     --copt=-msse4.1 \
     --copt=-msse4.2 \
     --copt=-mavx \
-    --copt=-fvisibility=hidden \
-    //native_client:libstt.so \
-    --verbose_failures \
-    --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
+    --config=noaws \
+    --config=nogcp \
+    --config=nohdfs \
+    --config=nonccl \
+    //native_client:libstt.so
 
 # Copy built libs to /STT/native_client
 RUN cp bazel-bin/native_client/libstt.so /STT/native_client/
@@ -158,6 +157,7 @@ RUN make NUM_PROCESSES=$(nproc) stt
 WORKDIR /STT
 RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
+RUN pip3 install -U pip setuptools wheel
 RUN pip3 install --upgrade native_client/python/dist/*.whl
 
 RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings

From e6d5a0ca8d8e07e252d5c85377ca23b1359ea004 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Wed, 8 Sep 2021 12:16:25 +0200
Subject: [PATCH 10/11] Fix linter error [skip ci]

---
 doc/DEPLOYMENT.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/DEPLOYMENT.rst b/doc/DEPLOYMENT.rst
index f94f949a..acf36f19 100644
--- a/doc/DEPLOYMENT.rst
+++ b/doc/DEPLOYMENT.rst
@@ -164,13 +164,13 @@ Before building, make sure that git submodules have been initialised:
 
    git submodule sync
    git submodule update --init
-   
+
 Then build with:
 
 .. code-block:: bash
 
    docker build . -f Dockerfile.build -t stt-image
-   
+
 You can then use stt inside the Docker container:
 
 .. code-block:: bash
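A side note on the Bazel flags introduced in PATCH 09 above: ``--config=noaws``, ``--config=nogcp``, ``--config=nohdfs`` and ``--config=nonccl`` are config aliases defined in TensorFlow's ``.bazelrc`` that strip cloud-filesystem and NCCL support from the build. One way to see what a given alias expands to (run from the TensorFlow checkout the image builds against) is:

.. code-block:: bash

   # each matching line shows the --define options behind one config alias
   grep -E '^build:(noaws|nogcp|nohdfs|nonccl)' .bazelrc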
From 01c992caefc8ec122e1f651b986c181901292015 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Thu, 9 Sep 2021 17:58:06 +0200
Subject: [PATCH 11/11] Upload built artifacts to GitHub releases

---
 .github/actions/upload-cache-asset/action.yml |  58 ---------
 .../actions/upload-release-asset/action.yml   | 107 +++++++++++++++++
 .github/workflows/build-and-test.yml          | 110 +++++++++++++++---
 3 files changed, 201 insertions(+), 74 deletions(-)
 delete mode 100644 .github/actions/upload-cache-asset/action.yml
 create mode 100644 .github/actions/upload-release-asset/action.yml

diff --git a/.github/actions/upload-cache-asset/action.yml b/.github/actions/upload-cache-asset/action.yml
deleted file mode 100644
index 40b35eb9..00000000
--- a/.github/actions/upload-cache-asset/action.yml
+++ /dev/null
@@ -1,58 +0,0 @@
-name: "Upload cache asset to release"
-description: "Upload a build cache asset to a release"
-inputs:
-  name:
-    description: "Artifact name"
-    required: true
-  path:
-    description: "Path of file to upload"
-    required: true
-  token:
-    description: "GitHub token"
-    required: false
-    default: ${{ github.token }}
-  repo:
-    description: "Repository name with owner (like actions/checkout)"
-    required: false
-    default: ${{ github.repository }}
-  release-tag:
-    description: "Tag of release to check artifacts under"
-    required: false
-    default: "v0.10.0-alpha.7"
-runs:
-  using: "composite"
-  steps:
-    - run: |
-        set -xe
-
-        asset_name="${{ inputs.name }}"
-        filename="${{ inputs.path }}"
-
-        # Check input
-        if [[ ! -f "${filename}" ]]; then
-          echo "Error: Input file (${filename}) missing"
-          exit 1;
-        fi
-
-        AUTH="Authorization: token ${{ inputs.token }}"
-
-        owner=$(echo "${{inputs.repo}}" | cut -f1 -d/)
-        repo=$(echo "${{inputs.repo}}" | cut -f2 -d/)
-        tag="${{ inputs.release-tag }}"
-
-        GH_REPO="https://api.github.com/repos/${owner}/${repo}"
-
-        # Check token
-        curl -o /dev/null -sH "$AUTH" $GH_REPO || { echo "Error: Invalid repo, token or network issue!"; exit 1; }
-
-        # Get ID of the release based on given tag name
-        GH_TAGS="${GH_REPO}/releases/tags/${tag}"
-        response=$(curl -sH "$AUTH" $GH_TAGS)
-        eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=')
-        [ "$id" ] || { echo "Error: Failed to get release id for tag: $tag"; echo "$response" | awk 'length($0)<100' >&2; exit 1; }
-
-        # Upload asset
-        echo "Uploading asset..."
-        GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${asset_name}"
-        curl -T "${filename}" -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" $GH_ASSET
-      shell: bash
diff --git a/.github/actions/upload-release-asset/action.yml b/.github/actions/upload-release-asset/action.yml
new file mode 100644
index 00000000..93327473
--- /dev/null
+++ b/.github/actions/upload-release-asset/action.yml
@@ -0,0 +1,107 @@
+name: "Upload cache asset to release"
+description: "Upload a build cache asset to a release"
+inputs:
+  name:
+    description: "Artifact name"
+    required: true
+  path:
+    description: "Path of file to upload"
+    required: true
+  token:
+    description: "GitHub token"
+    required: false
+    default: ${{ github.token }}
+  repo:
+    description: "Repository name with owner (like actions/checkout)"
+    required: false
+    default: ${{ github.repository }}
+  release-tag:
+    description: "Tag of release to check artifacts under"
+    required: false
+    default: "v0.10.0-alpha.7"
+  should-create-release:
+    description: "Whether this action should automatically create a release for the given tag if one doesn't already exist"
+    required: false
+    default: false
+runs:
+  using: "composite"
+  steps:
+    - run: |
+        set -xe
+
+        asset_name="${{ inputs.name }}"
+        filenames="${{ inputs.path }}"
+
+        if [ $(compgen -G "$filenames" | wc -l) -gt 1 -a -n "$asset_name" ]; then
+          echo "Error: multiple input files specified, but also specified an asset_name."
+          echo "When uploading multiple files leave asset_name empty to use the file names as asset names."
+          exit 1
+        fi
+
+        # Check input
+        for file in $filenames; do
+          if [[ ! -f "$file" ]]; then
+            echo "Error: Input file (${file}) missing"
+            exit 1;
+          fi
+        done
+
+        AUTH="Authorization: token ${{ inputs.token }}"
+
+        owner=$(echo "${{inputs.repo}}" | cut -f1 -d/)
+        repo=$(echo "${{inputs.repo}}" | cut -f2 -d/)
+        tag="${{ inputs.release-tag }}"
+        should_create="${{ inputs.should-create-release }}"
+
+        GH_REPO="https://api.github.com/repos/${owner}/${repo}"
+
+        # Check token
+        curl -o /dev/null -sH "$AUTH" $GH_REPO || {
+          echo "Error: Invalid repo, token or network issue!"
+          exit 1
+        }
+
+        # Check if tag exists
+        response=$(curl -sH "$AUTH" "${GH_REPO}/git/refs/tags/${tag}")
+        eval $(echo "$response" | grep -m 1 "sha.:" | grep -w sha | tr : = | tr -cd '[[:alnum:]]=')
+        [ "$sha" ] || {
+          echo "Error: Tag does not exist: $tag"
+          echo "$response" | awk 'length($0)<100' >&2
+          exit 1
+        }
+
+        # Get ID of the release based on given tag name
+        GH_TAGS="${GH_REPO}/releases/tags/${tag}"
+        response=$(curl -sH "$AUTH" $GH_TAGS)
+        eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=')
+        [ "$id" ] || {
+          # If release does not exist, create it
+          if [[ "$should_create" == "true" ]]; then
+            echo "Tag does not have corresponding release, creating release for tag: $tag..."
+            # NB: the JSON body must be double-quoted so that $tag expands
+            response=$(curl -X POST -sH "$AUTH" -H "Content-Type: application/json" "${GH_REPO}/releases" -d "{\"tag_name\":\"${tag}\",\"name\":\"Coqui STT ${tag}\",\"prerelease\":true}")
+            eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=')
+            [ "$id" ] || {
+              echo "Error: Could not create release for tag: $tag"
+              echo "$response" | awk 'length($0)<100' >&2
+              exit 1
+            }
+          else
+            echo "Error: Could not find release for tag: $tag"
+            echo "$response" | awk 'length($0)<100' >&2
+            exit 1
+          fi
+        }
+
+        # Upload assets
+        for file in $filenames; do
+          # If no asset name is specified, use the file name
+          name="$asset_name"
+          [ "$name" ] || name=$(basename "$file")
+          echo "Uploading asset with name: $name from file: $file"
+          GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${name}"
+          curl -T "$file" -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" $GH_ASSET
+        done
+      shell: bash
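The GitHub API flow implemented by the action above can be exercised by hand when debugging. A minimal sketch, assuming ``GITHUB_TOKEN`` is set, the release already exists, and ``jq`` is available (the action itself avoids the ``jq`` dependency by extracting the id with ``grep``/``tr``/``eval``); the tag and file values here are examples:

.. code-block:: bash

   OWNER=coqui-ai REPO=STT TAG=v0.10.0-alpha.7 FILE=home.tar.xz
   AUTH="Authorization: token ${GITHUB_TOKEN}"
   # look up the release id for the tag
   id=$(curl -sH "$AUTH" "https://api.github.com/repos/${OWNER}/${REPO}/releases/tags/${TAG}" | jq -r .id)
   # asset uploads go to uploads.github.com, not api.github.com
   curl -T "$FILE" -X POST -H "$AUTH" -H "Content-Type: application/octet-stream" \
     "https://uploads.github.com/repos/${OWNER}/${REPO}/releases/${id}/assets?name=$(basename "$FILE")"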
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index 71576a1c..09b2af3a 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -320,7 +320,7 @@ jobs:
         if: needs.tensorflow_opt-Linux.outputs.status == 'missing'
       - uses: ./.github/actions/package-tensorflow
         if: needs.tensorflow_opt-Linux.outputs.status == 'missing'
-      - uses: ./.github/actions/upload-cache-asset
+      - uses: ./.github/actions/upload-release-asset
         with:
           name: ${{ needs.tensorflow_opt-Linux.outputs.cache_key }}.tar.xz
           path: ${{ github.workspace }}/artifacts/home.tar.xz
@@ -365,7 +365,7 @@ jobs:
           path: ${{ github.workspace }}/artifacts/native_client.tar.xz
       - uses: actions/upload-artifact@v2
         with:
-          name: "libstt.tflite.zip"
+          name: "libstt.tflite.Linux.zip"
           path: ${{ github.workspace }}/artifacts/libstt.zip
   build-python-Linux:
     name: "Lin|Build Python bindings"
@@ -834,14 +834,6 @@ jobs:
       - uses: actions/download-artifact@v2
         with:
           name: stt-tflite-3.9-Linux.whl
-      # We need proper manylinux2014 builds before we can publish these wheels.
-      # https://github.com/coqui-ai/STT/issues/1904
-      # - uses: actions/download-artifact@v2
-      #   with:
-      #     name: stt-tflite-3.7-armv7.whl
-      # - uses: actions/download-artifact@v2
-      #   with:
-      #     name: stt-tflite-3.7-aarch64.whl
       - name: Setup PyPI config
         run: |
           cat << EOF > ~/.pypirc
@@ -851,6 +843,68 @@ jobs:
           EOF
       - run: |
           twine upload --repository pypi *.whl
+      # PyPI only supports ARM wheels built on manylinux images, but those aren't
+      # ready for use yet, so we upload our wheels to the corresponding release
+      # for this tag.
+      - uses: actions/download-artifact@v2
+        with:
+          name: stt-tflite-3.7-armv7.whl
+      - uses: actions/download-artifact@v2
+        with:
+          name: stt-tflite-3.7-aarch64.whl
+      - name: Get tag name
+        id: get-tag
+        run: |
+          tag=$(echo "${{ github.ref }}" | sed -e 's|^refs/tags/||')
+          echo "::set-output name=tag::$tag"
+      - name: Upload artifacts to GitHub release
+        uses: ./.github/actions/upload-release-asset
+        with:
+          name: '' # use filename
+          path: "*.whl"
+          release-tag: ${{ steps.get-tag.outputs.tag }}
+          should-create-release: true
+  upload-nc-release-assets:
+    name: "Upload native client artifacts to release assets"
+    runs-on: ubuntu-20.04
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    needs: [build-lib_Windows, build-lib_Linux, build-lib_macOS, build-lib_LinuxAarch64, build-lib_LinuxArmv7]
+    steps:
+      - uses: actions/download-artifact@v2
+        with:
+          name: libstt.tflite.Linux.zip
+      - uses: actions/download-artifact@v2
+        with:
+          name: native_client.tflite.Linux.tar.xz
+      - uses: actions/download-artifact@v2
+        with:
+          name: libstt.tflite.macOS.zip
+      - uses: actions/download-artifact@v2
+        with:
+          name: native_client.tflite.macOS.tar.xz
+      - uses: actions/download-artifact@v2
+        with:
+          name: libstt.tflite.Windows.zip
+      - uses: actions/download-artifact@v2
+        with:
+          name: native_client.tflite.Windows.tar.xz
+      - uses: actions/download-artifact@v2
+        with:
+          name: native_client.tflite.linux.armv7.tar.xz
+      - uses: actions/download-artifact@v2
+        with:
+          name: native_client.tflite.linux.aarch64.tar.xz
+      - name: Get tag name
+        id: get-tag
+        run: |
+          tag=$(echo "${{ github.ref }}" | sed -e 's|^refs/tags/||')
+          echo "::set-output name=tag::$tag"
+      - uses: ./.github/actions/upload-release-asset
+        with:
+          name: '' # use filename
+          path: "*.{tar.xz,zip}"
+          release-tag: ${{ steps.get-tag.outputs.tag }}
+          should-create-release: true
   docker-build:
     name: "Build Dockerfile.build image"
     runs-on: ubuntu-20.04
@@ -964,6 +1018,18 @@ jobs:
           EOF
       - run: |
           twine upload --repository pypi *.whl
+      - name: Get tag name
+        id: get-tag
+        run: |
+          tag=$(echo "${{ github.ref }}" | sed -e 's|^refs/tags/||')
+          echo "::set-output name=tag::$tag"
+      - name: Upload artifacts to GitHub release
+        uses: ./.github/actions/upload-release-asset
+        with:
+          name: '' # use filename
+          path: "*.whl"
+          release-tag: ${{ steps.get-tag.outputs.tag }}
+          should-create-release: true
   npmjs-publish:
     name: "Upload STT packages to npmjs.com"
     runs-on: ubuntu-20.04
@@ -998,6 +1064,18 @@ jobs:
           npm publish --access=public --verbose ${{ github.workspace }}/stt-*.tgz --tag ${{ steps.compute-npm-tag.outputs.npm-tag }}
         env:
           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+      - name: Get tag name
+        id: get-tag
+        run: |
+          tag=$(echo "${{ github.ref }}" | sed -e 's|^refs/tags/||')
+          echo "::set-output name=tag::$tag"
+      - name: Upload artifacts to GitHub release
+        uses: ./.github/actions/upload-release-asset
+        with:
+          name: '' # use filename
+          path: "*.tgz"
+          release-tag: ${{ steps.get-tag.outputs.tag }}
+          should-create-release: true
   # macOS jobs
   swig_macOS:
     name: "Mac|Build SWIG"
@@ -1191,7 +1269,7 @@ jobs:
         if: needs.tensorflow_opt-macOS.outputs.status == 'missing'
       - uses: ./.github/actions/package-tensorflow
         if: needs.tensorflow_opt-macOS.outputs.status == 'missing'
-      - uses: ./.github/actions/upload-cache-asset
+      - uses: ./.github/actions/upload-release-asset
         with:
           name: ${{ needs.tensorflow_opt-macOS.outputs.cache_key }}.tar.xz
           path: ${{ github.workspace }}/artifacts/home.tar.xz
@@ -1229,7 +1307,7 @@ jobs:
           path: ${{ github.workspace }}/artifacts/native_client.tar.xz
       - uses: actions/upload-artifact@v2
         with:
-          name: "libstt.tflite.zip"
+          name: "libstt.tflite.macOS.zip"
           path: ${{ github.workspace }}/artifacts/libstt.zip
   build-python-macOS:
     name: "Mac|Build Python bindings"
@@ -1630,7 +1708,7 @@ jobs:
         if: needs.tensorflow_opt-Windows.outputs.status == 'missing'
       - run: ./ci_scripts/tf-package.sh
         if: needs.tensorflow_opt-Windows.outputs.status == 'missing'
-      - uses: ./.github/actions/upload-cache-asset
+      - uses: ./.github/actions/upload-release-asset
         with:
           name: ${{ needs.tensorflow_opt-Windows.outputs.cache_key }}.tar.xz
           path: ${{ github.workspace }}/artifacts/home.tar.xz
@@ -1676,7 +1754,7 @@ jobs:
           path: ${{ github.workspace }}/artifacts/native_client.tar.xz
       - uses: actions/upload-artifact@v2
         with:
-          name: "libstt.tflite.zip"
+          name: "libstt.tflite.Windows.zip"
           path: ${{ github.workspace }}/artifacts/libstt.zip
   build-python-Windows:
     name: "Win|Build Python bindings"
@@ -2491,7 +2569,7 @@ jobs:
         if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing'
       - uses: ./.github/actions/package-tensorflow
         if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing'
-      - uses: ./.github/actions/upload-cache-asset
+      - uses: ./.github/actions/upload-release-asset
         with:
           name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }}.tar.xz
           path: ${{ github.workspace }}/artifacts/home.tar.xz
@@ -2516,7 +2594,7 @@ jobs:
         if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing'
       - uses: ./.github/actions/package-tensorflow
         if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing'
-      - uses: ./.github/actions/upload-cache-asset
+      - uses: ./.github/actions/upload-release-asset
         with:
           name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }}.tar.xz
           path: ${{ github.workspace }}/artifacts/home.tar.xz
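With PATCH 11 applied, every tagged build attaches its wheels, npm ``.tgz`` packages, and native-client archives to the matching GitHub release. A quick way to spot-check the result from the command line; the tag here is an example and ``jq`` is assumed:

.. code-block:: bash

   # list the asset names and sizes attached to a release tag
   curl -s https://api.github.com/repos/coqui-ai/STT/releases/tags/v1.0.0 \
     | jq -r '.assets[] | "\(.name)\t\(.size) bytes"'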