commit 7b22af1c6d
Merge branch 'master' into r0.8

.gitmodules (vendored)
@@ -2,3 +2,6 @@
 	path = doc/examples
 	url = https://github.com/mozilla/DeepSpeech-examples.git
 	branch = master
+[submodule "tensorflow"]
+	path = tensorflow
+	url = https://github.com/mozilla/tensorflow.git
@@ -53,11 +53,6 @@ RUN dpkg -i bazel_*.deb

 # >> START Configure Tensorflow Build

-# Clone TensorFlow from Mozilla repo
-RUN git clone https://github.com/mozilla/tensorflow/
-WORKDIR /tensorflow
-RUN git checkout r2.2
-
 # GPU Environment Setup
 ENV TF_NEED_ROCM 0
 ENV TF_NEED_OPENCL_SYCL 0
@@ -116,16 +111,15 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \

 WORKDIR /

-RUN git clone $DEEPSPEECH_REPO
+RUN git clone --recursive $DEEPSPEECH_REPO
 WORKDIR /DeepSpeech
 RUN git checkout $DEEPSPEECH_SHA
-# Link DeepSpeech native_client libs to tf folder
-RUN ln -s /DeepSpeech/native_client /tensorflow
+RUN git submodule sync tensorflow/
+RUN git submodule update --init tensorflow/

 # >> START Build and bind

-WORKDIR /tensorflow
+WORKDIR /DeepSpeech/tensorflow

 # Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
 RUN ./configure
@@ -158,10 +152,10 @@ RUN bazel build \
     --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}

 # Copy built libs to /DeepSpeech/native_client
-RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
+RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/

 # Build client.cc and install Python client and decoder bindings
-ENV TFDIR /tensorflow
+ENV TFDIR /DeepSpeech/tensorflow

 RUN nproc
@@ -10,9 +10,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         apt-utils \
         bash-completion \
         build-essential \
+        cmake \
         curl \
         git \
         git-lfs \
+        libboost-all-dev \
         libbz2-dev \
         locales \
         python3-venv \
@@ -50,4 +52,16 @@ RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
 RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
         --artifact convert_graphdef_memmapped_format --target .

+# Build KenLM to generate new scorers
+WORKDIR /DeepSpeech/native_client
+RUN rm -rf kenlm && \
+        git clone https://github.com/kpu/kenlm && \
+        cd kenlm && \
+        git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
+        mkdir -p build && \
+        cd build && \
+        cmake .. && \
+        make -j $(nproc)
+WORKDIR /DeepSpeech
+
 RUN ./bin/run-ldc93s1.sh
@@ -1,3 +1,5 @@
+.. _support:
+
 Contact/Getting Help
 ====================

@@ -20,6 +20,7 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
   --augment dropout \
   --augment pitch \
   --augment tempo \
+  --augment warp \
   --augment time_mask \
   --augment frequency_mask \
   --augment add \
@@ -3,9 +3,9 @@ Language-Specific Data

 This directory contains language-specific data files. Most importantly, you will find here:

-1. A list of unique characters for the target language (e.g. English) in `data/alphabet.txt`
+1. A list of unique characters for the target language (e.g. English) in ``data/alphabet.txt``. After installing the training code, you can check ``python -m deepspeech_training.util.check_characters --help`` for a tool that creates an alphabet file from a list of training CSV files.

-2. A scorer package (`data/lm/kenlm.scorer`) generated with `data/lm/generate_package.py`. The scorer package includes a binary n-gram language model generated with `data/lm/generate_lm.py`.
+2. A scorer package (``data/lm/kenlm.scorer``) generated with ``generate_scorer_package`` (``native_client/generate_scorer_package.cpp``). The scorer package includes a binary n-gram language model generated with ``data/lm/generate_lm.py``.

 For more information on how to build these resources from scratch, see the ``External scorer scripts`` section on `deepspeech.readthedocs.io <https://deepspeech.readthedocs.io/>`_.

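Editor's note: to illustrate what an alphabet-building tool like the ``check_characters`` module mentioned above does, here is a minimal Python sketch. It is not the actual ``deepspeech_training`` utility, and the ``transcript`` column name is an assumption about the training CSV layout:

.. code-block:: python

   import csv
   import sys

   # Minimal illustrative sketch, not the deepspeech_training check_characters
   # tool: collect the unique characters used in the "transcript" column of the
   # given training CSV files and print them one per line, alphabet-file style.
   chars = set()
   for csv_path in sys.argv[1:]:
       with open(csv_path, newline="", encoding="utf-8") as fin:
           for row in csv.DictReader(fin):
               chars.update(row["transcript"])
   for c in sorted(chars):
       print(c)
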
data/lm/generate_package.py (deleted file)
@@ -1,157 +0,0 @@
-#!/usr/bin/env python
-from __future__ import absolute_import, division, print_function
-
-import argparse
-import shutil
-import sys
-
-import ds_ctcdecoder
-from deepspeech_training.util.text import Alphabet, UTF8Alphabet
-from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet
-
-
-def create_bundle(
-    alphabet_path,
-    lm_path,
-    vocab_path,
-    package_path,
-    force_utf8,
-    default_alpha,
-    default_beta,
-):
-    words = set()
-    vocab_looks_char_based = True
-    with open(vocab_path) as fin:
-        for line in fin:
-            for word in line.split():
-                words.add(word.encode("utf-8"))
-                if len(word) > 1:
-                    vocab_looks_char_based = False
-    print("{} unique words read from vocabulary file.".format(len(words)))
-
-    cbm = "Looks" if vocab_looks_char_based else "Doesn't look"
-    print("{} like a character based model.".format(cbm))
-
-    if force_utf8 != None:  # pylint: disable=singleton-comparison
-        use_utf8 = force_utf8.value
-    else:
-        use_utf8 = vocab_looks_char_based
-        print("Using detected UTF-8 mode: {}".format(use_utf8))
-
-    if use_utf8:
-        serialized_alphabet = UTF8Alphabet().serialize()
-    else:
-        if not alphabet_path:
-            raise RuntimeError("No --alphabet path specified, can't continue.")
-        serialized_alphabet = Alphabet(alphabet_path).serialize()
-
-    alphabet = NativeAlphabet()
-    err = alphabet.deserialize(serialized_alphabet, len(serialized_alphabet))
-    if err != 0:
-        raise RuntimeError("Error loading alphabet: {}".format(err))
-
-    scorer = Scorer()
-    scorer.set_alphabet(alphabet)
-    scorer.set_utf8_mode(use_utf8)
-    scorer.reset_params(default_alpha, default_beta)
-    err = scorer.load_lm(lm_path)
-    if err != ds_ctcdecoder.DS_ERR_SCORER_NO_TRIE:
-        print('Error loading language model file: 0x{:X}.'.format(err))
-        print('See the error codes section in https://deepspeech.readthedocs.io for a description.')
-        sys.exit(1)
-    scorer.fill_dictionary(list(words))
-    shutil.copy(lm_path, package_path)
-    # append, not overwrite
-    if scorer.save_dictionary(package_path, True):
-        print("Package created in {}".format(package_path))
-    else:
-        print("Error when creating {}".format(package_path))
-        sys.exit(1)
-
-
-class Tristate(object):
-    def __init__(self, value=None):
-        if any(value is v for v in (True, False, None)):
-            self.value = value
-        else:
-            raise ValueError("Tristate value must be True, False, or None")
-
-    def __eq__(self, other):
-        return (
-            self.value is other.value
-            if isinstance(other, Tristate)
-            else self.value is other
-        )
-
-    def __ne__(self, other):
-        return not self == other
-
-    def __bool__(self):
-        raise TypeError("Tristate object may not be used as a Boolean")
-
-    def __str__(self):
-        return str(self.value)
-
-    def __repr__(self):
-        return "Tristate(%s)" % self.value
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Generate an external scorer package for DeepSpeech."
-    )
-    parser.add_argument(
-        "--alphabet",
-        help="Path of alphabet file to use for vocabulary construction. Words with characters not in the alphabet will not be included in the vocabulary. Optional if using UTF-8 mode.",
-    )
-    parser.add_argument(
-        "--lm",
-        required=True,
-        help="Path of KenLM binary LM file. Must be built without including the vocabulary (use the -v flag). See generate_lm.py for how to create a binary LM.",
-    )
-    parser.add_argument(
-        "--vocab",
-        required=True,
-        help="Path of vocabulary file. Must contain words separated by whitespace.",
-    )
-    parser.add_argument("--package", required=True, help="Path to save scorer package.")
-    parser.add_argument(
-        "--default_alpha",
-        type=float,
-        required=True,
-        help="Default value of alpha hyperparameter.",
-    )
-    parser.add_argument(
-        "--default_beta",
-        type=float,
-        required=True,
-        help="Default value of beta hyperparameter.",
-    )
-    parser.add_argument(
-        "--force_utf8",
-        type=str,
-        default="",
-        help="Boolean flag, force set or unset UTF-8 mode in the scorer package. If not set, infers from the vocabulary. See <https://github.com/mozilla/DeepSpeech/blob/master/doc/Decoder.rst#utf-8-mode> for further explanation",
-    )
-    args = parser.parse_args()
-
-    if args.force_utf8 in ("True", "1", "true", "yes", "y"):
-        force_utf8 = Tristate(True)
-    elif args.force_utf8 in ("False", "0", "false", "no", "n"):
-        force_utf8 = Tristate(False)
-    else:
-        force_utf8 = Tristate(None)
-
-    create_bundle(
-        args.alphabet,
-        args.lm,
-        args.vocab,
-        args.package,
-        force_utf8,
-        args.default_alpha,
-        args.default_beta,
-    )
-
-
-if __name__ == "__main__":
-    main()
@@ -1,13 +1,15 @@
+.. _build-native-client:
+
 Building DeepSpeech Binaries
 ============================

+This section describes how to rebuild binaries. We already provide prebuilt binaries for all supported platforms;
+it is highly advised to use them unless you know what you are doing.
+
 If you'd like to build the DeepSpeech binaries yourself, you'll need the following pre-requisites downloaded and installed:

-* `Mozilla's TensorFlow r2.2 branch <https://github.com/mozilla/tensorflow/tree/r2.2>`_
 * `Bazel 2.0.0 <https://github.com/bazelbuild/bazel/releases/tag/2.0.0>`_
-* `General TensorFlow requirements <https://www.tensorflow.org/install/install_sources>`_
+* `General TensorFlow r2.2 requirements <https://www.tensorflow.org/install/source#tested_build_configurations>`_
 * `libsox <https://sourceforge.net/projects/sox/>`_

 It is required to use our fork of TensorFlow since it includes fixes for common problems encountered when building the native client files.
@@ -28,15 +30,16 @@ If you follow these instructions, you should compile your own binaries of DeepSpeech

 For more information on configuring TensorFlow, read the docs up to the end of `"Configure the Build" <https://www.tensorflow.org/install/source#configure_the_build>`_.

-TensorFlow: Clone & Checkout
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Checkout source code
+^^^^^^^^^^^^^^^^^^^^

-Clone our fork of TensorFlow and checkout the correct version:
+Clone the DeepSpeech source code (TensorFlow will come as a submodule):

 .. code-block::

-   git clone https://github.com/mozilla/tensorflow.git
-   git checkout origin/r2.2
+   git clone https://github.com/mozilla/DeepSpeech.git
+   git submodule sync tensorflow/
+   git submodule update --init tensorflow/

 Bazel: Download & Install
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -57,16 +60,16 @@ Compile DeepSpeech
 ------------------

 Compile ``libdeepspeech.so``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-Within your TensorFlow checkout, create a symbolic link to the DeepSpeech ``native_client`` directory. Assuming DeepSpeech and TensorFlow checkouts are in the same directory, do:
+Within your TensorFlow directory, there should be a symbolic link to the DeepSpeech ``native_client`` directory. If it is not present, create it with the following command:

 .. code-block::

    cd tensorflow
-   ln -s ../DeepSpeech/native_client ./
+   ln -s ../native_client

-You can now use Bazel to build the main DeepSpeech library, ``libdeepspeech.so``\ . Add ``--config=cuda`` if you want a CUDA build.
+You can now use Bazel to build the main DeepSpeech library, ``libdeepspeech.so``. Add ``--config=cuda`` if you want a CUDA build.

 .. code-block::

@@ -77,11 +80,10 @@ The generated binaries will be saved to ``bazel-bin/native_client/``.
 Compile Language Bindings
 ^^^^^^^^^^^^^^^^^^^^^^^^^

-Now, ``cd`` into the ``DeepSpeech/native_client`` directory and use the ``Makefile`` to build all the language bindings (C++ client, Python package, Nodejs package, etc.). Set the environment variable ``TFDIR`` to point to your TensorFlow checkout.
+Now, ``cd`` into the ``DeepSpeech/native_client`` directory and use the ``Makefile`` to build all the language bindings (C++ client, Python package, Nodejs package, etc.).

 .. code-block::

-   TFDIR=~/tensorflow
    cd ../DeepSpeech/native_client
    make deepspeech

@@ -167,13 +169,31 @@ The path of the system tree can be overridden from the default values defined in

    cd ../DeepSpeech/native_client
    make TARGET=<system> deepspeech

-Android devices
-^^^^^^^^^^^^^^^
+Android devices support
+-----------------------

-We have preliminary support for Android relying on TensorFlow Lite, with Java and JNI bindinds. For more details on how to experiment with those, please refer to ``native_client/java/README.rst``.
+We have support for Android relying on TensorFlow Lite, with Java and JNI bindings. For more details on how to experiment with those, please refer to the sections below.

 Please refer to the TensorFlow documentation on how to set up the environment to build for Android (SDK and NDK required).

+Using the library from Android project
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We provide an up-to-date and tested ``libdeepspeech`` usable as an ``AAR`` package,
+for Android versions starting with 7.0 to 11.0. The package is published on
+`JCenter <https://bintray.com/alissy/org.mozilla.deepspeech/libdeepspeech>`_,
+and the ``JCenter`` repository should be available by default in any Android
+project. Please make sure your project is set up to pull from this repository.
+You can then include the library by just adding this line to your
+``gradle.build``, adjusting ``VERSION`` to the version you need:
+
+.. code-block::
+
+   implementation 'deepspeech.mozilla.org:libdeepspeech:VERSION@aar'
+
+Building ``libdeepspeech.so``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 You can build the ``libdeepspeech.so`` using (ARMv7):

 .. code-block::
@@ -186,16 +206,109 @@ Or (ARM64):

    bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++11 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so

+Building ``libdeepspeech.aar``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the unlikely event you have to rebuild the JNI bindings, source code is
+available under the ``libdeepspeech`` subdirectory. Building depends on the shared
+object: please ensure to place ``libdeepspeech.so`` into the matching
+``libdeepspeech/libs/{arm64-v8a,armeabi-v7a,x86_64}/`` subdirectories.
+
+Building the bindings is managed by ``gradle`` and should be limited to issuing
+``./gradlew libdeepspeech:build``, producing an ``AAR`` package in
+``./libdeepspeech/build/outputs/aar/``.
+
+Please note that you might have to copy the file to a local Maven repository
+and adapt the file naming (when missing, the error message should state what
+filename it expects and where).
+
+Building C++ ``deepspeech`` binary
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 Building the ``deepspeech`` binary will happen through ``ndk-build`` (ARMv7):

 .. code-block::

    cd ../DeepSpeech/native_client
-   $ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../../tensorflow/ TARGET_ARCH_ABI=armeabi-v7a
+   $ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../tensorflow/ TARGET_ARCH_ABI=armeabi-v7a

 And (ARM64):

 .. code-block::

    cd ../DeepSpeech/native_client
-   $ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../../tensorflowx/ TARGET_ARCH_ABI=arm64-v8a
+   $ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../tensorflow/ TARGET_ARCH_ABI=arm64-v8a

+Android demo APK
+^^^^^^^^^^^^^^^^
+
+Provided is a very simple Android demo app that allows you to test the library.
+You can build it with ``make apk`` and install the resulting APK file. Please
+refer to the Gradle documentation for more details.
+
+The ``APK`` should be produced in ``/app/build/outputs/apk/``. This demo app might
+require external storage permissions. You can then push model files to your
+device, set the path to the file in the UI and try to run on an audio file.
+When running, it should first play the audio file and then run the decoding. At
+the end of the decoding, you should be presented with the decoded text as well
+as the time elapsed to decode, in milliseconds.
+
+This application is very limited on purpose, and is only here as a very basic
+demo of one usage of the library. For example, it's only able to read PCM
+mono 16kHz 16-bit files, and it might fail on some WAVE files that do not
+follow the specification exactly.
+
+Running ``deepspeech`` via adb
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You should use ``adb push`` to send data to the device; please refer to the Android
+documentation on how to use it.
+
+Please push the DeepSpeech data to ``/sdcard/deepspeech/``, including:
+
+* ``output_graph.tflite``, which is the TF Lite model
+* ``kenlm.scorer``, if you want to use the scorer; please be aware that a scorer
+  that is too big will make the device run out of memory
+
+Then, push the binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``:
+
+* ``deepspeech``
+* ``libdeepspeech.so``
+* ``libc++_shared.so``
+
+You should then be able to run as usual, using a shell from ``adb shell``:
+
+.. code-block::
+
+   user@device$ cd /data/local/tmp/ds/
+   user@device$ LD_LIBRARY_PATH=$(pwd)/ ./deepspeech [...]
+
+Please note that the Android linker does not support ``rpath``, so you have to set
+``LD_LIBRARY_PATH``. Properly wrapped / packaged bindings embed the library
+at a place the linker knows where to search, so Android apps will be fine.
+
+Delegation API
+^^^^^^^^^^^^^^
+
+TensorFlow Lite supports a Delegate API to offload some computation from the main
+CPU. Please refer to `TensorFlow's documentation
+<https://www.tensorflow.org/lite/performance/delegates>`_ for details.
+
+To ease experimentation, we have enabled some of those delegations in our
+Android builds:
+
+* GPU, to leverage OpenGL capabilities
+* NNAPI, the Android API to leverage GPU / DSP / NPU
+* Hexagon, the Qualcomm-specific DSP
+
+This is highly experimental:
+
+* Requires passing the environment variable ``DS_TFLITE_DELEGATE`` with a value of
+  ``gpu``, ``nnapi`` or ``hexagon`` (only one at a time)
+* Might require exported model changes (some ops might not be supported)
+* We can't guarantee it will work, nor that it will be faster than the default
+  implementation
+
+Feedback on improving this is welcome: how it could be exposed in the API, how
+much performance gain you get in your applications, how you had to change
+the model to make it work with a delegate, etc.
+
+See :ref:`the support / contact details <support>`
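Editor's note: as a rough illustration of the delegate selection described in the Delegation API section above, the sketch below launches the C++ client with ``DS_TFLITE_DELEGATE`` set in the child environment. The binary path and file names are placeholders, not canonical values:

.. code-block:: python

   import os
   import subprocess

   # Pick one experimental delegate at a time: "gpu", "nnapi" or "hexagon".
   env = dict(os.environ, DS_TFLITE_DELEGATE="gpu")
   subprocess.run(
       ["./deepspeech", "--model", "output_graph.tflite", "--audio", "audio.wav"],
       env=env,
       check=True,
   )
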
@@ -1,3 +1,5 @@
+.. _c-usage:
+
 C API
 =====

@@ -56,9 +56,11 @@ At decoding time, the scorer is queried every time a Unicode codepoint is predicted

 **Acoustic models trained with ``--utf8`` MUST NOT be used with an alphabet based scorer. Conversely, acoustic models trained with an alphabet file MUST NOT be used with a UTF-8 scorer.**

-UTF-8 scorers can be built by using an input corpus with space separated codepoints. If your corpus only contains single codepoints separated by spaces, ``data/lm/generate_package.py`` should automatically enable UTF-8 mode, and it should print the message "Looks like a character based model."
+UTF-8 scorers can be built by using an input corpus with space separated codepoints. If your corpus only contains single codepoints separated by spaces, ``generate_scorer_package`` should automatically enable UTF-8 mode, and it should print the message "Looks like a character based model."

-If the message "Doesn't look like a character based model." is printed, you should double check your inputs to make sure it only contains single codepoints separated by spaces. UTF-8 mode can be forced by specifying the ``--force_utf8`` flag when running ``data/lm/generate_package.py``, but it is NOT RECOMMENDED.
+If the message "Doesn't look like a character based model." is printed, you should double check your inputs to make sure they only contain single codepoints separated by spaces. UTF-8 mode can be forced by specifying the ``--force_utf8`` flag when running ``generate_scorer_package``, but it is NOT RECOMMENDED.
+
+See :ref:`scorer-scripts` for more details on using ``generate_scorer_package``.

 Because KenLM uses spaces as a word separator, the resulting language model will not include space characters in it. If you wish to use UTF-8 mode but still model spaces, you need to replace spaces in the input corpus with a different character **before** converting it to space separated codepoints. For example:
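Editor's note: the repository's docs follow this paragraph with a concrete example; as a stand-in, here is a minimal sketch of that preprocessing, with ``|`` as an assumed placeholder character:

.. code-block:: python

   # Replace spaces with a placeholder, then emit space-separated codepoints so
   # KenLM treats every codepoint as a "word". Illustrative sketch only.
   def to_codepoint_corpus(line, space_placeholder="|"):
       return " ".join(line.replace(" ", space_placeholder))

   print(to_codepoint_corpus("hello world"))  # h e l l o | w o r l d
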
@@ -5,7 +5,9 @@ External scorer scripts

 DeepSpeech pre-trained models include an external scorer. This document explains how to reproduce our external scorer, as well as adapt the scripts to create your own.

-The scorer is composed of two sub-components, a KenLM language model and a trie data structure containing all words in the vocabulary. In order to create the scorer package, first we must create a KenLM language model (using ``data/lm/generate_lm.py``, and then use ``data/lm/generate_package.py`` to create the final package file including the trie data structure.
+The scorer is composed of two sub-components, a KenLM language model and a trie data structure containing all words in the vocabulary. In order to create the scorer package, first we must create a KenLM language model (using ``data/lm/generate_lm.py``) and then use ``generate_scorer_package`` to create the final package file including the trie data structure.
+
+The ``generate_scorer_package`` binary is part of the native client package that is included with official releases. You can find the appropriate archive for your platform in the `GitHub release downloads <https://github.com/mozilla/DeepSpeech/releases/latest>`_. The native client package is named ``native_client.{arch}.{config}.{plat}.tar.xz``, where ``{arch}`` is the architecture the binary was built for, for example ``amd64`` or ``arm64``, ``{config}`` is the build configuration (which does not matter for building decoder packages), and ``{plat}`` is the platform the binary was built for, for example ``linux`` or ``osx``. If you wanted to run the ``generate_scorer_package`` binary on a Linux desktop, you would download ``native_client.amd64.cpu.linux.tar.xz``.

 Reproducing our external scorer
 -------------------------------
@@ -36,12 +38,15 @@ Else you have to build `KenLM <https://github.com/kpu/kenlm>`_ first and then pa
     --binary_a_bits 255 --binary_q_bits 8 --binary_type trie

-Afterwards you can use ``generate_package.py`` to generate the scorer package using the ``lm.binary`` and ``vocab-500000.txt`` files:
+Afterwards you can use ``generate_scorer_package`` to generate the scorer package using the ``lm.binary`` and ``vocab-500000.txt`` files:

 .. code-block:: bash

    cd data/lm
-   python3 generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt \
+   # Download and extract appropriate native_client package:
+   curl -LO http://github.com/mozilla/DeepSpeech/releases/...
+   tar xvf native_client.*.tar.xz
+   ./generate_scorer_package --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt \
      --package kenlm.scorer --default_alpha 0.931289039105002 --default_beta 1.1834137581510284

 Building your own scorer
@@ -51,7 +56,6 @@ Building your own scorer can be useful if you're using models in a narrow usage

 The LibriSpeech LM training text used by our scorer is around 4GB uncompressed, which should give an idea of the size of a corpus needed for a reasonable language model for general speech recognition. For more constrained use cases with smaller vocabularies, you don't need as much data, but you should still try to gather as much as you can.

-With a text corpus in hand, you can then re-use the ``generate_lm.py`` and ``generate_package.py`` scripts to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit <https://kheafield.com/code/kenlm/>`_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior.
+With a text corpus in hand, you can then re-use ``generate_lm.py`` and ``generate_scorer_package`` to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit <https://kheafield.com/code/kenlm/>`_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior.

-After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_package.py`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script <lm_optimizer.py>` which can be used to find good default values for alpha and beta. To use it, you must first
-generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py``. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_package.py`` again, this time with the new values.
+After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_scorer_package`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script <lm_optimizer.py>` which can be used to find good default values for alpha and beta. To use it, you must first generate a package with any value set for the default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py`` later. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_scorer_package`` again, this time with the new values.
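Editor's note: to make the three-step ``lm_optimizer.py`` workflow concrete, here is a hedged Python sketch that wraps ``generate_scorer_package``. The file paths and the throwaway alpha/beta values are assumptions, not canonical values:

.. code-block:: python

   import subprocess

   def build_package(alpha, beta, package="kenlm.scorer"):
       # Flags as documented above; paths are illustrative.
       subprocess.run(
           ["./generate_scorer_package",
            "--alphabet", "../alphabet.txt",
            "--lm", "lm.binary",
            "--vocab", "vocab-500000.txt",
            "--package", package,
            "--default_alpha", str(alpha),
            "--default_beta", str(beta)],
           check=True,
       )

   # Step 1: build a throwaway package; the values don't matter yet.
   build_package(0.5, 0.5)
   # Step 2: run lm_optimizer.py against kenlm.scorer and note the values it reports.
   # Step 3: rebuild the package with the tuned values, for example:
   build_package(0.931289039105002, 1.1834137581510284)
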
@@ -399,6 +399,20 @@ Spectrogram domain augmentations
   * **factor**: speed factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)


+**Warp augmentation** ``--augment warp[p=<float>,nt=<int-range>,nf=<int-range>,wt=<float-range>,wf=<float-range>]``
+  Applies a non-linear image warp to the spectrogram. This is achieved by randomly shifting a grid of equally distributed warp points along the time and frequency axes.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) that a given sample gets augmented by this method
+  * **nt**: number of equally distributed warp grid lines along the time axis of the spectrogram (excluding the edges)
+  * **nf**: number of equally distributed warp grid lines along the frequency axis of the spectrogram (excluding the edges)
+  * **wt**: standard deviation of the random shift applied to warp points along the time axis (0.0 = no warp, 1.0 = half the distance to the neighbour point)
+  * **wf**: standard deviation of the random shift applied to warp points along the frequency axis (0.0 = no warp, 1.0 = half the distance to the neighbour point)
+
+
 **Frequency mask augmentation** ``--augment frequency_mask[p=<float>,n=<int-range>,size=<int-range>]``
   Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779

@@ -469,6 +483,7 @@ Example training with all augmentations:
   --augment volume[p=0.1,dbfs=-10:-40] \
   --augment pitch[p=0.1,pitch=1~0.2] \
   --augment tempo[p=0.1,factor=1~0.5] \
+  --augment warp[p=0.1,nt=4,nf=1,wt=0.5:1.0,wf=0.1:0.2] \
   --augment frequency_mask[p=0.1,n=1:3,size=1:5] \
   --augment time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40] \
   --augment dropout[p=0.1,rate=0.05] \
@@ -5,7 +5,7 @@ Using a Pre-trained Model

 Inference using a DeepSpeech pre-trained model can be done with a client/language binding package. We have four clients/language bindings in this repository, listed below, and also a few community-maintained clients/language bindings in other repositories, listed `further down in this README <#third-party-bindings>`_.

-* `The C API <c-usage>`.
+* :ref:`The C API <c-usage>`
 * :ref:`The Python package/language binding <py-usage>`
 * :ref:`The Node.JS package/language binding <nodejs-usage>`
 * :ref:`The command-line client <cli-usage>`
@@ -40,6 +40,20 @@ If you want to use the pre-trained English model for performing speech-to-text,

    wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.pbmm
    wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.scorer

+There are several pre-trained model files available in official releases. Files ending in ``.pbmm`` are compatible with clients and language bindings built against the standard TensorFlow runtime. Usually these packages are simply called ``deepspeech``. These files are also compatible with CUDA enabled clients and language bindings; those packages are usually called ``deepspeech-gpu``. Files ending in ``.tflite`` are compatible with clients and language bindings built against the `TensorFlow Lite runtime <https://www.tensorflow.org/lite/>`_. These models are optimized for size and performance on low power devices. On desktop platforms, the compatible packages are called ``deepspeech-tflite``. On Android and Raspberry Pi, we only publish TensorFlow Lite enabled packages, and they are simply called ``deepspeech``. You can see a full list of supported platforms and which TensorFlow runtime is supported at :ref:`supported-platforms-inference`.
+
++--------------------+---------------------+---------------------+
+| Package/Model type | .pbmm               | .tflite             |
++====================+=====================+=====================+
+| deepspeech         | Depends on platform | Depends on platform |
++--------------------+---------------------+---------------------+
+| deepspeech-gpu     | ✅                  | ❌                  |
++--------------------+---------------------+---------------------+
+| deepspeech-tflite  | ❌                  | ✅                  |
++--------------------+---------------------+---------------------+
+
+Finally, the pre-trained model files also include files ending in ``.scorer``. These are external scorers (language models) that are used at inference time in conjunction with an acoustic model (``.pbmm`` or ``.tflite`` file) to produce transcriptions. We also provide further documentation on :ref:`the decoding process <decoder-docs>` and :ref:`how language models are generated <scorer-scripts>`.
+
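Editor's note: as a quick illustration of how these files fit together at inference time, here is a minimal Python sketch, assuming the ``deepspeech`` pip package and a mono 16 kHz, 16-bit WAV file (file names are examples, not canonical):

.. code-block:: python

   import wave

   import numpy as np
   from deepspeech import Model

   # Load the acoustic model and attach the external scorer.
   ds = Model("deepspeech-0.7.4-models.pbmm")
   ds.enableExternalScorer("deepspeech-0.7.4-models.scorer")

   # Read a mono 16 kHz, 16-bit PCM WAV file into an int16 buffer.
   with wave.open("audio.wav", "rb") as w:
       audio = np.frombuffer(w.readframes(w.getnframes()), dtype=np.int16)

   print(ds.stt(audio))
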
 Model compatibility
 ^^^^^^^^^^^^^^^^^^^

@@ -60,6 +60,10 @@ See the output of ``deepspeech -h`` for more information on the use of ``deepspeech``

    SUPPORTED_PLATFORMS

+   BUILDING
+
+.. include:: ../SUPPORT.rst
+
 .. toctree::
    :maxdepth: 2
    :caption: Decoder and scorer

hooks/pre_build (new executable file)
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# This script is intended for Docker Hub Automated Builds since we can't
+# build on TaskCluster because of too old Docker version (see issue #3057)
+#
+# Docker Hub expects an existing ready-to-use Dockerfile, and this hook allows
+# us to generate one based on the existing |make| template, but we need to change
+# it in-place, hence the |cp| statement
+#
+# Docker Hub Automated Builds needs to be configured as:
+#
+# Source Type | Source                | Docker Tag | Dockerfile Location   | Build Context
+# ----------------------------------------------------------------------------------------
+# Tag         | /^v([0-9.\-a-zA-Z]+)/ | v{\1}      | Dockerfile.train.tmpl | /
+# Tag         | /^v([0-9.\-a-zA-Z]+)/ | v{\1}      | Dockerfile.build.tmpl | /
+#
+# Docker Hub Automated builds will populate some env variables:
+# - DOCKERFILE_PATH: the filename defined as "Dockerfile Location"
+# - SOURCE_REPOSITORY_URL: the url of the repo triggering the tag event
+# - SOURCE_COMMIT: the sha1 of the commit triggering the tag event
+#
+# More details: https://docs.docker.com/docker-hub/builds/advanced/#environment-variables-for-building-and-testing
+
+set -e # Exit immediately if a command exits with a non-zero status.
+set -u # Treat unset variables as an error.
+
+DOCKERFILE_TARGET=$(echo ${DOCKERFILE_PATH} | sed -e 's/\.tmpl//g')
+make DEEPSPEECH_REPO=${SOURCE_REPOSITORY_URL} DEEPSPEECH_SHA=${SOURCE_COMMIT} ${DOCKERFILE_TARGET}
+cp ${DOCKERFILE_TARGET} ${DOCKERFILE_PATH}
+
+#### If you need to inspect the generated Dockerfile
+## echo "----8<----8<----8<----8<----8<----8<----8<----8<----8<----8<----8<----8<----"
+## cat ${DOCKERFILE_PATH}
+## echo "----8<----8<----8<----8<----8<----8<----8<----8<----8<----8<----8<----8<----"
@@ -2,6 +2,7 @@

 load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object")
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")

 load(
     "@org_tensorflow//tensorflow/lite:build_def.bzl",
@@ -74,16 +75,24 @@ cc_library(
         "ctcdecode/scorer.cpp",
         "ctcdecode/path_trie.cpp",
         "ctcdecode/path_trie.h",
+        "alphabet.cc",
     ] + OPENFST_SOURCES_PLATFORM,
     hdrs = [
         "ctcdecode/ctc_beam_search_decoder.h",
         "ctcdecode/scorer.h",
+        "ctcdecode/decoder_utils.h",
+        "alphabet.h",
     ],
     includes = [
         ".",
         "ctcdecode/third_party/ThreadPool",
     ] + OPENFST_INCLUDES_PLATFORM,
-    deps = [":kenlm"]
+    deps = [":kenlm"],
+    linkopts = [
+        "-lm",
+        "-ldl",
+        "-pthread",
+    ],
 )

 tf_cc_shared_object(
@@ -91,11 +100,11 @@ tf_cc_shared_object(
     srcs = [
         "deepspeech.cc",
         "deepspeech.h",
-        "alphabet.h",
-        "modelstate.h",
+        "deepspeech_errors.cc",
         "modelstate.cc",
-        "workspace_status.h",
+        "modelstate.h",
         "workspace_status.cc",
+        "workspace_status.h",
     ] + select({
         "//native_client:tflite": [
             "tflitemodelstate.h",
@@ -130,6 +139,7 @@ tf_cc_shared_object(
     deps = select({
         "//native_client:tflite": [
             "//tensorflow/lite/kernels:builtin_ops",
+            "//tensorflow/lite/tools/evaluation:utils",
         ],
         "//conditions:default": [
             "//tensorflow/core:core_cpu",
@@ -185,6 +195,27 @@ genrule(
     cmd = "dsymutil $(location :libdeepspeech.so) -o $@"
 )

+cc_binary(
+    name = "generate_scorer_package",
+    srcs = [
+        "generate_scorer_package.cpp",
+        "deepspeech_errors.cc",
+    ],
+    copts = ["-std=c++11"],
+    deps = [
+        ":decoder",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/flags:parse",
+        "@com_google_absl//absl/types:optional",
+        "@boost//:program_options",
+    ],
+    linkopts = [
+        "-lm",
+        "-ldl",
+        "-pthread",
+    ],
+)
+
 cc_binary(
     name = "enumerate_kenlm_vocabulary",
     srcs = [
@@ -201,10 +232,5 @@ cc_binary(
         "trie_load.cc",
     ],
     copts = ["-std=c++11"],
-    linkopts = [
-        "-lm",
-        "-ldl",
-        "-pthread",
-    ],
     deps = [":decoder"],
 )

native_client/alphabet.cc (new file)
@@ -0,0 +1,189 @@
+#include "alphabet.h"
+#include "ctcdecode/decoder_utils.h"
+
+#include <fstream>
+
+// std::getline, but handle newline conventions from multiple platforms instead
+// of just the platform this code was built for
+std::istream&
+getline_crossplatform(std::istream& is, std::string& t)
+{
+  t.clear();
+
+  // The characters in the stream are read one-by-one using a std::streambuf.
+  // That is faster than reading them one-by-one using the std::istream.
+  // Code that uses streambuf this way must be guarded by a sentry object.
+  // The sentry object performs various tasks,
+  // such as thread synchronization and updating the stream state.
+  std::istream::sentry se(is, true);
+  std::streambuf* sb = is.rdbuf();
+
+  while (true) {
+    int c = sb->sbumpc();
+    switch (c) {
+      case '\n':
+        return is;
+      case '\r':
+        if(sb->sgetc() == '\n')
+          sb->sbumpc();
+        return is;
+      case std::streambuf::traits_type::eof():
+        // Also handle the case when the last line has no line ending
+        if(t.empty())
+          is.setstate(std::ios::eofbit);
+        return is;
+      default:
+        t += (char)c;
+    }
+  }
+}
+
+int
+Alphabet::init(const char *config_file)
+{
+  std::ifstream in(config_file, std::ios::in);
+  if (!in) {
+    return 1;
+  }
+  unsigned int label = 0;
+  space_label_ = -2;
+  for (std::string line; getline_crossplatform(in, line);) {
+    if (line.size() == 2 && line[0] == '\\' && line[1] == '#') {
+      line = '#';
+    } else if (line[0] == '#') {
+      continue;
+    }
+    //TODO: we should probably do something more i18n-aware here
+    if (line == " ") {
+      space_label_ = label;
+    }
+    label_to_str_[label] = line;
+    str_to_label_[line] = label;
+    ++label;
+  }
+  size_ = label;
+  in.close();
+  return 0;
+}
+
+std::string
+Alphabet::Serialize()
+{
+  // Serialization format is a sequence of (key, value) pairs, where key is
+  // a uint16_t and value is a uint16_t length followed by `length` UTF-8
+  // encoded bytes with the label.
+  std::stringstream out;
+
+  // We start by writing the number of pairs in the buffer as uint16_t.
+  uint16_t size = size_;
+  out.write(reinterpret_cast<char*>(&size), sizeof(size));
+
+  for (auto it = label_to_str_.begin(); it != label_to_str_.end(); ++it) {
+    uint16_t key = it->first;
+    string str = it->second;
+    uint16_t len = str.length();
+    // Then we write the key as uint16_t, followed by the length of the value
+    // as uint16_t, followed by `length` bytes (the value itself).
+    out.write(reinterpret_cast<char*>(&key), sizeof(key));
+    out.write(reinterpret_cast<char*>(&len), sizeof(len));
+    out.write(str.data(), len);
+  }
+
+  return out.str();
+}
+
+int
+Alphabet::Deserialize(const char* buffer, const int buffer_size)
+{
+  // See util/text.py for an explanation of the serialization format.
+  int offset = 0;
+  if (buffer_size - offset < sizeof(uint16_t)) {
+    return 1;
+  }
+  uint16_t size = *(uint16_t*)(buffer + offset);
+  offset += sizeof(uint16_t);
+  size_ = size;
+
+  for (int i = 0; i < size; ++i) {
+    if (buffer_size - offset < sizeof(uint16_t)) {
+      return 1;
+    }
+    uint16_t label = *(uint16_t*)(buffer + offset);
+    offset += sizeof(uint16_t);
+
+    if (buffer_size - offset < sizeof(uint16_t)) {
+      return 1;
+    }
+    uint16_t val_len = *(uint16_t*)(buffer + offset);
+    offset += sizeof(uint16_t);
+
+    if (buffer_size - offset < val_len) {
+      return 1;
+    }
+    std::string val(buffer+offset, val_len);
+    offset += val_len;
+
+    label_to_str_[label] = val;
+    str_to_label_[val] = label;
+
+    if (val == " ") {
+      space_label_ = label;
+    }
+  }
+
+  return 0;
+}
+
+std::string
+Alphabet::DecodeSingle(unsigned int label) const
+{
+  auto it = label_to_str_.find(label);
+  if (it != label_to_str_.end()) {
+    return it->second;
+  } else {
+    std::cerr << "Invalid label " << label << std::endl;
+    abort();
+  }
+}
+
+unsigned int
+Alphabet::EncodeSingle(const std::string& string) const
+{
+  auto it = str_to_label_.find(string);
+  if (it != str_to_label_.end()) {
+    return it->second;
+  } else {
+    std::cerr << "Invalid string " << string << std::endl;
+    abort();
+  }
+}
+
+std::string
+Alphabet::Decode(const std::vector<unsigned int>& input) const
+{
+  std::string word;
+  for (auto ind : input) {
+    word += DecodeSingle(ind);
+  }
+  return word;
+}
+
+std::string
+Alphabet::Decode(const unsigned int* input, int length) const
+{
+  std::string word;
+  for (int i = 0; i < length; ++i) {
+    word += DecodeSingle(input[i]);
+  }
+  return word;
+}
+
+std::vector<unsigned int>
+Alphabet::Encode(const std::string& input) const
+{
+  std::vector<unsigned int> result;
+  for (auto cp : split_into_codepoints(input)) {
+    result.push_back(EncodeSingle(cp));
+  }
+  return result;
+}
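Editor's note: for reference, the (count, then key/length/bytes) layout written by ``Alphabet::Serialize`` above can be parsed with a few lines of Python. This sketch assumes the little-endian byte order of typical build targets:

.. code-block:: python

   import struct

   def parse_serialized_alphabet(buf):
       """Parse count, then (key: uint16, len: uint16, `len` UTF-8 bytes) pairs."""
       labels = {}
       count = struct.unpack_from("<H", buf, 0)[0]
       offset = 2
       for _ in range(count):
           key, length = struct.unpack_from("<HH", buf, offset)
           offset += 4
           labels[key] = buf[offset:offset + length].decode("utf-8")
           offset += length
       return labels
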
@@ -1,9 +1,6 @@
 #ifndef ALPHABET_H
 #define ALPHABET_H

-#include <cassert>
-#include <fstream>
-#include <iostream>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -18,92 +15,15 @@ public:
   Alphabet() = default;
   Alphabet(const Alphabet&) = default;
   Alphabet& operator=(const Alphabet&) = default;
+  virtual ~Alphabet() = default;

-  int init(const char *config_file) {
-    std::ifstream in(config_file, std::ios::in);
-    if (!in) {
-      return 1;
-    }
-    unsigned int label = 0;
-    space_label_ = -2;
-    for (std::string line; std::getline(in, line);) {
-      if (line.size() == 2 && line[0] == '\\' && line[1] == '#') {
-        line = '#';
-      } else if (line[0] == '#') {
-        continue;
-      }
-      //TODO: we should probably do something more i18n-aware here
-      if (line == " ") {
-        space_label_ = label;
-      }
-      label_to_str_[label] = line;
-      str_to_label_[line] = label;
-      ++label;
-    }
-    size_ = label;
-    in.close();
-    return 0;
-  }
+  virtual int init(const char *config_file);

-  int deserialize(const char* buffer, const int buffer_size) {
-    // See util/text.py for an explanation of the serialization format.
-    int offset = 0;
-    if (buffer_size - offset < sizeof(uint16_t)) {
-      return 1;
-    }
-    uint16_t size = *(uint16_t*)(buffer + offset);
-    offset += sizeof(uint16_t);
-    size_ = size;
-
-    for (int i = 0; i < size; ++i) {
-      if (buffer_size - offset < sizeof(uint16_t)) {
-        return 1;
-      }
-      uint16_t label = *(uint16_t*)(buffer + offset);
-      offset += sizeof(uint16_t);
-
-      if (buffer_size - offset < sizeof(uint16_t)) {
-        return 1;
-      }
-      uint16_t val_len = *(uint16_t*)(buffer + offset);
-      offset += sizeof(uint16_t);
-
-      if (buffer_size - offset < val_len) {
-        return 1;
-      }
-      std::string val(buffer+offset, val_len);
-      offset += val_len;
-
-      label_to_str_[label] = val;
-      str_to_label_[val] = label;
-
-      if (val == " ") {
-        space_label_ = label;
-      }
-    }
-
-    return 0;
-  }
-
-  const std::string& StringFromLabel(unsigned int label) const {
-    auto it = label_to_str_.find(label);
-    if (it != label_to_str_.end()) {
-      return it->second;
-    } else {
-      std::cerr << "Invalid label " << label << std::endl;
-      abort();
-    }
-  }
-
-  unsigned int LabelFromString(const std::string& string) const {
-    auto it = str_to_label_.find(string);
-    if (it != str_to_label_.end()) {
-      return it->second;
-    } else {
-      std::cerr << "Invalid string " << string << std::endl;
-      abort();
-    }
-  }
+  // Serialize alphabet into a binary buffer.
+  std::string Serialize();
+
+  // Deserialize alphabet from a binary buffer.
+  int Deserialize(const char* buffer, const int buffer_size);

   size_t GetSize() const {
     return size_;
@@ -117,20 +37,47 @@ public:
    return space_label_;
  }

-  template <typename T>
-  std::string LabelsToString(const std::vector<T>& input) const {
-    std::string word;
-    for (auto ind : input) {
-      word += StringFromLabel(ind);
-    }
-    return word;
-  }
-
-private:
+  // Decode a single label into a string.
+  std::string DecodeSingle(unsigned int label) const;
+
+  // Encode a single character/output class into a label.
+  unsigned int EncodeSingle(const std::string& string) const;
+
+  // Decode a sequence of labels into a string.
+  std::string Decode(const std::vector<unsigned int>& input) const;
+
+  // We provide a C-style overload for accepting NumPy arrays as input, since
+  // the NumPy library does not have built-in typemaps for std::vector<T>.
+  std::string Decode(const unsigned int* input, int length) const;
+
+  // Encode a sequence of character/output classes into a sequence of labels.
+  // Characters are assumed to always take a single Unicode codepoint.
+  std::vector<unsigned int> Encode(const std::string& input) const;
+
+protected:
  size_t size_;
  unsigned int space_label_;
  std::unordered_map<unsigned int, std::string> label_to_str_;
  std::unordered_map<std::string, unsigned int> str_to_label_;
};
+
+class UTF8Alphabet : public Alphabet
+{
+public:
+  UTF8Alphabet() {
+    size_ = 255;
+    space_label_ = ' ' - 1;
+    for (size_t i = 0; i < size_; ++i) {
+      std::string val(1, i+1);
+      label_to_str_[i] = val;
+      str_to_label_[val] = i;
+    }
+  }
+
+  int init(const char*) override {
+    return 0;
+  }
+};

#endif //ALPHABET_H
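The refactored Alphabet API replaces the old ``StringFromLabel``/``LabelsToString`` pair with explicit ``EncodeSingle``/``DecodeSingle`` and symmetric ``Encode``/``Decode``. As a rough sketch of the intended roundtrip, using the Python convenience wrapper added later in this commit (the ``ds_ctcdecoder`` build and the ``alphabet.txt`` file are both assumptions here, not part of the diff):

.. code-block:: python

   # Sketch only: assumes a built ds_ctcdecoder package and a local
   # alphabet.txt listing one character per line; both are placeholders.
   from ds_ctcdecoder import Alphabet

   alphabet = Alphabet('alphabet.txt')  # wrapper calls init() and raises on failure

   labels = alphabet.Encode('hello')    # string -> list of unsigned int labels
   text = alphabet.Decode(labels)       # labels -> string
   assert text == 'hello'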
@@ -1,7 +1,7 @@
from __future__ import absolute_import, division, print_function

from . import swigwrapper  # pylint: disable=import-self
-from .swigwrapper import Alphabet
+from .swigwrapper import UTF8Alphabet

__version__ = swigwrapper.__version__

@@ -30,24 +30,25 @@ class Scorer(swigwrapper.Scorer):
        assert beta is not None, 'beta parameter is required'
        assert scorer_path, 'scorer_path parameter is required'

-        serialized = alphabet.serialize()
-        native_alphabet = swigwrapper.Alphabet()
-        err = native_alphabet.deserialize(serialized, len(serialized))
+        err = self.init(scorer_path, alphabet)
        if err != 0:
-            raise ValueError('Error when deserializing alphabet.')
-
-        err = self.init(scorer_path.encode('utf-8'),
-                        native_alphabet)
-        if err != 0:
-            raise ValueError('Scorer initialization failed with error code {}'.format(err))
+            raise ValueError('Scorer initialization failed with error code 0x{:X}'.format(err))

        self.reset_params(alpha, beta)

-    def load_lm(self, lm_path):
-        return super(Scorer, self).load_lm(lm_path.encode('utf-8'))
-
-    def save_dictionary(self, save_path, *args, **kwargs):
-        return super(Scorer, self).save_dictionary(save_path.encode('utf-8'), *args, **kwargs)
+
+class Alphabet(swigwrapper.Alphabet):
+    """Convenience wrapper for Alphabet which calls init in the constructor"""
+    def __init__(self, config_path):
+        super(Alphabet, self).__init__()
+        err = self.init(config_path)
+        if err != 0:
+            raise ValueError('Alphabet initialization failed with error code 0x{:X}'.format(err))
+
+    def Encode(self, input):
+        """Convert SWIG's UnsignedIntVec to a Python list"""
+        res = super(Alphabet, self).Encode(input)
+        return [el for el in res]


def ctc_beam_search_decoder(probs_seq,
@@ -79,15 +80,10 @@ def ctc_beam_search_decoder(probs_seq,
        results, in descending order of the confidence.
    :rtype: list
    """
-    serialized = alphabet.serialize()
-    native_alphabet = swigwrapper.Alphabet()
-    err = native_alphabet.deserialize(serialized, len(serialized))
-    if err != 0:
-        raise ValueError("Error when deserializing alphabet.")
    beam_results = swigwrapper.ctc_beam_search_decoder(
-        probs_seq, native_alphabet, beam_size, cutoff_prob, cutoff_top_n,
+        probs_seq, alphabet, beam_size, cutoff_prob, cutoff_top_n,
        scorer)
-    beam_results = [(res.confidence, alphabet.decode(res.tokens)) for res in beam_results]
+    beam_results = [(res.confidence, alphabet.Decode(res.tokens)) for res in beam_results]
    return beam_results

@@ -126,14 +122,9 @@ def ctc_beam_search_decoder_batch(probs_seq,
        results, in descending order of the confidence.
    :rtype: list
    """
-    serialized = alphabet.serialize()
-    native_alphabet = swigwrapper.Alphabet()
-    err = native_alphabet.deserialize(serialized, len(serialized))
-    if err != 0:
-        raise ValueError("Error when deserializing alphabet.")
-    batch_beam_results = swigwrapper.ctc_beam_search_decoder_batch(probs_seq, seq_lengths, native_alphabet, beam_size, num_processes, cutoff_prob, cutoff_top_n, scorer)
+    batch_beam_results = swigwrapper.ctc_beam_search_decoder_batch(probs_seq, seq_lengths, alphabet, beam_size, num_processes, cutoff_prob, cutoff_top_n, scorer)
    batch_beam_results = [
-        [(res.confidence, alphabet.decode(res.tokens)) for res in beam_results]
+        [(res.confidence, alphabet.Decode(res.tokens)) for res in beam_results]
        for beam_results in batch_beam_results
    ]
    return batch_beam_results
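With the per-call serialize/deserialize dance gone, the native decoder now takes the Python-side ``Alphabet`` object directly. A hedged end-to-end sketch of the new calling convention; the file names, alpha/beta values and probabilities below are placeholders, not values from this commit:

.. code-block:: python

   # Sketch only: paths, hyperparameters and probabilities are placeholders.
   import numpy as np
   from ds_ctcdecoder import Alphabet, Scorer, ctc_beam_search_decoder

   alphabet = Alphabet('alphabet.txt')
   scorer = Scorer(alpha=0.93, beta=1.18,
                   scorer_path='kenlm.scorer', alphabet=alphabet)

   # One row per timestep, one column per label plus the CTC blank.
   probs = np.random.uniform(size=(50, alphabet.GetSize() + 1))
   probs /= probs.sum(axis=1, keepdims=True)

   results = ctc_beam_search_decoder(probs, alphabet, beam_size=16, scorer=scorer)
   print(results[0])  # (confidence, transcript) pairs, best first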
@@ -46,7 +46,8 @@ CTC_DECODER_FILES = [
    'scorer.cpp',
    'path_trie.cpp',
    'decoder_utils.cpp',
-    'workspace_status.cc'
+    'workspace_status.cc',
+    '../alphabet.cc',
]

def build_archive(srcs=[], out_name='', build_dir='temp_build/temp_build', debug=False, num_parallel=1):
@@ -119,7 +119,7 @@ bool prefix_compare_external(const PathTrie *x, const PathTrie *y, const std::un
  }
}

-void add_word_to_fst(const std::vector<int> &word,
+void add_word_to_fst(const std::vector<unsigned int> &word,
                     fst::StdVectorFst *dictionary) {
  if (dictionary->NumStates() == 0) {
    fst::StdVectorFst::StateId start = dictionary->AddState();
@@ -144,7 +144,7 @@ bool add_word_to_dictionary(
    fst::StdVectorFst *dictionary) {
  auto characters = utf8 ? split_into_bytes(word) : split_into_codepoints(word);

-  std::vector<int> int_word;
+  std::vector<unsigned int> int_word;

  for (auto &c : characters) {
    auto int_c = char_map.find(c);
@@ -86,7 +86,7 @@ std::vector<std::string> split_into_codepoints(const std::string &str);
std::vector<std::string> split_into_bytes(const std::string &str);

// Add a word in index to the dicionary of fst
-void add_word_to_fst(const std::vector<int> &word,
+void add_word_to_fst(const std::vector<unsigned int> &word,
                     fst::StdVectorFst *dictionary);

// Return whether a byte is a code point boundary (not a continuation byte).
@@ -8,8 +8,8 @@
 */
struct Output {
  double confidence;
-  std::vector<int> tokens;
-  std::vector<int> timesteps;
+  std::vector<unsigned int> tokens;
+  std::vector<unsigned int> timesteps;
};

#endif // OUTPUT_H_
@@ -35,7 +35,7 @@ PathTrie::~PathTrie() {
  }
}

-PathTrie* PathTrie::get_path_trie(int new_char, int new_timestep, float cur_log_prob_c, bool reset) {
+PathTrie* PathTrie::get_path_trie(unsigned int new_char, unsigned int new_timestep, float cur_log_prob_c, bool reset) {
  auto child = children_.begin();
  for (; child != children_.end(); ++child) {
    if (child->first == new_char) {
@@ -102,7 +102,7 @@ PathTrie* PathTrie::get_path_trie(int new_char, int new_timestep, float cur_log_
  }
}

-void PathTrie::get_path_vec(std::vector<int>& output, std::vector<int>& timesteps) {
+void PathTrie::get_path_vec(std::vector<unsigned int>& output, std::vector<unsigned int>& timesteps) {
  // Recursive call: recurse back until stop condition, then append data in
  // correct order as we walk back down the stack in the lines below.
  if (parent != nullptr) {
@@ -114,8 +114,8 @@ void PathTrie::get_path_vec(std::vector<int>& output, std::vector<int>& timestep
  }
}

-PathTrie* PathTrie::get_prev_grapheme(std::vector<int>& output,
-                                      std::vector<int>& timesteps,
+PathTrie* PathTrie::get_prev_grapheme(std::vector<unsigned int>& output,
+                                      std::vector<unsigned int>& timesteps,
                                      const Alphabet& alphabet)
{
  PathTrie* stop = this;
@@ -124,7 +124,7 @@ PathTrie* PathTrie::get_prev_grapheme(std::vector<int>& output,
  }
  // Recursive call: recurse back until stop condition, then append data in
  // correct order as we walk back down the stack in the lines below.
-  if (!byte_is_codepoint_boundary(alphabet.StringFromLabel(character)[0])) {
+  if (!byte_is_codepoint_boundary(alphabet.DecodeSingle(character)[0])) {
    stop = parent->get_prev_grapheme(output, timesteps, alphabet);
  }
  output.push_back(character);
@@ -135,7 +135,7 @@ PathTrie* PathTrie::get_prev_grapheme(std::vector<int>& output,
int PathTrie::distance_to_codepoint_boundary(unsigned char *first_byte,
                                             const Alphabet& alphabet)
{
-  if (byte_is_codepoint_boundary(alphabet.StringFromLabel(character)[0])) {
+  if (byte_is_codepoint_boundary(alphabet.DecodeSingle(character)[0])) {
    *first_byte = (unsigned char)character + 1;
    return 1;
  }
@@ -146,8 +146,8 @@ int PathTrie::distance_to_codepoint_boundary(unsigned char *first_byte,
  return 0;
}

-PathTrie* PathTrie::get_prev_word(std::vector<int>& output,
-                                  std::vector<int>& timesteps,
+PathTrie* PathTrie::get_prev_word(std::vector<unsigned int>& output,
+                                  std::vector<unsigned int>& timesteps,
                                  const Alphabet& alphabet)
{
  PathTrie* stop = this;
@@ -225,7 +225,7 @@ void PathTrie::print(const Alphabet& a) {
  for (PathTrie* el : chain) {
    printf("%X ", (unsigned char)(el->character));
    if (el->character != ROOT_) {
-      tr.append(a.StringFromLabel(el->character));
+      tr.append(a.DecodeSingle(el->character));
    }
  }
  printf("\ntimesteps:\t ");
@@ -21,22 +21,22 @@ public:
  ~PathTrie();

  // get new prefix after appending new char
-  PathTrie* get_path_trie(int new_char, int new_timestep, float log_prob_c, bool reset = true);
+  PathTrie* get_path_trie(unsigned int new_char, unsigned int new_timestep, float log_prob_c, bool reset = true);

  // get the prefix data in correct time order from root to current node
-  void get_path_vec(std::vector<int>& output, std::vector<int>& timesteps);
+  void get_path_vec(std::vector<unsigned int>& output, std::vector<unsigned int>& timesteps);

  // get the prefix data in correct time order from beginning of last grapheme to current node
-  PathTrie* get_prev_grapheme(std::vector<int>& output,
-                              std::vector<int>& timesteps,
+  PathTrie* get_prev_grapheme(std::vector<unsigned int>& output,
+                              std::vector<unsigned int>& timesteps,
                              const Alphabet& alphabet);

  // get the distance from current node to the first codepoint boundary, and the byte value at the boundary
  int distance_to_codepoint_boundary(unsigned char *first_byte, const Alphabet& alphabet);

  // get the prefix data in correct time order from beginning of last word to current node
-  PathTrie* get_prev_word(std::vector<int>& output,
-                          std::vector<int>& timesteps,
+  PathTrie* get_prev_word(std::vector<unsigned int>& output,
+                          std::vector<unsigned int>& timesteps,
                          const Alphabet& alphabet);

  // update log probs
@@ -64,8 +64,8 @@ public:
  float log_prob_c;
  float score;
  float approx_ctc;
-  int character;
-  int timestep;
+  unsigned int character;
+  unsigned int timestep;
  PathTrie* parent;

private:
@@ -73,7 +73,7 @@ private:
  bool exists_;
  bool has_dictionary_;

-  std::vector<std::pair<int, PathTrie*>> children_;
+  std::vector<std::pair<unsigned int, PathTrie*>> children_;

  // pointer to dictionary of FST
  std::shared_ptr<FstType> dictionary_;
@@ -65,7 +65,7 @@ void Scorer::setup_char_map()
    // The initial state of FST is state 0, hence the index of chars in
    // the FST should start from 1 to avoid the conflict with the initial
    // state, otherwise wrong decoding results would be given.
-    char_map_[alphabet_.StringFromLabel(i)] = i + 1;
+    char_map_[alphabet_.DecodeSingle(i)] = i + 1;
  }
}

@@ -314,11 +314,11 @@ void Scorer::reset_params(float alpha, float beta)
  this->beta = beta;
}

-std::vector<std::string> Scorer::split_labels_into_scored_units(const std::vector<int>& labels)
+std::vector<std::string> Scorer::split_labels_into_scored_units(const std::vector<unsigned int>& labels)
{
  if (labels.empty()) return {};

-  std::string s = alphabet_.LabelsToString(labels);
+  std::string s = alphabet_.Decode(labels);
  std::vector<std::string> words;
  if (is_utf8_mode_) {
    words = split_into_codepoints(s);
@@ -339,8 +339,8 @@ std::vector<std::string> Scorer::make_ngram(PathTrie* prefix)
      break;
    }

-    std::vector<int> prefix_vec;
-    std::vector<int> prefix_steps;
+    std::vector<unsigned int> prefix_vec;
+    std::vector<unsigned int> prefix_steps;

    if (is_utf8_mode_) {
      new_node = current_node->get_prev_grapheme(prefix_vec, prefix_steps, alphabet_);
@@ -350,14 +350,14 @@ std::vector<std::string> Scorer::make_ngram(PathTrie* prefix)
    current_node = new_node->parent;

    // reconstruct word
-    std::string word = alphabet_.LabelsToString(prefix_vec);
+    std::string word = alphabet_.Decode(prefix_vec);
    ngram.push_back(word);
  }
  std::reverse(ngram.begin(), ngram.end());
  return ngram;
}

-void Scorer::fill_dictionary(const std::vector<std::string>& vocabulary)
+void Scorer::fill_dictionary(const std::unordered_set<std::string>& vocabulary)
{
  // ConstFst is immutable, so we need to use a MutableFst to create the trie,
  // and then we convert to a ConstFst for the decoder and for storing on disk.
@@ -4,6 +4,7 @@
#include <memory>
#include <string>
#include <unordered_map>
+#include <unordered_set>
#include <vector>

#include "lm/virtual_interface.hh"
@@ -72,7 +73,7 @@ public:

  // trransform the labels in index to the vector of words (word based lm) or
  // the vector of characters (character based lm)
-  std::vector<std::string> split_labels_into_scored_units(const std::vector<int> &labels);
+  std::vector<std::string> split_labels_into_scored_units(const std::vector<unsigned int> &labels);

  void set_alphabet(const Alphabet& alphabet);

@@ -83,7 +84,7 @@ public:
  bool is_scoring_boundary(PathTrie* prefix, size_t new_label);

  // fill dictionary FST from a vocabulary
-  void fill_dictionary(const std::vector<std::string> &vocabulary);
+  void fill_dictionary(const std::unordered_set<std::string> &vocabulary);

  // load language model from given path
  int load_lm(const std::string &lm_path);
@@ -3,7 +3,6 @@
%{
#include "ctc_beam_search_decoder.h"
#define SWIG_FILE_WITH_INIT
-#define SWIG_PYTHON_STRICT_BYTE_CHAR
#include "workspace_status.h"
%}

@@ -19,6 +18,9 @@ import_array();

namespace std {
    %template(StringVector) vector<string>;
+    %template(UnsignedIntVector) vector<unsigned int>;
+    %template(OutputVector) vector<Output>;
+    %template(OutputVectorVector) vector<vector<Output>>;
}

%shared_ptr(Scorer);
@@ -27,6 +29,7 @@ namespace std {
%apply (double* IN_ARRAY2, int DIM1, int DIM2) {(const double *probs, int time_dim, int class_dim)};
%apply (double* IN_ARRAY3, int DIM1, int DIM2, int DIM3) {(const double *probs, int batch_size, int time_dim, int class_dim)};
%apply (int* IN_ARRAY1, int DIM1) {(const int *seq_lengths, int seq_lengths_size)};
+%apply (unsigned int* IN_ARRAY1, int DIM1) {(const unsigned int *input, int length)};

%ignore Scorer::dictionary;

@@ -38,10 +41,6 @@ namespace std {
%constant const char* __version__ = ds_version();
%constant const char* __git_version__ = ds_git_version();

-%template(IntVector) std::vector<int>;
-%template(OutputVector) std::vector<Output>;
-%template(OutputVectorVector) std::vector<std::vector<Output>>;
-
// Import only the error code enum definitions from deepspeech.h
// We can't just do |%ignore "";| here because it affects this file globally (even
// files %include'd above). That causes SWIG to lose destructor information and
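The new ``%apply (unsigned int* IN_ARRAY1, int DIM1)`` typemap is what lets the C-style ``Decode(const unsigned int*, int)`` overload consume NumPy arrays without going through ``std::vector``. A small sketch under the same hypothetical setup as the earlier examples; whether SWIG dispatches to the pointer overload for a ``uint32`` array is my reading of the typemap, not something stated in the diff:

.. code-block:: python

   # Sketch only: relies on the IN_ARRAY1 typemap declared above.
   import numpy as np
   from ds_ctcdecoder import Alphabet

   alphabet = Alphabet('alphabet.txt')
   tokens = np.array(alphabet.Encode('hi'), dtype=np.uint32)
   print(alphabet.Decode(tokens))  # should hit the pointer/length overload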
@@ -501,20 +501,3 @@ DS_Version()
{
  return strdup(ds_version());
}
-
-char*
-DS_ErrorCodeToErrorMessage(int aErrorCode)
-{
-#define RETURN_MESSAGE(NAME, VALUE, DESC) \
-  case NAME: \
-    return strdup(DESC);
-
-  switch(aErrorCode)
-  {
-    DS_FOR_EACH_ERROR(RETURN_MESSAGE)
-    default:
-      return strdup("Unknown error, please make sure you are using the correct native binary.");
-  }
-
-#undef RETURN_MESSAGE
-}
native_client/deepspeech_errors.cc (new file, 19 lines)
@@ -0,0 +1,19 @@
#include "deepspeech.h"
#include <string.h>

char*
DS_ErrorCodeToErrorMessage(int aErrorCode)
{
#define RETURN_MESSAGE(NAME, VALUE, DESC) \
  case NAME: \
    return strdup(DESC);

  switch(aErrorCode)
  {
    DS_FOR_EACH_ERROR(RETURN_MESSAGE)
    default:
      return strdup("Unknown error, please make sure you are using the correct native binary.");
  }

#undef RETURN_MESSAGE
}
@@ -1,7 +1,7 @@
NC_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

TARGET    ?= host
-TFDIR     ?= $(abspath $(NC_DIR)/../../tensorflow)
+TFDIR     ?= $(abspath $(NC_DIR)/../tensorflow)
PREFIX    ?= /usr/local
SO_SEARCH ?= $(TFDIR)/bazel-bin/

@@ -43,36 +43,34 @@ We highly recommend sticking to the recommended versions of CUDA/cuDNN in order
Getting the code
----------------

-We need to clone ``mozilla/DeepSpeech`` and ``mozilla/tensorflow``.
+We need to clone ``mozilla/DeepSpeech``.

.. code-block:: bash

   git clone https://github.com/mozilla/DeepSpeech
+   git submodule sync tensorflow/
+   git submodule update --init tensorflow/

-.. code-block:: bash
-
-   git clone --branch r2.2 https://github.com/mozilla/tensorflow

Configuring the paths
---------------------

-We need to create a symbolic link, for this example let's suppose that we cloned into ``D:\cloned`` and now the structure looks like:
+There should already be a symbolic link; for this example, let's suppose that we cloned into ``D:\cloned`` and the structure now looks like:

.. code-block::

   .
   ├── D:\
   │   ├── cloned            # Contains DeepSpeech and tensorflow side by side
-  │   │   ├── DeepSpeech    # Root of the cloned DeepSpeech
-  │   │   ├── tensorflow    # Root of the cloned Mozilla's tensorflow
+  │   │   └── DeepSpeech    # Root of the cloned DeepSpeech
+  │   │       ├── tensorflow  # Root of the cloned Mozilla's tensorflow
   └── ...

-Change your path accordingly to your path structure, for the structure above we are going to use the following command:
+Adjust the paths to match your directory structure; for the structure above, and only if the symbolic link does not exist, we are going to use the following command:

.. code-block:: bash

-   mklink /d "D:\cloned\tensorflow\native_client" "D:\cloned\DeepSpeech\native_client"
+   mklink /d "D:\cloned\DeepSpeech\tensorflow\native_client" "D:\cloned\DeepSpeech\native_client"

Adding environment variables
----------------------------

@@ -82,7 +80,7 @@ After you have installed the requirements there are few environment variables th
MSYS2 paths
~~~~~~~~~~~

-For MSYS2 we need to add ``bin`` directory, if you installed in the default route the path that we need to add should looks like ``C:\msys64\usr\bin``. Now we can run ``pacman``\ :
+For MSYS2 we need to add the ``bin`` directory; if you installed in the default location, the path to add should look like ``C:\msys64\usr\bin``. Now we can run ``pacman``:

.. code-block:: bash

@@ -120,7 +118,7 @@ Building the native_client

There's one last command to run before building, you need to run the `configure.py <https://github.com/mozilla/tensorflow/blob/master/configure.py>`_ inside ``tensorflow`` cloned directory.

-At this point we are ready to start building the ``native_client``\ , go to ``tensorflow`` directory that you cloned, following our examples should be ``D:\cloned\tensorflow``.
+At this point we are ready to start building the ``native_client``: go to the ``tensorflow`` sub-directory; following our examples, it should be ``D:\cloned\DeepSpeech\tensorflow``.

CPU
~~~
native_client/generate_scorer_package.cpp (new file, 146 lines)
@@ -0,0 +1,146 @@
#include <string>
#include <vector>
#include <fstream>
#include <unordered_set>
#include <iostream>
using namespace std;

#include "absl/types/optional.h"
#include "boost/program_options.hpp"

#include "ctcdecode/decoder_utils.h"
#include "ctcdecode/scorer.h"
#include "alphabet.h"
#include "deepspeech.h"

namespace po = boost::program_options;

int
create_package(absl::optional<string> alphabet_path,
               string lm_path,
               string vocab_path,
               string package_path,
               absl::optional<bool> force_utf8,
               float default_alpha,
               float default_beta)
{
  // Read vocabulary
  unordered_set<string> words;
  bool vocab_looks_char_based = true;
  ifstream fin(vocab_path);
  if (!fin) {
    cerr << "Invalid vocabulary file " << vocab_path << "\n";
    return 1;
  }
  string word;
  while (fin >> word) {
    words.insert(word);
    if (get_utf8_str_len(word) > 1) {
      vocab_looks_char_based = false;
    }
  }
  cerr << words.size() << " unique words read from vocabulary file.\n"
       << (vocab_looks_char_based ? "Looks" : "Doesn't look")
       << " like a character based (Bytes Are All You Need) model.\n";

  if (!force_utf8.has_value()) {
    force_utf8 = vocab_looks_char_based;
    cerr << "--force_utf8 was not specified, using value "
         << "infered from vocabulary contents: "
         << (vocab_looks_char_based ? "true" : "false") << "\n";
  }

  if (force_utf8.value() && !alphabet_path.has_value()) {
    cerr << "No --alphabet file specified, not using bytes output mode, can't continue.\n";
    return 1;
  }

  Scorer scorer;
  if (force_utf8.value()) {
    scorer.set_alphabet(UTF8Alphabet());
  } else {
    Alphabet alphabet;
    alphabet.init(alphabet_path->c_str());
    scorer.set_alphabet(alphabet);
  }
  scorer.set_utf8_mode(force_utf8.value());
  scorer.reset_params(default_alpha, default_beta);
  int err = scorer.load_lm(lm_path);
  if (err != DS_ERR_SCORER_NO_TRIE) {
    cerr << "Error loading language model file: "
         << DS_ErrorCodeToErrorMessage(err) << "\n";
    return 1;
  }
  scorer.fill_dictionary(words);

  // Copy LM file to final package file destination
  {
    ifstream lm_src(lm_path, std::ios::binary);
    ofstream package_dest(package_path, std::ios::binary);
    package_dest << lm_src.rdbuf();
  }

  // Save dictionary to package file, appending instead of overwriting
  if (!scorer.save_dictionary(package_path, true)) {
    cerr << "Error when saving package in " << package_path << ".\n";
    return 1;
  }

  cerr << "Package created in " << package_path << ".\n";
  return 0;
}

int
main(int argc, char** argv)
{
  po::options_description desc("Options");
  desc.add_options()
    ("help", "show help message")
    ("alphabet", po::value<string>(), "Path of alphabet file to use for vocabulary construction. Words with characters not in the alphabet will not be included in the vocabulary. Optional if using UTF-8 mode.")
    ("lm", po::value<string>(), "Path of KenLM binary LM file. Must be built without including the vocabulary (use the -v flag). See generate_lm.py for how to create a binary LM.")
    ("vocab", po::value<string>(), "Path of vocabulary file. Must contain words separated by whitespace.")
    ("package", po::value<string>(), "Path to save scorer package.")
    ("default_alpha", po::value<float>(), "Default value of alpha hyperparameter (float).")
    ("default_beta", po::value<float>(), "Default value of beta hyperparameter (float).")
    ("force_utf8", po::value<bool>(), "Boolean flag, force set or unset UTF-8 mode in the scorer package. If not set, infers from the vocabulary. See <https://deepspeech.readthedocs.io/en/master/Decoder.html#utf-8-mode> for further explanation.")
    ;

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, desc), vm);
  po::notify(vm);

  if (vm.count("help")) {
    cout << desc << "\n";
    return 1;
  }

  // Check required flags.
  for (const string& flag : {"lm", "vocab", "package", "default_alpha", "default_beta"}) {
    if (!vm.count(flag)) {
      cerr << "--" << flag << " is a required flag. Pass --help for help.\n";
      return 1;
    }
  }

  // Parse optional --force_utf8
  absl::optional<bool> force_utf8 = absl::nullopt;
  if (vm.count("force_utf8")) {
    force_utf8 = vm["force_utf8"].as<bool>();
  }

  // Parse optional --alphabet
  absl::optional<string> alphabet = absl::nullopt;
  if (vm.count("alphabet")) {
    alphabet = vm["alphabet"].as<string>();
  }

  create_package(alphabet,
                 vm["lm"].as<string>(),
                 vm["vocab"].as<string>(),
                 vm["package"].as<string>(),
                 force_utf8,
                 vm["default_alpha"].as<float>(),
                 vm["default_beta"].as<float>());

  return 0;
}
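For reference, the required flags checked in ``main`` above compose into an invocation like the following sketch. The binary location depends on how you build it, so the path, the file names and the alpha/beta values here are all hypothetical:

.. code-block:: python

   # Sketch only: binary path, file names and hyperparameters are placeholders.
   import subprocess

   subprocess.run([
       './generate_scorer_package',
       '--alphabet', 'alphabet.txt',
       '--lm', 'lm.binary',            # KenLM binary LM built without vocabulary
       '--vocab', 'vocab-500000.txt',
       '--package', 'kenlm.scorer',
       '--default_alpha', '0.93',
       '--default_beta', '1.18',
   ], check=True)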
@@ -1,72 +0,0 @@ (file deleted)
-
-DeepSpeech Java / Android bindings
-==================================
-
-This is still preliminary work. Please refer to ``native_client/README.rst`` for
-building ``libdeepspeech.so`` and ``deepspeech`` binary for Android on ARMv7 and
-ARM64 arch.
-
-Android Java / JNI bindings: ``libdeepspeech``
-==================================================
-
-Java / JNI bindings are available under the ``libdeepspeech`` subdirectory.
-Building depends on prebuilt shared object. Please ensure to place
-``libdeepspeech.so`` into the ``libdeepspeech/libs/{arm64-v8a,armeabi-v7a}/``
-matching subdirectories.
-
-Building the bindings is managed by ``gradle`` and should be limited to issuing
-``./gradlew libdeepspeech:build``\ , producing an ``AAR`` package in
-``./libdeepspeech/build/outputs/aar/``. This can later be used by other
-Gradle-based build with the following configuration:
-
-.. code-block::
-
-   implementation 'deepspeech.mozilla.org:libdeepspeech:VERSION@aar'
-
-Please note that you might have to copy the file to a local Maven repository
-and adapt file naming (when missing, the error message should states what
-filename it expects and where).
-
-Android demo APK
-================
-
-Provided is a very simple Android demo app that allows you to test the library.
-You can build it with ``make apk`` and install the resulting APK file. Please
-refer to Gradle documentation for more details.
-
-The ``APK`` should be produced in ``/app/build/outputs/apk/``. This demo app might
-require external storage permissions. You can then push models files to your
-device, set the path to the file in the UI and try to run on an audio file.
-When running, it should first play the audio file and then run the decoding. At
-the end of the decoding, you should be presented with the decoded text as well
-as time elapsed to decode in miliseconds.
-
-Running ``deepspeech`` via adb
-==================================
-
-You should use ``adb push`` to send data to device, please refer to Android
-documentation on how to use that.
-
-Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:
-
-
-* ``output_graph.tflite`` which is the TF Lite model
-* ``kenlm.scorer``, if you want to use the scorer; please be aware that too big
-  scorer will make the device run out of memory
-
-Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ :
-
-* ``deepspeech``
-* ``libdeepspeech.so``
-* ``libc++_shared.so``
-
-You should then be able to run as usual, using a shell from ``adb shell``\ :
-
-.. code-block::
-
-   user@device$ cd /data/local/tmp/ds/
-   user@device$ LD_LIBRARY_PATH=$(pwd)/ ./deepspeech [...]
-
-Please note that Android linker does not support ``rpath`` so you have to set
-``LD_LIBRARY_PATH``. Properly wrapped / packaged bindings does embed the library
-at a place the linker knows where to search, so Android apps will be fine.
@@ -33,7 +33,7 @@ char*
ModelState::decode(const DecoderState& state) const
{
  vector<Output> out = state.decode();
-  return strdup(alphabet_.LabelsToString(out[0].tokens).c_str());
+  return strdup(alphabet_.Decode(out[0].tokens).c_str());
}

Metadata*
@@ -50,7 +50,7 @@ ModelState::decode_metadata(const DecoderState& state,

  for (int j = 0; j < out[i].tokens.size(); ++j) {
    TokenMetadata token {
-      strdup(alphabet_.StringFromLabel(out[i].tokens[j]).c_str()),  // text
+      strdup(alphabet_.DecodeSingle(out[i].tokens[j]).c_str()),  // text
      static_cast<unsigned int>(out[i].timesteps[j]),  // timestep
      out[i].timesteps[j] * ((float)audio_win_step_ / sample_rate_),  // start_time
    };
@@ -1,8 +1,17 @@
#include "tflitemodelstate.h"

#include "tensorflow/lite/string_util.h"
#include "workspace_status.h"

+#ifdef __ANDROID__
+#include <android/log.h>
+#define LOG_TAG "libdeepspeech"
+#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
+#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
+#else
+#define LOGD(...)
+#define LOGE(...)
+#endif // __ANDROID__
+
using namespace tflite;
using std::vector;

@@ -90,6 +99,62 @@ TFLiteModelState::~TFLiteModelState()
{
}

+std::map<std::string, tflite::Interpreter::TfLiteDelegatePtr>
+getTfliteDelegates()
+{
+  std::map<std::string, tflite::Interpreter::TfLiteDelegatePtr> delegates;
+
+  const char* env_delegate_c = std::getenv("DS_TFLITE_DELEGATE");
+  std::string env_delegate = (env_delegate_c != nullptr) ? env_delegate_c : "";
+
+#ifdef __ANDROID__
+  if (env_delegate == std::string("gpu")) {
+    LOGD("Trying to get GPU delegate ...");
+    // Try to get GPU delegate
+    {
+      tflite::Interpreter::TfLiteDelegatePtr delegate = evaluation::CreateGPUDelegate();
+      if (!delegate) {
+        LOGD("GPU delegation not supported");
+      } else {
+        LOGD("GPU delegation supported");
+        delegates.emplace("GPU", std::move(delegate));
+      }
+    }
+  }
+
+  if (env_delegate == std::string("nnapi")) {
+    LOGD("Trying to get NNAPI delegate ...");
+    // Try to get Android NNAPI delegate
+    {
+      tflite::Interpreter::TfLiteDelegatePtr delegate = evaluation::CreateNNAPIDelegate();
+      if (!delegate) {
+        LOGD("NNAPI delegation not supported");
+      } else {
+        LOGD("NNAPI delegation supported");
+        delegates.emplace("NNAPI", std::move(delegate));
+      }
+    }
+  }
+
+  if (env_delegate == std::string("hexagon")) {
+    LOGD("Trying to get Hexagon delegate ...");
+    // Try to get Android Hexagon delegate
+    {
+      const std::string libhexagon_path("/data/local/tmp");
+      tflite::Interpreter::TfLiteDelegatePtr delegate = evaluation::CreateHexagonDelegate(libhexagon_path, /* profiler */ false);
+      if (!delegate) {
+        LOGD("Hexagon delegation not supported");
+      } else {
+        LOGD("Hexagon delegation supported");
+        delegates.emplace("Hexagon", std::move(delegate));
+      }
+    }
+  }
+#endif // __ANDROID__
+
+  return delegates;
+}
+
int
TFLiteModelState::init(const char* model_path)
{
@@ -111,9 +176,21 @@ TFLiteModelState::init(const char* model_path)
    return DS_ERR_FAIL_INTERPRETER;
  }

+  LOGD("Trying to detect delegates ...");
+  std::map<std::string, tflite::Interpreter::TfLiteDelegatePtr> delegates = getTfliteDelegates();
+  LOGD("Finished enumerating delegates ...");
+
  interpreter_->AllocateTensors();
  interpreter_->SetNumThreads(4);

+  LOGD("Trying to use delegates ...");
+  for (const auto& delegate : delegates) {
+    LOGD("Trying to apply delegate %s", delegate.first.c_str());
+    if (interpreter_->ModifyGraphWithDelegate(delegate.second.get()) != kTfLiteOk) {
+      LOGD("FAILED to apply delegate %s to the graph", delegate.first.c_str());
+    }
+  }
+
  // Query all the index once
  input_node_idx_ = get_input_tensor_by_name("input_node");
  previous_state_c_idx_ = get_input_tensor_by_name("previous_state_c");
@@ -206,7 +283,7 @@ TFLiteModelState::init(const char* model_path)
  beam_width_ = (unsigned int)(*beam_width);

  tflite::StringRef serialized_alphabet = tflite::GetString(interpreter_->tensor(metadata_alphabet_idx), 0);
-  err = alphabet_.deserialize(serialized_alphabet.str, serialized_alphabet.len);
+  err = alphabet_.Deserialize(serialized_alphabet.str, serialized_alphabet.len);
  if (err != 0) {
    return DS_ERR_INVALID_ALPHABET;
  }
|
|||||||
|
|
||||||
#include "tensorflow/lite/model.h"
|
#include "tensorflow/lite/model.h"
|
||||||
#include "tensorflow/lite/kernels/register.h"
|
#include "tensorflow/lite/kernels/register.h"
|
||||||
|
#include "tensorflow/lite/tools/evaluation/utils.h"
|
||||||
|
|
||||||
#include "modelstate.h"
|
#include "modelstate.h"
|
||||||
|
|
||||||
|
@ -119,7 +119,7 @@ TFModelState::init(const char* model_path)
|
|||||||
beam_width_ = (unsigned int)(beam_width);
|
beam_width_ = (unsigned int)(beam_width);
|
||||||
|
|
||||||
string serialized_alphabet = metadata_outputs[4].scalar<tensorflow::tstring>()();
|
string serialized_alphabet = metadata_outputs[4].scalar<tensorflow::tstring>()();
|
||||||
err = alphabet_.deserialize(serialized_alphabet.data(), serialized_alphabet.size());
|
err = alphabet_.Deserialize(serialized_alphabet.data(), serialized_alphabet.size());
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
return DS_ERR_INVALID_ALPHABET;
|
return DS_ERR_INVALID_ALPHABET;
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,8 @@ build:
|
|||||||
routes: []
|
routes: []
|
||||||
maxRunTime: 3600
|
maxRunTime: 3600
|
||||||
docker_image: "ubuntu:16.04"
|
docker_image: "ubuntu:16.04"
|
||||||
|
generic:
|
||||||
|
workerType: 'ds-macos-light'
|
||||||
system_setup:
|
system_setup:
|
||||||
>
|
>
|
||||||
true
|
true
|
||||||
@ -17,12 +19,13 @@ build:
|
|||||||
>
|
>
|
||||||
true
|
true
|
||||||
scripts:
|
scripts:
|
||||||
|
setup: ''
|
||||||
build: ''
|
build: ''
|
||||||
package: ''
|
package: ''
|
||||||
nc_asset_name: 'native_client.tar.xz'
|
nc_asset_name: 'native_client.tar.xz'
|
||||||
args:
|
args:
|
||||||
tests_cmdline: ''
|
tests_cmdline: ''
|
||||||
tensorflow_git_desc: 'TensorFlow: v2.2.0-12-gc29895f'
|
tensorflow_git_desc: 'TensorFlow: v2.2.0-15-g518c1d0'
|
||||||
test_model_task: ''
|
test_model_task: ''
|
||||||
homebrew:
|
homebrew:
|
||||||
url: ''
|
url: ''
|
||||||
|
@ -8,9 +8,17 @@ python:
|
|||||||
training:
|
training:
|
||||||
packages_xenial:
|
packages_xenial:
|
||||||
apt: 'libopus0'
|
apt: 'libopus0'
|
||||||
|
deepspeech:
|
||||||
|
packages_xenial:
|
||||||
|
apt: 'make build-essential gfortran git libblas-dev liblapack-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev libpng-dev python python-dev zlib1g-dev'
|
||||||
tensorflow:
|
tensorflow:
|
||||||
packages_xenial:
|
packages_xenial:
|
||||||
apt: 'make build-essential gfortran git libblas-dev liblapack-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev libpng-dev python zlib1g-dev'
|
apt: 'apt-get -qq update && apt-get -qq -y install realpath build-essential python-virtualenv python-dev python-pip libblas-dev liblapack-dev gfortran wget software-properties-common pixz zip zlib1g-dev unzip'
|
||||||
|
packages_macos:
|
||||||
|
brew: '$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tf_tc-brew.sh'
|
||||||
|
packages_win:
|
||||||
|
pacman: 'pacman --noconfirm -S patch unzip tar'
|
||||||
|
msys64: 'ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64'
|
||||||
java:
|
java:
|
||||||
packages_xenial:
|
packages_xenial:
|
||||||
apt: 'apt-get -qq -y install curl software-properties-common wget unzip && add-apt-repository --yes ppa:openjdk-r/ppa && apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y --force-yes install openjdk-8-jdk && java -version && update-ca-certificates -f'
|
apt: 'apt-get -qq -y install curl software-properties-common wget unzip && add-apt-repository --yes ppa:openjdk-r/ppa && apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y --force-yes install openjdk-8-jdk && java -version && update-ca-certificates -f'
|
||||||
@ -132,6 +140,34 @@ system:
|
|||||||
win:
|
win:
|
||||||
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz"
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz"
|
||||||
namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118"
|
namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118"
|
||||||
|
tensorflow:
|
||||||
|
linux_amd64_cpu:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.cpu/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.cpu"
|
||||||
|
linux_amd64_cuda:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.cuda/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.cuda"
|
||||||
|
linux_armv7:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm"
|
||||||
|
linux_arm64:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm64/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm64"
|
||||||
|
darwin_amd64:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.osx/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.osx"
|
||||||
|
android_arm64:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-arm64/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-arm64"
|
||||||
|
android_armv7:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-armv7/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-armv7"
|
||||||
|
win_amd64_cpu:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.win/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.win"
|
||||||
|
win_amd64_cuda:
|
||||||
|
url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.win-cuda/artifacts/public/home.tar.xz"
|
||||||
|
namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.win-cuda"
|
||||||
username: 'build-user'
|
username: 'build-user'
|
||||||
homedir:
|
homedir:
|
||||||
linux: '/home/build-user'
|
linux: '/home/build-user'
|
||||||
@ -141,7 +177,3 @@ system:
|
|||||||
msys2:
|
msys2:
|
||||||
url: 'https://github.com/msys2/msys2-installer/releases/download/2020-06-02/msys2-base-x86_64-20200602.tar.xz'
|
url: 'https://github.com/msys2/msys2-installer/releases/download/2020-06-02/msys2-base-x86_64-20200602.tar.xz'
|
||||||
sha: '598ceeaa3e2ccf86a25a2e3c449d00a9fd35300e36011bee610036dfa59d670a'
|
sha: '598ceeaa3e2ccf86a25a2e3c449d00a9fd35300e36011bee610036dfa59d670a'
|
||||||
msys2_filesystem_pkg:
|
|
||||||
url: 'http://repo.msys2.org/msys/x86_64/filesystem-2020.02-3-x86_64.pkg.tar.xz'
|
|
||||||
sha: '927b020a67a05139ee1b2c45bff491c1d42335e64350cc7758ee20d7c3099477'
|
|
||||||
install: 'pacman -Udd --noconfirm $USERPROFILE/filesystem-2020.02-3-x86_64.pkg.tar.xz'
|
|
||||||
|
@@ -6,6 +6,6 @@ arm_flavor=$1

source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

do_deepspeech_java_apk_build
@@ -8,7 +8,7 @@ source $(dirname "$0")/tc-tests-utils.sh

mkdir -p ${TASKCLUSTER_ARTIFACTS} || true

-cp ${DS_ROOT_TASK}/DeepSpeech/tf/bazel*.log ${TASKCLUSTER_ARTIFACTS}/
+cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/

cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/java/app/build/outputs/apk/release/app*.apk ${TASKCLUSTER_ARTIFACTS}/
cp ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/java/libdeepspeech/build/outputs/aar/libdeepspeech*.aar ${TASKCLUSTER_ARTIFACTS}/
@@ -4,12 +4,14 @@ build:
    - "swig-linux-amd64"
    - "node-gyp-cache"
    - "pyenv-linux-amd64"
+    - "tf_android-arm64-opt"
  routes:
    - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-arm64"
    - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-arm64"
    - "index.project.deepspeech.deepspeech.native_client.android-arm64.${event.head.sha}"
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-arm64/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.android_arm64.url}
  scripts:
+    setup: ""
    build: "taskcluster/android-build.sh arm64-v8a"
    package: "taskcluster/android-package.sh arm64-v8a"
  nc_asset_name: "native_client.arm64.cpu.android.tar.xz"
@@ -4,11 +4,12 @@ build:
     - "swig-linux-amd64"
     - "node-gyp-cache"
     - "pyenv-linux-amd64"
+    - "tf_android-armv7-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-armv7"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-armv7"
     - "index.project.deepspeech.deepspeech.native_client.android-armv7.${event.head.sha}"
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-armv7/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.android_armv7.url}
   scripts:
     build: "taskcluster/android-build.sh armeabi-v7a"
     package: "taskcluster/android-package.sh armeabi-v7a"
@@ -6,10 +6,11 @@ arm_flavor=$1

 source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

 BAZEL_TARGETS="
 //native_client:libdeepspeech.so
+//native_client:generate_scorer_package
 "

 if [ "${arm_flavor}" = "armeabi-v7a" ]; then
@@ -21,7 +22,7 @@ if [ "${arm_flavor}" = "arm64-v8a" ]; then
 fi

 if [ "${arm_flavor}" = "x86_64" ]; then
-  LOCAL_ANDROID_FLAGS="--config=android --cpu=x86_64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++11 --copt=-D_GLIBCXX_USE_C99"
+  LOCAL_ANDROID_FLAGS="--config=android --cpu=x86_64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99"
 fi

 BAZEL_BUILD_FLAGS="--define=runtime=tflite ${LOCAL_ANDROID_FLAGS} ${BAZEL_EXTRA_FLAGS}"
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.arm64_v8a.android_24.url}
   artifact_namespace: ${system.android_cache.arm64_v8a.android_24.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh arm64-v8a android-24"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.arm64_v8a.android_25.url}
   artifact_namespace: ${system.android_cache.arm64_v8a.android_25.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh arm64-v8a android-25"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.armeabi_v7a.android_24.url}
   artifact_namespace: ${system.android_cache.armeabi_v7a.android_24.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh armeabi-v7a android-24 default"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.armeabi_v7a.android_25.url}
   artifact_namespace: ${system.android_cache.armeabi_v7a.android_25.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh armeabi-v7a android-25"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.sdk.android_27.url}
   artifact_namespace: ${system.android_cache.sdk.android_27.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh sdk android-27"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.x86_64.android_24.url}
   artifact_namespace: ${system.android_cache.x86_64.android_24.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh x86_64 android-24"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.x86_64.android_25.url}
   artifact_namespace: ${system.android_cache.x86_64.android_25.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh x86_64 android-25"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.x86_64.android_26.url}
   artifact_namespace: ${system.android_cache.x86_64.android_26.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh x86_64 android-26"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.x86_64.android_28.url}
   artifact_namespace: ${system.android_cache.x86_64.android_28.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh x86_64 android-28"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.x86_64.android_29.url}
   artifact_namespace: ${system.android_cache.x86_64.android_29.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh x86_64 android-29"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
   artifact_url: ${system.android_cache.x86_64.android_30.url}
   artifact_namespace: ${system.android_cache.x86_64.android_30.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/android_cache-build.sh x86_64 android-30"
     package: "taskcluster/android_cache-package.sh"
   metadata:
@@ -14,7 +14,7 @@ build:
   system_setup:
     >
      ${java.packages_xenial.apt}
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-armv7/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.android_armv7.url}
   gradle_cache:
     url: ${system.gradle_cache.url}
     namespace: ${system.gradle_cache.namespace}
@@ -8,6 +8,6 @@ source $(dirname "$0")/tc-tests-utils.sh

 mkdir -p ${TASKCLUSTER_ARTIFACTS} || true

-cp ${DS_ROOT_TASK}/DeepSpeech/tf/bazel*.log ${TASKCLUSTER_ARTIFACTS}/
+cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/

 package_native_client_ndk "native_client.tar.xz" "${arm_flavor}"
@@ -8,7 +8,7 @@ build:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-x86_64"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-x86_64"
     - "index.project.deepspeech.deepspeech.native_client.android-x86_64.${event.head.sha}"
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-arm64/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.android_arm64.url}
   scripts:
     build: "taskcluster/android-build.sh x86_64"
     package: "taskcluster/android-package.sh x86_64"
@@ -4,10 +4,11 @@ set -xe

 source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

 BAZEL_TARGETS="
 //native_client:libdeepspeech.so
+//native_client:generate_scorer_package
 "

 BAZEL_BUILD_FLAGS="${BAZEL_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}"
@@ -4,10 +4,11 @@ set -xe

 source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

 BAZEL_TARGETS="
 //native_client:libdeepspeech.so
+//native_client:generate_scorer_package
 "

 BAZEL_ENV_FLAGS="TF_NEED_CUDA=1 ${TF_CUDA_FLAGS}"
@@ -5,11 +5,12 @@ build:
     - "node-gyp-cache"
     - "homebrew_builds-darwin-amd64"
     - "pyenv-darwin-amd64"
+    - "tf_darwin-amd64-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx"
     - "index.project.deepspeech.deepspeech.native_client.osx.${event.head.sha}"
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.darwin_amd64.url}
   scripts:
     build: "taskcluster/host-build.sh"
     package: "taskcluster/package.sh"
@@ -5,11 +5,12 @@ build:
     - "node-gyp-cache"
     - "homebrew_builds-darwin-amd64"
     - "pyenv-darwin-amd64"
+    - "tf_darwin-amd64-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx-ctc"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx-ctc"
     - "index.project.deepspeech.deepspeech.native_client.osx-ctc.${event.head.sha}"
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.darwin_amd64.url}
   maxRunTime: 14400
   scripts:
     build: 'taskcluster/decoder-build.sh'
@@ -5,11 +5,12 @@ build:
     - "node-gyp-cache"
     - "homebrew_builds-darwin-amd64"
     - "pyenv-darwin-amd64"
+    - "tf_darwin-amd64-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx-tflite"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx-tflite"
     - "index.project.deepspeech.deepspeech.native_client.osx-tflite.${event.head.sha}"
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.darwin_amd64.url}
   scripts:
     build: "taskcluster/host-build.sh tflite"
     package: "taskcluster/package.sh"
@@ -60,9 +60,8 @@ payload:
       cd $TASKCLUSTER_ORIG_TASKDIR/ && rm -fr $TASKCLUSTER_TASK_DIR/ &&
       exit $TASKCLUSTER_TASK_EXIT_CODE" 0 &&
       (pixz -d < $TASKCLUSTER_ORIG_TASKDIR/home.tar.xz | gtar -C $TASKCLUSTER_TASK_DIR -xf - ) &&
-      git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ &&
-      cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} &&
-      ln -s $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/native_client/ $TASKCLUSTER_TASK_DIR/DeepSpeech/tf/native_client &&
+      cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git remote set-url origin ${event.head.repo.url} && git fetch origin && git checkout --quiet ${event.head.sha} &&
+      git submodule --quiet sync tensorflow/ && git submodule --quiet update tensorflow/ &&
       cd $TASKCLUSTER_TASK_DIR &&
       (mkdir pyenv-root/ && gtar -C pyenv-root/ -xf $TASKCLUSTER_ORIG_TASKDIR/pyenv.tar.gz) &&
       (mkdir homebrew-builds/ && gtar -C homebrew-builds/ -xf $TASKCLUSTER_ORIG_TASKDIR/homebrew-builds.tar.gz) &&
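The net effect of the hunk above: instead of re-cloning DeepSpeech and symlinking native_client into the sibling /tf checkout, the task reuses the DeepSpeech/ds checkout unpacked from home.tar.xz and brings TensorFlow in as a git submodule. A minimal sketch of the resulting sequence (REPO_URL and SHA are placeholders standing in for ${event.head.repo.url} and ${event.head.sha}):

    cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds
    git remote set-url origin "$REPO_URL"    # retarget the unpacked checkout at the event's repo
    git fetch origin
    git checkout --quiet "$SHA"
    git submodule sync tensorflow/           # pick up the submodule URL from .gitmodules
    git submodule update tensorflow/         # check out the pinned tensorflow/ tree inside ds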
@@ -4,7 +4,7 @@ set -xe

 source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

 if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
   export SYSTEM_TARGET=host-win
@@ -1,6 +1,6 @@
 taskId: ${taskcluster.taskId}
 provisionerId: ${taskcluster.generic.provisionerId}
-workerType: ${taskcluster.generic.workerType}
+workerType: ${build.generic.workerType}
 taskGroupId: ${taskcluster.taskGroupId}
 schedulerId: ${taskcluster.schedulerId}
 dependencies:
@@ -24,6 +24,8 @@ payload:
     - "--login"
     - "-cxe"
     - $let:
+        extraSystemSetup: { $eval: strip(str(build.system_setup)) }
+        extraSystemConfig: { $eval: strip(str(build.system_config)) }
         taskIndexExpire: { $fromNow: '6 months' }
       in: >
         export TASKCLUSTER_ARTIFACTS="$(pwd)/public/" &&
@@ -32,13 +34,14 @@ payload:
        export TASKCLUSTER_TASK_DIR="$(pwd)" &&
        export LC_ALL=C &&
        export MACOSX_DEPLOYMENT_TARGET=10.10 &&
-       export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/ &&
        env &&
        mkdir -p $TASKCLUSTER_ARTIFACTS/ &&
        cache_artifact=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.artifact_url}` &&
        if [ "$cache_artifact" != "200" ]; then
        git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} &&
        cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} &&
+       ${extraSystemConfig} &&
+       $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.setup} &&
        $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} &&
        $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} &&
        $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} 127.0.0.1:8080 ${build.cache.artifact_namespace}
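Unrolled, the payload above is a plain artifact-cache guard: the expensive setup, build, package, and index steps run only when the cached artifact is missing. A sketch with the ${build.*} substitutions replaced by placeholder shell variables (ARTIFACT_URL, REPO, DIR, SHA, and the script-path variables are illustrative names, not values from the config):

    # Probe the TaskCluster index; HTTP 200 means the artifact is already cached.
    status=$(curl -sSIL -o /dev/null -w "%{http_code}" "$ARTIFACT_URL")
    if [ "$status" != "200" ]; then
      git clone --quiet "$REPO" "$DIR"
      cd "$DIR" && git checkout --quiet "$SHA"
      "$DIR/$SETUP_SCRIPT" && "$DIR/$BUILD_SCRIPT" && "$DIR/$PACKAGE_SCRIPT" &&
        "$DIR"/taskcluster/tc-update-index.sh "$INDEX_EXPIRE" 127.0.0.1:8080 "$ARTIFACT_NAMESPACE"
    fi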
@@ -22,6 +22,7 @@ payload:
     - "-cxe"
     - $let:
         extraSystemSetup: { $eval: strip(str(build.system_setup)) }
+        extraSystemConfig: { $eval: strip(str(build.system_config)) }
         taskIndexExpire: { $fromNow: '6 months' }
       in: >
         (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) &&
@@ -31,7 +32,8 @@ payload:
        adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ &&
        mkdir -p /tmp/artifacts/ && chmod 777 /tmp/artifacts &&
        echo -e "#!/bin/bash\nset -xe\n env && id && (git clone --quiet ${build.build_or_cache.repo} ~/${build.build_or_cache.dir}/ && cd ~/${build.build_or_cache.dir}/ && git checkout --quiet ${build.build_or_cache.sha})" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
-       sudo -H -u ${system.username} /bin/bash /tmp/clone.sh &&
+       sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} &&
+       sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/${build.scripts.setup} &&
        sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/${build.scripts.build} &&
        sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/${build.scripts.package} &&
        sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace}
@@ -20,10 +20,6 @@ payload:
        content:
          sha256: ${system.msys2.sha}
          url: ${system.msys2.url}
-     - file: filesystem-2020.02-3-x86_64.pkg.tar.xz
-       content:
-         sha256: ${system.msys2_filesystem_pkg.sha}
-         url: ${system.msys2_filesystem_pkg.url}

   env:
     TC_MSYS_VERSION: 'MSYS_NT-6.3-9600'
@@ -34,18 +30,19 @@ payload:
        "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
        "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
     - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0"
-    - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}"
-    - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm"
     - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm"
     - $let:
+        extraSystemSetup: { $eval: strip(str(build.system_setup)) }
+        extraSystemConfig: { $eval: strip(str(build.system_config)) }
         taskIndexExpire: { $fromNow: '6 months' }
       in: >
         echo .\msys64\usr\bin\bash.exe --login -cxe "export LC_ALL=C &&
         export PATH=\"$USERPROFILE/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&
         export TASKCLUSTER_ARTIFACTS=\"$(cygpath -u $USERPROFILE/public)\" &&
         export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" &&
+        (mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) &&
         echo \"export TASKCLUSTER_TASK_EXIT_CODE=0\" > $USERPROFILE/tc-exit.sh &&
-        env && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${build.cache.artifact_url}`\" != \"200\" ]; then git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/ && cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k
+        env && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${build.cache.artifact_url}`\" != \"200\" ]; then git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/ && cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && ${extraSystemConfig} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.setup} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k

     - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE"
@@ -7,6 +7,7 @@ build:
     >
      ${java.packages_xenial.apt}
   scripts:
+    setup: ""
     build: "taskcluster/gradle-build.sh"
     package: "taskcluster/gradle-package.sh"
   metadata:
@@ -4,6 +4,7 @@ build:
   artifact_url: ${system.homebrew_builds.url}
   artifact_namespace: ${system.homebrew_builds.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/homebrew-build.sh --builds"
     package: "taskcluster/homebrew-package.sh --builds"
   metadata:
@@ -4,6 +4,7 @@ build:
   artifact_url: ${system.homebrew_tests.url}
   artifact_namespace: ${system.homebrew_tests.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/homebrew-build.sh --tests"
     package: "taskcluster/homebrew-package.sh --tests"
   metadata:
@@ -6,10 +6,11 @@ runtime=$1

 source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

 BAZEL_TARGETS="
 //native_client:libdeepspeech.so
+//native_client:generate_scorer_package
 "

 if [ "${runtime}" = "tflite" ]; then
@@ -4,6 +4,7 @@ build:
     - "swig-linux-amd64"
     - "node-gyp-cache"
     - "pyenv-linux-amd64"
+    - "tf_linux-amd64-cpu-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.cpu"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.cpu"
@@ -12,8 +13,9 @@ build:
     >
      ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning}
      && apt-get -qq update && apt-get -qq -y install nodejs python-yaml
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.linux_amd64_cpu.url}
   scripts:
+    setup: ""
     build: "taskcluster/host-build.sh"
     package: "taskcluster/package.sh"
   nc_asset_name: "native_client.amd64.cpu.linux.tar.xz"
@@ -4,6 +4,7 @@ build:
     - "swig-linux-amd64"
     - "node-gyp-cache"
     - "pyenv-linux-amd64"
+    - "tf_linux-amd64-cpu-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.cpu-ctc"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.cpu-ctc"
@@ -12,8 +13,9 @@ build:
     >
      ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning}
      && apt-get -qq update && apt-get -qq -y install nodejs python-yaml
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.linux_amd64_cpu.url}
   scripts:
+    setup: ""
     build: 'taskcluster/decoder-build.sh'
     package: 'taskcluster/decoder-package.sh'
   metadata:
@@ -4,6 +4,7 @@ build:
     - "swig-linux-amd64"
     - "node-gyp-cache"
     - "pyenv-linux-amd64"
+    - "tf_linux-amd64-gpu-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.gpu"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.gpu"
@@ -12,9 +13,10 @@ build:
     >
      ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning}
      && apt-get -qq update && apt-get -qq -y install nodejs python-yaml
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.gpu/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.linux_amd64_cuda.url}
   maxRunTime: 14400
   scripts:
+    setup: ""
     build: "taskcluster/cuda-build.sh"
     package: "taskcluster/package.sh"
   nc_asset_name: "native_client.amd64.cuda.linux.tar.xz"
@@ -4,6 +4,7 @@ build:
     - "swig-linux-amd64"
     - "node-gyp-cache"
     - "pyenv-linux-amd64"
+    - "tf_linux-amd64-cpu-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.tflite"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.tflite"
@@ -12,8 +13,9 @@ build:
     >
      ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning}
      && apt-get -qq update && apt-get -qq -y install nodejs python-yaml
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.linux_amd64_cpu.url}
   scripts:
+    setup: ""
     build: "taskcluster/host-build.sh tflite"
     package: "taskcluster/package.sh"
   nc_asset_name: "native_client.amd64.tflite.linux.tar.xz"
@@ -4,6 +4,7 @@ build:
     - "swig-linux-amd64"
     - "node-gyp-cache"
     - "pyenv-linux-amd64"
+    - "tf_linux-arm64-cpu-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm64"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm64"
@@ -19,8 +20,9 @@ build:
   system_config:
     >
      multistrap -d /tmp/multistrap-armbian64-buster/ -f ${system.homedir.linux}/DeepSpeech/ds/native_client/multistrap_armbian64_buster.conf
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.arm64/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.linux_arm64.url}
   scripts:
+    setup: ""
     build: "taskcluster/arm64-build.sh"
     package: "taskcluster/package.sh"
   nc_asset_name: "native_client.arm64.cpu.linux.tar.xz"
@@ -44,9 +44,9 @@ then:
       extraSystemConfig: { $eval: strip(str(build.system_config)) }
     in: >
       adduser --system --home ${system.homedir.linux} ${system.username} &&
-      apt-get -qq update && apt-get -qq -y install ${tensorflow.packages_xenial.apt} pixz pkg-config realpath sudo unzip wget zip && ${extraSystemSetup} &&
+      apt-get -qq update && apt-get -qq -y install ${deepspeech.packages_xenial.apt} pixz pkg-config realpath sudo unzip wget zip && ${extraSystemSetup} &&
      cd ${system.homedir.linux}/ &&
-      echo -e "#!/bin/bash\nset -xe\n env && id && (wget -O - $TENSORFLOW_BUILD_ARTIFACT | pixz -d | tar -C ${system.homedir.linux}/ -xf - ) && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && ln -s ~/DeepSpeech/ds/native_client/ ~/DeepSpeech/tf/native_client && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf - && mkdir -p ${system.homedir.linux}/pyenv-root/ && wget -O - ${system.pyenv.linux.url} | tar -C ${system.homedir.linux}/pyenv-root/ -xzf - && if [ ! -z "${build.gradle_cache.url}" ]; then wget -O - ${build.gradle_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi && if [ ! -z "${build.android_cache.url}" ]; then wget -O - ${build.android_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi;" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
+      echo -e "#!/bin/bash\nset -xe\n env && id && (wget -O - $TENSORFLOW_BUILD_ARTIFACT | pixz -d | tar -C ${system.homedir.linux}/ -xf - ) && cd ~/DeepSpeech/ds && git remote set-url origin ${event.head.repo.url} && git fetch origin && git checkout --quiet ${event.head.sha} && git submodule --quiet sync tensorflow/ && git submodule --quiet update tensorflow/ && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf - && mkdir -p ${system.homedir.linux}/pyenv-root/ && wget -O - ${system.pyenv.linux.url} | tar -C ${system.homedir.linux}/pyenv-root/ -xzf - && if [ ! -z "${build.gradle_cache.url}" ]; then wget -O - ${build.gradle_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi && if [ ! -z "${build.android_cache.url}" ]; then wget -O - ${build.android_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi;" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
      sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} &&
      sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.build} &&
      sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.package}
@@ -4,6 +4,7 @@ build:
     - "swig-linux-amd64"
     - "node-gyp-cache"
     - "pyenv-linux-amd64"
+    - "tf_linux-rpi3-cpu-opt"
   routes:
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm"
     - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm"
@@ -19,8 +20,9 @@ build:
   system_config:
     >
      multistrap -d /tmp/multistrap-raspbian-buster/ -f ${system.homedir.linux}/DeepSpeech/ds/native_client/multistrap_raspbian_buster.conf
-  tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.arm/artifacts/public/home.tar.xz"
+  tensorflow: ${system.tensorflow.linux_armv7.url}
   scripts:
+    setup: ""
     build: "taskcluster/rpi3-build.sh"
     package: "taskcluster/package.sh"
   nc_asset_name: "native_client.rpi3.cpu.linux.tar.xz"
@@ -6,6 +6,6 @@ package_option=$1

 source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

 do_deepspeech_npm_package "${package_option}"
@@ -8,6 +8,7 @@ build:
     >
      (apt-get -qq -y install sudo || true)
   scripts:
+    setup: ""
     build: "taskcluster/node-gyp-populate.sh"
     package: "taskcluster/node-gyp-package.sh"
   metadata:
@@ -43,7 +43,7 @@ then:
      adduser --system --home ${system.homedir.linux} ${system.username} &&
      apt-get -qq update && apt-get -qq -y install realpath git wget curl make sudo && ${extraSystemSetup} &&
      cd ${system.homedir.linux}/ &&
-      echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && mkdir -p ~/DeepSpeech/tf/ && touch ~/DeepSpeech/tf/tc-vars.sh && chmod +x ~/DeepSpeech/tf/tc-vars.sh && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf -" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
+      echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf -" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
      sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} &&
      sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.build} &&
      sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.package}
@@ -6,7 +6,7 @@ source $(dirname "$0")/tc-tests-utils.sh

 mkdir -p ${TASKCLUSTER_ARTIFACTS} || true

-cp ${DS_ROOT_TASK}/DeepSpeech/tf/bazel*.log ${TASKCLUSTER_ARTIFACTS}/
+cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/

 package_native_client "native_client.tar.xz"

@@ -4,6 +4,7 @@ build:
   artifact_url: ${system.pyenv.osx.url}
   artifact_namespace: ${system.pyenv.osx.namespace}
   scripts:
+    setup: ""
     build: "taskcluster/pyenv-build.sh"
     package: "taskcluster/pyenv-package.sh"
   metadata:
@@ -7,6 +7,7 @@ build:
     >
      apt-get -qq update && apt-get -qq -y install python-yaml ${python.packages_xenial.apt} wget
   scripts:
+    setup: ""
     build: "taskcluster/pyenv-build.sh"
     package: "taskcluster/pyenv-package.sh"
   metadata:
@@ -4,6 +4,7 @@ build:
   artifact_url: "${system.pyenv.win.url}"
   artifact_namespace: "${system.pyenv.win.namespace}"
   scripts:
+    setup: ""
     build: "taskcluster/pyenv-build.sh"
     package: "taskcluster/pyenv-package.sh"
   metadata:
@@ -4,10 +4,11 @@ set -xe

 source $(dirname "$0")/tc-tests-utils.sh

-source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
+source $(dirname "$0")/tf_tc-vars.sh

 BAZEL_TARGETS="
 //native_client:libdeepspeech.so
+//native_client:generate_scorer_package
 "

 BAZEL_BUILD_FLAGS="${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS}"
@@ -8,6 +8,7 @@ build:
   artifact_url: "${system.swig_build.osx.url}"
   artifact_namespace: "${system.swig_build.osx.namespace}"
   scripts:
+    setup: ""
     build: "taskcluster/build.sh"
     package: "taskcluster/package.sh"
   metadata:
@@ -12,6 +12,7 @@ build:
     >
      apt-get -qq -y install autoconf automake bison build-essential
   scripts:
+    setup: ""
     build: "taskcluster/build.sh"
     package: "taskcluster/package.sh"
   metadata:
@@ -13,6 +13,7 @@ build:
      apt-get -qq -y install autoconf automake bison build-essential mingw-w64 &&
      (apt-get -qq -y install sudo || true)
   scripts:
+    setup: ""
     build: "taskcluster/build.sh x86_64-w64-mingw32"
     package: "taskcluster/package.sh"
   metadata:
@@ -99,7 +99,7 @@ verify_bazel_rebuild()

   mkdir -p ${TASKCLUSTER_ARTIFACTS} || true

-  cp ${DS_ROOT_TASK}/DeepSpeech/tf/bazel*.log ${TASKCLUSTER_ARTIFACTS}/
+  cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/

   spurious_rebuilds=$(grep 'Executing action' "${bazel_explain_file}" | grep 'Compiling' | grep -v -E 'no entry in the cache|unconditional execution is requested|Executing genrule //native_client:workspace_status|Compiling native_client/workspace_status.cc|Linking native_client/libdeepspeech.so' | wc -l)
   if [ "${spurious_rebuilds}" -ne 0 ]; then
|
|||||||
if is_patched_bazel; then
|
if is_patched_bazel; then
|
||||||
mkdir -p ${DS_ROOT_TASK}/DeepSpeech/ckd/ds ${DS_ROOT_TASK}/DeepSpeech/ckd/tf
|
mkdir -p ${DS_ROOT_TASK}/DeepSpeech/ckd/ds ${DS_ROOT_TASK}/DeepSpeech/ckd/tf
|
||||||
tar xf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-tf.tar --strip-components=4 -C ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/
|
tar xf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-tf.tar --strip-components=4 -C ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/
|
||||||
tar xf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-ds.tar --strip-components=4 -C ${DS_ROOT_TASK}/DeepSpeech/ckd/tf/
|
tar xf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-ds.tar --strip-components=4 -C ${DS_ROOT_TASK}/DeepSpeech/ds/ckd/tensorflow/
|
||||||
|
|
||||||
echo "Making a diff between CKD files"
|
echo "Making a diff between CKD files"
|
||||||
mkdir -p ${TASKCLUSTER_ARTIFACTS}
|
mkdir -p ${TASKCLUSTER_ARTIFACTS}
|
||||||
diff -urNw ${DS_ROOT_TASK}/DeepSpeech/ckd/tf/ ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/ | tee ${TASKCLUSTER_ARTIFACTS}/ckd.diff
|
diff -urNw ${DS_ROOT_TASK}/DeepSpeech/ds/ckd/tensorflow/ ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/ | tee ${TASKCLUSTER_ARTIFACTS}/ckd.diff
|
||||||
|
|
||||||
rm -fr ${DS_ROOT_TASK}/DeepSpeech/ckd/tf/ ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/
|
rm -fr ${DS_ROOT_TASK}/DeepSpeech/ds/ckd/tensorflow/ ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/
|
||||||
else
|
else
|
||||||
echo "Cannot get CKD information from release, please use patched Bazel"
|
echo "Cannot get CKD information from release, please use patched Bazel"
|
||||||
fi;
|
fi;
|
||||||
|
@@ -49,7 +49,7 @@ export ANDROID_TMP_DIR=/data/local/tmp

 mkdir -p ${TASKCLUSTER_TMP_DIR} || true

-export DS_TFDIR=${DS_ROOT_TASK}/DeepSpeech/tf
+export DS_TFDIR=${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow
 export DS_DSDIR=${DS_ROOT_TASK}/DeepSpeech/ds
 export DS_EXAMPLEDIR=${DS_ROOT_TASK}/DeepSpeech/examples

@ -173,26 +173,26 @@ do_deepspeech_npm_package()
|
|||||||
 
 do_bazel_build()
 {
-  cd ${DS_ROOT_TASK}/DeepSpeech/tf
+  cd ${DS_TFDIR}
   eval "export ${BAZEL_ENV_FLAGS}"
 
   if is_patched_bazel; then
-    find ${DS_ROOT_TASK}/DeepSpeech/tf/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-tf.tar -T -
+    find ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-tf.tar -T -
   fi;
 
   bazel ${BAZEL_OUTPUT_USER_ROOT} build \
     -s --explain bazel_monolithic.log --verbose_explanations --experimental_strict_action_env --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt ${BAZEL_BUILD_FLAGS} ${BAZEL_TARGETS}
 
   if is_patched_bazel; then
-    find ${DS_ROOT_TASK}/DeepSpeech/tf/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-ds.tar -T -
+    find ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-ds.tar -T -
   fi;
 
-  verify_bazel_rebuild "${DS_ROOT_TASK}/DeepSpeech/tf/bazel_monolithic.log"
+  verify_bazel_rebuild "${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel_monolithic.log"
 }
 
 shutdown_bazel()
 {
-  cd ${DS_ROOT_TASK}/DeepSpeech/tf
+  cd ${DS_TFDIR}
   bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown
 }
 
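Editor's note on the CKD steps above: the flow is snapshot, build, snapshot again, then diff. The patched Bazel emits *.ckd files under bazel-out/; any difference between the two snapshots flags an action that was rebuilt when it should have been served from cache. A rough Python equivalent of the find/diff pipeline (editor's sketch only; directory names are illustrative, not the script's actual paths):

    import os

    def collect_ckd(snapshot_dir):
        """Map relative path -> bytes for every *.ckd file under snapshot_dir."""
        snapshot = {}
        for root, _, files in os.walk(snapshot_dir):
            for name in files:
                if name.lower().endswith('.ckd'):
                    path = os.path.join(root, name)
                    with open(path, 'rb') as f:
                        snapshot[os.path.relpath(path, snapshot_dir)] = f.read()
        return snapshot

    # Snapshot taken before the DeepSpeech build vs. after it.
    before = collect_ckd('ckd/tensorflow')
    after = collect_ckd('ckd/ds')
    changed = [p for p in sorted(set(before) | set(after))
               if before.get(p) != after.get(p)]
    if changed:
        print('actions rebuilt instead of cached:', *changed, sep='\n  ')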
taskcluster/tc-decision.py (new file, 238 lines)
@@ -0,0 +1,238 @@
# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function, unicode_literals

from glob import glob
from functools import reduce

import json
import jsone
import os
import sys
import requests
import slugid
import yaml
import subprocess

import networkx as nx

TASKS_ROOT = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])))
TASKCLUSTER_API_BASEURL = 'http://taskcluster/queue/v1/task/%(task_id)s'

def string_to_dict(sid, value):
    parts = sid.split('.')

    def pack(parts):
        if len(parts) == 1:
            return {parts[0]: value}
        elif len(parts):
            return {parts[0]: pack(parts[1:])}
        return parts

    return pack(parts)
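A quick illustration of string_to_dict (editor's addition, not part of the commit): it expands a dotted key into nested dicts, which is how flat CI environment variables become the nested JSON-e context built further down.

    assert string_to_dict('event.head.branch', 'master') == \
        {'event': {'head': {'branch': 'master'}}}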

def merge_dicts(*dicts):
    if not reduce(lambda x, y: isinstance(y, dict) and x, dicts, True):
        raise TypeError("Object in *dicts not of type dict")
    if len(dicts) < 2:
        raise ValueError("Requires 2 or more dict objects")

    def merge(a, b):
        for d in set(a.keys()).union(b.keys()):
            if d in a and d in b:
                if type(a[d]) == type(b[d]):
                    if not isinstance(a[d], dict):
                        ret = list({a[d], b[d]})
                        # Collapse identical values back to a scalar; only
                        # sort when a real list of conflicting values remains.
                        if len(ret) == 1:
                            ret = ret[0]
                        else:
                            ret = sorted(ret)
                        yield (d, ret)
                    else:
                        yield (d, dict(merge(a[d], b[d])))
                else:
                    raise TypeError("Conflicting key:value type assignment", type(a[d]), a[d], type(b[d]), b[d])
            elif d in a:
                yield (d, a[d])
            elif d in b:
                yield (d, b[d])
            else:
                raise KeyError

    return reduce(lambda x, y: dict(merge(x, y)), dicts[1:], dicts[0])
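Likewise for merge_dicts (editor's addition): nested dicts merge recursively, identical scalars collapse back to a scalar, and conflicting scalars of the same type are kept as a sorted list.

    assert merge_dicts({'a': {'b': 1}}, {'a': {'c': 2}}) == {'a': {'b': 1, 'c': 2}}
    assert merge_dicts({'x': 1}, {'x': 2}) == {'x': [1, 2]}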

def taskcluster_event_context():
    das_context = {}

    # Pre-filtering: fold GITHUB_HEAD_USER into GITHUB_HEAD_USER_LOGIN so it
    # lands at event.head.user.login instead of clashing with event.head.user.*
    # (iterate over a copy of the keys since we delete from os.environ)
    for k in list(os.environ.keys()):
        if k == 'GITHUB_HEAD_USER':
            os.environ['GITHUB_HEAD_USER_LOGIN'] = os.environ[k]
            del os.environ['GITHUB_HEAD_USER']

    for k in os.environ.keys():
        if k == 'TASK_ID':
            parts = string_to_dict('taskcluster.taskGroupId', os.environ[k])
            das_context = merge_dicts(das_context, parts)

        if k.startswith('GITHUB_'):
            parts = string_to_dict(k.lower().replace('_', '.').replace('github', 'event'), os.environ[k])
            das_context = merge_dicts(das_context, parts)

    return das_context

def load_specific_contextFile(file):
    specific_context = {}

    try:
        with open(os.path.join(TASKS_ROOT, file)) as src:
            specific_context = yaml.load(src)

        if specific_context is None:
            specific_context = {}
    except FileNotFoundError:
        specific_context = {}

    return specific_context

def defaultValues_build_context():
    return load_specific_contextFile('.build.yml')

def shared_context():
    return load_specific_contextFile('.shared.yml')

def create_task_payload(build, base_context):
    print('build', build)
    build_type = os.path.splitext(os.path.basename(build))[0]

    build_context = defaultValues_build_context()
    with open(build) as src:
        build_context['build'].update(yaml.load(src)['build'])

    # Be able to use what has been defined in base_context,
    # e.g., ${event.head.branch}
    build_context = jsone.render(build_context, base_context)
    template_context = {
        'taskcluster': {
            'taskId': as_slugid(build_type)
        },
        'build_type': build_type
    }

    with open(os.path.join(TASKS_ROOT, build_context['build']['template_file'])) as src:
        template = yaml.load(src)

    contextes = merge_dicts({}, base_context, template_context, build_context)
    for one_context in glob(os.path.join(TASKS_ROOT, '*.cyml')):
        with open(one_context) as src:
            contextes = merge_dicts(contextes, yaml.load(src))

    return jsone.render(template, contextes)
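create_task_payload renders through JSON-e twice: once so the per-build YAML can reference values such as ${event.head.branch}, and once to render the final task template against the merged context. A minimal illustration of the call (editor's sketch, using the json-e 2.x API pinned in tc-decision_reqs.txt below):

    import jsone

    rendered = jsone.render({'metadata': {'name': 'test for ${event.head.branch}'}},
                            {'event': {'head': {'branch': 'master'}}})
    # rendered == {'metadata': {'name': 'test for master'}}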

def send_task(t):
    url = TASKCLUSTER_API_BASEURL % {'task_id': t['taskId']}
    del t['taskId']

    r = requests.put(url, json=t)

    print(url, r.status_code)
    if r.status_code != requests.codes.ok:
        print(json.dumps(t, indent=2))
        print(r.content)
        print(json.loads(r.content.decode())['message'])

    return r.status_code == requests.codes.ok

slugids = {}
def as_slugid(name):
    if name not in slugids:
        slugids[name] = slugid.nice().decode()
        print('cache miss', name, slugids[name])
    else:
        print('cache hit', name, slugids[name])
    return slugids[name]
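as_slugid memoizes one slug per build name, so a task's own definition and every dependency edge pointing at it agree on the same taskId (editor's illustration; the build name is hypothetical):

    tid = as_slugid('linux-amd64-cpu-opt')           # prints 'cache miss'
    assert as_slugid('linux-amd64-cpu-opt') == tid   # prints 'cache hit'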

def to_int(x):
    return int(x)

def functions_context():
    return {
        'as_slugid': as_slugid,
        'to_int': to_int
    }

def is_dry_run():
    return (len(sys.argv) > 1) and (sys.argv[1] == '--dry')

def should_run():
    # Make a quick clone to fetch the last commit
    try:
        subprocess.check_call([
            'git', 'clone', '--quiet', '-b', os.environ.get('GITHUB_HEAD_BRANCH'),
            '--single-branch', os.environ.get('GITHUB_HEAD_REPO_URL'),
            '--depth=1', '/tmp/ds-clone/'
        ], env={'GIT_LFS_SKIP_SMUDGE': '1'})
    except subprocess.CalledProcessError as e:
        print("Error while git cloning:", e, file=sys.stderr)
        return False

    try:
        git_msg = subprocess.check_output([
            'git', '--git-dir=/tmp/ds-clone/.git/',
            'log', '--format=%b', '-n', '1',
            os.environ.get('GITHUB_HEAD_SHA')
        ]).decode('utf-8').strip().upper()
    except subprocess.CalledProcessError as e:
        print("Error while git show:", e, file=sys.stderr)
        return False

    print('Commit message:', git_msg)

    x_deepspeech = filter(lambda x: 'X-DEEPSPEECH:' in x, git_msg.split('\n'))
    if len(list(filter(lambda x: 'NOBUILD' in x, x_deepspeech))) == 1:
        print('Not running anything according to commit message')
        return False

    return True
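should_run implements a commit-message escape hatch: a body line containing X-DEEPSPEECH: NOBUILD skips the whole task graph. The filter above can be exercised like this (editor's illustration; the commit message is hypothetical):

    git_msg = 'Fix typo in docs\n\nX-DeepSpeech: NOBUILD'.strip().upper()
    x_deepspeech = filter(lambda x: 'X-DEEPSPEECH:' in x, git_msg.split('\n'))
    assert len(list(filter(lambda x: 'NOBUILD' in x, x_deepspeech))) == 1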

if __name__ == '__main__':
    if not is_dry_run():
        # We might want to NOT run in some cases
        if not should_run():
            sys.exit(0)

    base_context = taskcluster_event_context()
    base_context = merge_dicts(base_context, functions_context())
    base_context = merge_dicts(base_context, shared_context())

    root_task = base_context['taskcluster']['taskGroupId']

    tasks_graph = nx.DiGraph()
    tasks = {}

    for build in glob(os.path.join(TASKS_ROOT, '*.yml')):
        t = create_task_payload(build, base_context)

        # We allow template to produce completely empty output
        if not t:
            continue

        if 'dependencies' in t and len(t['dependencies']) > 0:
            for dep in t['dependencies']:
                tasks_graph.add_edge(t['taskId'], dep)
        else:
            tasks_graph.add_edge(t['taskId'], root_task)

        tasks[t['taskId']] = t

    for task in nx.dfs_postorder_nodes(tasks_graph):
        # root_task is the task group and also the task id that is already
        # running, so we don't have to schedule that
        if task == root_task:
            continue

        t = tasks[task]
        if is_dry_run():
            print(json.dumps(t, indent=2))
            continue

        p = send_task(t)
        if not p:
            sys.exit(1)
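The scheduling order comes from a depth-first postorder over the dependency graph, so dependencies are submitted before the tasks that need them (editor's sketch with hypothetical task names):

    import networkx as nx

    g = nx.DiGraph()
    g.add_edge('test-task', 'build-task')   # test-task depends on build-task
    g.add_edge('build-task', 'root-task')   # root-task is the decision task
    print(list(nx.dfs_postorder_nodes(g)))
    # ['root-task', 'build-task', 'test-task'] -- dependencies come first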
taskcluster/tc-decision_reqs.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
json-e == 2.3.1
networkx
pyaml
requests
slugid == 1.0.7
@@ -24,6 +24,7 @@ package_native_client()
   ${TAR} -cf - \
       -C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
       -C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so.if.lib \
+      -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \
      -C ${deepspeech_dir}/ LICENSE \
      -C ${deepspeech_dir}/native_client/ deepspeech${PLATFORM_EXE_SUFFIX} \
      -C ${deepspeech_dir}/native_client/ deepspeech.h \
@@ -34,6 +35,7 @@ package_native_client()
 package_native_client_ndk()
 {
   deepspeech_dir=${DS_DSDIR}
+  tensorflow_dir=${DS_TFDIR}
   artifacts_dir=${TASKCLUSTER_ARTIFACTS}
   artifact_name=$1
   arch_abi=$2
@@ -56,6 +58,7 @@ package_native_client_ndk()
   tar -cf - \
       -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ deepspeech \
       -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libdeepspeech.so \
+      -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \
      -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libc++_shared.so \
      -C ${deepspeech_dir}/native_client/ deepspeech.h \
      -C ${deepspeech_dir}/ LICENSE \
@@ -2,15 +2,13 @@
 
 set -ex
 
-# tc-decision.py assumes being at the root folder
-curdir=$(dirname "$0")/..
+curdir=$(dirname "$0")/
 
 pip3 install --quiet --user --upgrade pip
 
 export PATH=$HOME/.local/bin/:$PATH
 
-curl -L --silent https://raw.githubusercontent.com/lissyx/taskcluster-github-decision/${TC_DECISION_SHA}/requirements.txt | pip3 install --quiet --user --upgrade -r /dev/stdin
-curl -L --silent https://raw.githubusercontent.com/lissyx/taskcluster-github-decision/${TC_DECISION_SHA}/tc-decision.py > ${curdir}/tc-decision.py
+pip3 install --quiet --user --upgrade -r ${curdir}/tc-decision_reqs.txt
 
 # First, perform dry run for push and pull request
 # This should help us track merge failures in advance
@@ -38,8 +38,6 @@ then:
         "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
         "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
       - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0"
-      - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}"
-      - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm"
       - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm"
       - $let:
           extraSystemSetup: { $eval: strip(str(build.system_setup)) }
@@ -69,10 +67,6 @@ then:
           content:
             sha256: ${system.msys2.sha}
             url: ${system.msys2.url}
-        - file: filesystem-2020.02-3-x86_64.pkg.tar.xz
-          content:
-            sha256: ${system.msys2_filesystem_pkg.sha}
-            url: ${system.msys2_filesystem_pkg.url}
         - file: pyenv.tar.gz
           content:
             url: ${system.pyenv.win.url}
@@ -40,8 +40,6 @@ then:
         "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
         "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
       - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0"
-      - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}"
-      - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm"
       - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm"
       - $let:
           extraSystemSetup: { $eval: strip(str(build.system_setup)) }
@@ -71,10 +69,6 @@ then:
           content:
             sha256: ${system.msys2.sha}
             url: ${system.msys2.url}
-        - file: filesystem-2020.02-3-x86_64.pkg.tar.xz
-          content:
-            sha256: ${system.msys2_filesystem_pkg.sha}
-            url: ${system.msys2_filesystem_pkg.url}
         - file: pyenv.tar.gz
           content:
             url: ${system.pyenv.win.url}
Some files were not shown because too many files have changed in this diff.