Merge pull request #3050 from mozilla/update-r0.8

Update r0.8
2020-06-08 15:53:21 +02:00 · 2020-06-08 15:53:21 +02:00 · 7024451938
commit 7024451938
parent 29ec97b2ce c6efdc6d01
36 changed files with 234 additions and 98 deletions
--- a/15
+++ b/15
@ -194,16 +194,17 @@ WORKDIR /DeepSpeech/
 RUN pip3 install tensorflow-gpu==1.15.0


-# Make DeepSpeech and install Python bindings
+# Build client.cc and install Python client and decoder bindings
 ENV TFDIR /tensorflow
 WORKDIR /DeepSpeech/native_client
 RUN make deepspeech
-WORKDIR /DeepSpeech/native_client/python
-RUN make bindings
-RUN pip3 install --upgrade dist/deepspeech*
-WORKDIR /DeepSpeech/native_client/ctcdecode
-RUN make bindings
-RUN pip3 install --upgrade dist/*.whl
+
+WORKDIR /DeepSpeech
+RUN cd native_client/python && make bindings
+RUN pip3 install --upgrade native_client/python/dist/*.whl
+
+RUN cd native_client/ctcdecode && make bindings
+RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl


 # << END Build and bind
--- a/1
+++ b/1
@ -1 +0,0 @@
-6
--- a/1
+++ b/1
@ -0,0 +1 @@
+training/deepspeech_training/GRAPH_VERSION
--- a/1
+++ b/1
@ -1 +0,0 @@
-0.8.0-alpha.2
--- a/1
+++ b/1
@ -0,0 +1 @@
+training/deepspeech_training/VERSION
--- a/bin/import_cv.py
+++ b/bin/import_cv.py
@ -127,7 +127,7 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
    pool.join()

    print('Writing "%s"...' % target_csv)
-    with open(target_csv, "w") as target_csv_file:
+    with open(target_csv, "w", encoding="utf-8", newline="") as target_csv_file:
        writer = csv.DictWriter(target_csv_file, fieldnames=FIELDNAMES)
        writer.writeheader()
        bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@ -127,7 +127,7 @@ def _maybe_convert_set(dataset, tsv_dir, audio_dir, filter_obj, space_after_ever

    output_csv = os.path.join(os.path.abspath(audio_dir), dataset + ".csv")
    print("Saving new DeepSpeech-formatted CSV file to: ", output_csv)
-    with open(output_csv, "w", encoding="utf-8") as output_csv_file:
+    with open(output_csv, "w", encoding="utf-8", newline="") as output_csv_file:
        print("Writing CSV file for DeepSpeech.py as: ", output_csv)
        writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
        writer.writeheader()
--- a/bin/import_lingua_libre.py
+++ b/bin/import_lingua_libre.py
@ -136,9 +136,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
    pool.close()
    pool.join()

-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
--- a/bin/import_m-ailabs.py
+++ b/bin/import_m-ailabs.py
@ -137,9 +137,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
    pool.close()
    pool.join()

-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
--- a/bin/import_slr57.py
+++ b/bin/import_slr57.py
@ -159,9 +159,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
    pool.close()
    pool.join()

-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
--- a/bin/import_swc.py
+++ b/bin/import_swc.py
@ -461,7 +461,7 @@ def write_csvs(samples, language):
        base_dir = os.path.abspath(CLI_ARGS.base_dir)
        csv_path = os.path.join(base_dir, language + "-" + sub_set + ".csv")
        print('Writing "{}"...'.format(csv_path))
-        with open(csv_path, "w") as csv_file:
+        with open(csv_path, "w", encoding="utf-8", newline="") as csv_file:
            writer = csv.DictWriter(
                csv_file, fieldnames=FIELDNAMES_EXT if CLI_ARGS.add_meta else FIELDNAMES
            )
--- a/bin/import_ts.py
+++ b/bin/import_ts.py
@ -131,9 +131,9 @@ def _maybe_convert_sets(target_dir, extracted_data, english_compatible=False):
    pool.close()
    pool.join()

-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
--- a/bin/import_tuda.py
+++ b/bin/import_tuda.py
@ -111,7 +111,7 @@ def write_csvs(extracted):
            CLI_ARGS.base_dir, "tuda-{}-{}.csv".format(TUDA_VERSION, sub_set)
        )
        print('Writing "{}"...'.format(csv_path))
-        with open(csv_path, "w") as csv_file:
+        with open(csv_path, "w", encoding="utf-8", newline="") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=FIELDNAMES)
            writer.writeheader()
            set_dir = os.path.join(extracted, sub_set)
--- a/doc/Makefile
+++ b/doc/Makefile
@ -14,34 +14,23 @@ PIP_INSTALL   ?= pip3 install --user
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

-.PHONY: help pip3 npm Makefile doxygen-c doxygen-java
-
-doxygen-c:
-	cd ../ && doxygen doc/doxygen-c.conf
-
-doxygen-java:
-	cd ../ && doxygen doc/doxygen-java.conf
-
-doxygen-dotnet:
-	cd ../ && doxygen doc/doxygen-dotnet.conf
+.PHONY: help pip3 Makefile

 pip3:
 	$(PIP_INSTALL) -r ../taskcluster/docs-requirements.txt

-npm:
-	npm install typedoc@0.17.4 typescript@3.8.3 @types/node@13.9.x
-
 submodule:
 	git submodule update --init --remote

 # Add submodule update dependency to Sphinx's "html" target
-html: Makefile submodule pip3 npm doxygen-c doxygen-java doxygen-dotnet
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+html: Makefile submodule pip3
+	@PATH=$$HOME/.local/bin:`pwd`/../node_modules/.bin/:$$PATH \
+	     $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

 dist: html
 	cd $(BUILDDIR)/html/ && zip -r9 ../../html.zip *

 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile pip3 npm doxygen-c doxygen-java doxygen-dotnet
+%: Makefile pip3
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/doc/USING.rst
+++ b/doc/USING.rst
@ -37,8 +37,8 @@ If you want to use the pre-trained English model for performing speech-to-text,

 .. code-block:: bash

-   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.pbmm
-   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.scorer
+   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm
+   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer

 Model compatibility
 ^^^^^^^^^^^^^^^^^^^
@ -113,7 +113,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett

 .. code-block:: bash

-   deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio my_audio_file.wav
+   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio my_audio_file.wav

 The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio.

@ -177,7 +177,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett

 .. code-block:: bash

-   ./deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio audio_input.wav
+   ./deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio_input.wav

 See the help output with ``./deepspeech -h`` for more details.

--- a/doc/conf.py
+++ b/doc/conf.py
@ -26,12 +26,14 @@ sys.path.insert(0, os.path.abspath('../'))

 autodoc_mock_imports = ['deepspeech']

-read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
-if read_the_docs_build:
-    import subprocess
-    subprocess.call('cd ../ && doxygen doc/doxygen-c.conf', shell=True)
-    subprocess.call('cd ../ && doxygen doc/doxygen-java.conf', shell=True)
-    subprocess.call('cd ../ && doxygen doc/doxygen-dotnet.conf', shell=True)
+# This is in fact only relevant on ReadTheDocs, but we want to run the same way
+# on our CI as in RTD to avoid regressions on RTD that we would not catch on
+# TaskCluster
+import subprocess
+subprocess.check_call('cd ../ && npm install typedoc@0.17.4 typescript@3.8.3 @types/node@13.9.x', shell=True)
+subprocess.check_call('cd ../ && doxygen doc/doxygen-c.conf', shell=True)
+subprocess.check_call('cd ../ && doxygen doc/doxygen-java.conf', shell=True)
+subprocess.check_call('cd ../ && doxygen doc/doxygen-dotnet.conf', shell=True)

 # -- General configuration ------------------------------------------------

--- a/doc/index.rst
+++ b/doc/index.rst
@ -20,15 +20,15 @@ To install and use DeepSpeech all you have to do is:
   pip3 install deepspeech

   # Download pre-trained English model files
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.pbmm
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.scorer
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer

   # Download example audio files
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/audio-0.7.0.tar.gz
-   tar xvf audio-0.7.0.tar.gz
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/audio-0.7.3.tar.gz
+   tar xvf audio-0.7.3.tar.gz

   # Transcribe an audio file
-   deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio audio/2830-3980-0043.wav
+   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav

 A pre-trained English model is available for use and can be downloaded following the instructions in :ref:`the usage docs <usage-docs>`. For the latest release, including pre-trained models and checkpoints, `see the GitHub releases page <https://github.com/mozilla/DeepSpeech/releases/latest>`_.

@ -44,7 +44,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
   pip3 install deepspeech-gpu

   # Transcribe an audio file.
-   deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio audio/2830-3980-0043.wav
+   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav

 Please ensure you have the required :ref:`CUDA dependencies <cuda-deps>`.

--- a/native_client/bazel_workspace_status_cmd.sh
+++ b/native_client/bazel_workspace_status_cmd.sh
@ -22,8 +22,8 @@ echo "STABLE_TF_GIT_VERSION ${tf_git_rev}"
 pushd $(dirname "$0")
 ds_git_rev=$(git describe --long --tags)
 echo "STABLE_DS_GIT_VERSION ${ds_git_rev}"
-ds_version=$(cat ../VERSION)
+ds_version=$(cat ../training/deepspeech_training/VERSION)
 echo "STABLE_DS_VERSION ${ds_version}"
-ds_graph_version=$(cat ../GRAPH_VERSION)
+ds_graph_version=$(cat ../training/deepspeech_training/GRAPH_VERSION)
 echo "STABLE_DS_GRAPH_VERSION ${ds_graph_version}"
 popd
--- a/native_client/ctcdecode/setup.py
+++ b/native_client/ctcdecode/setup.py
@ -51,7 +51,7 @@ def maybe_rebuild(srcs, out_name, build_dir):
                     num_parallel=known_args.num_processes,
                     debug=debug)

-project_version = read('../../VERSION').strip()
+project_version = read('../../training/deepspeech_training/VERSION').strip()

 build_dir = 'temp_build/temp_build'

--- a/native_client/ctcdecode/swigwrapper.i
+++ b/native_client/ctcdecode/swigwrapper.i
@ -43,7 +43,13 @@ namespace std {
 %template(OutputVectorVector) std::vector<std::vector<Output>>;

 // Import only the error code enum definitions from deepspeech.h
-%ignore "";
-%rename("%s", regexmatch$name="DS_ERR_") "";
-%rename("%s", regexmatch$name="DeepSpeech_Error_Codes") "";
+// We can't just do |%ignore "";| here because it affects this file globally (even
+// files %include'd above). That causes SWIG to lose destructor information and
+// leads to leaks of the wrapper objects.
+// Instead we ignore functions and classes (structs), which are the only other
+// things in deepspeech.h. If we add some new construct to deepspeech.h we need
+// to update the ignore rules here to avoid exposing unwanted APIs in the decoder
+// package.
+%rename("$ignore", %$isfunction) "";
+%rename("$ignore", %$isclass) "";
 %include "../deepspeech.h"
--- a/native_client/javascript/Makefile
+++ b/native_client/javascript/Makefile
@ -3,7 +3,7 @@ NODE_ABI_TARGET    ?=
 NODE_BUILD_VERBOSE ?= --verbose
 NPM_TOOL           ?= npm
 PROJECT_NAME       ?= deepspeech
-PROJECT_VERSION    ?= $(shell cat ../../VERSION | tr -d '\n')
+PROJECT_VERSION    ?= $(shell cat ../../training/deepspeech_training/VERSION | tr -d '\n')
 NPM_ROOT           ?= $(shell npm root)

 NODE_MODULES_BIN   ?= $(NPM_ROOT)/.bin/
--- a/native_client/python/setup.py
+++ b/native_client/python/setup.py
@ -31,7 +31,7 @@ def main():
        sys.argv.remove('--project_name')
        sys.argv.pop(project_name_idx)

-    with open('../../VERSION', 'r') as ver:
+    with open('../../training/deepspeech_training/VERSION', 'r') as ver:
        project_version = ver.read().strip()

    class BuildExtFirst(build):
--- a/setup.py
+++ b/setup.py
@ -7,20 +7,8 @@ from pkg_resources import parse_version
 from setuptools import find_packages, setup


-def get_decoder_pkg_url(version, artifacts_root=None):
-    is_arm = 'arm' in platform.machine()
-    is_mac = 'darwin' in sys.platform
-    is_win = 'win32' in sys.platform
-    is_64bit = sys.maxsize > (2**31 - 1)
-
-    if is_arm:
-        tc_arch = 'arm64-ctc' if is_64bit else 'arm-ctc'
-    elif is_mac:
-        tc_arch = 'osx-ctc'
-    elif is_win:
-        tc_arch = 'win-ctc'
-    else:
-        tc_arch = 'cpu-ctc'
+def get_tc_decoder_pkg_url(version, artifacts_root):
+    assert artifacts_root

    ds_version = parse_version(version)
    branch = "v{}".format(version)
@ -37,20 +25,15 @@ def get_decoder_pkg_url(version, artifacts_root=None):
    if plat == 'windows':
        plat = 'win'

-    is_ucs2 = sys.maxunicode < 0x10ffff
-    m_or_mu = 'mu' if is_ucs2 else 'm'
-
    # ABI does not contain m / mu anymore since Python 3.8 (inclusive)
    if sys.version_info.major == 3 and sys.version_info.minor >= 8:
        m_or_mu = ''
+    else:
+        is_ucs2 = sys.maxunicode < 0x10ffff
+        m_or_mu = 'mu' if is_ucs2 else 'm'

    pyver = ''.join(str(i) for i in sys.version_info[0:2])

-    if not artifacts_root:
-        artifacts_root = 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.{branch_name}.{tc_arch_string}/artifacts/public'.format(
-            branch_name=branch,
-            tc_arch_string=tc_arch)
-
    return 'ds_ctcdecoder @ {artifacts_root}/ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl'.format(
        artifacts_root=artifacts_root,
        ds_version=ds_version,
@ -66,8 +49,6 @@ def main():
    with open(str(version_file)) as fin:
        version = fin.read().strip()

-    decoder_pkg_url = get_decoder_pkg_url(version)
-
    install_requires_base = [
        'tensorflow == 1.15.2',
        'numpy',
@ -89,18 +70,22 @@ def main():
        'soundfile',
    ]

+    decoder_pypi_dep = [
+        'ds_ctcdecoder == {}'.format(version)
+    ]
+
    # Due to pip craziness environment variables are the only consistent way to
    # get options into this script when doing `pip install`.
    tc_decoder_artifacts_root = os.environ.get('DECODER_ARTIFACTS_ROOT', '')
    if tc_decoder_artifacts_root:
        # We're running inside the TaskCluster environment, override the decoder
        # package URL with the one we just built.
-        decoder_pkg_url = get_decoder_pkg_url(version, tc_decoder_artifacts_root)
+        decoder_pkg_url = get_tc_decoder_pkg_url(version, tc_decoder_artifacts_root)
        install_requires = install_requires_base + [decoder_pkg_url]
    elif os.environ.get('DS_NODECODER', ''):
        install_requires = install_requires_base
    else:
-        install_requires = install_requires_base + [decoder_pkg_url]
+        install_requires = install_requires_base + decoder_pypi_dep

    setup(
        name='deepspeech_training',
--- a/taskcluster/docs-build.sh
+++ b/taskcluster/docs-build.sh
@ -5,6 +5,5 @@ set -xe
 THIS=$(dirname "$0")

 pushd ${THIS}/../
-  export PATH=$HOME/.local/bin:${THIS}/../doc/node_modules/.bin/:$PATH
  make -C doc/ dist
 popd
--- a/taskcluster/scriptworker-task-github.yml
+++ b/taskcluster/scriptworker-task-github.yml
@ -49,7 +49,9 @@ build:
      - "node-package-tflite"
    cpp:
      - "darwin-amd64-cpu-opt"
+      - "darwin-amd64-tflite-opt"
      - "linux-amd64-cpu-opt"
+      - "linux-amd64-tflite-opt"
      - "linux-amd64-gpu-opt"
      - "linux-rpi3-cpu-opt"
      - "linux-arm64-cpu-opt"
--- a/taskcluster/scriptworker-task-pypi.yml
+++ b/taskcluster/scriptworker-task-pypi.yml
@ -5,13 +5,16 @@ build:
    # - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
    - "darwin-amd64-cpu-opt"
    - "darwin-amd64-tflite-opt"
+    - "darwin-amd64-ctc-opt"
    - "linux-amd64-cpu-opt"
    - "linux-amd64-tflite-opt"
    - "linux-amd64-gpu-opt"
+    - "linux-amd64-ctc-opt"
    - "linux-rpi3-cpu-opt"
    - "win-amd64-cpu-opt"
    - "win-amd64-tflite-opt"
    - "win-amd64-gpu-opt"
+    - "win-amd64-ctc-opt"
  allowed:
    - "tag"
  ref_match: "refs/tags/"
@ -24,13 +27,16 @@ build:
    python:
      - "darwin-amd64-cpu-opt"
      - "darwin-amd64-tflite-opt"
+      - "darwin-amd64-ctc-opt"
      - "linux-amd64-cpu-opt"
      - "linux-amd64-tflite-opt"
      - "linux-amd64-gpu-opt"
+      - "linux-amd64-ctc-opt"
      - "linux-rpi3-cpu-opt"
      - "win-amd64-cpu-opt"
      - "win-amd64-tflite-opt"
      - "win-amd64-gpu-opt"
+      - "win-amd64-ctc-opt"
      # - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
    nuget: []
  metadata:
--- a/taskcluster/tc-all-vars.sh
+++ b/taskcluster/tc-all-vars.sh
@ -57,7 +57,7 @@ export DS_TFDIR=${DS_ROOT_TASK}/DeepSpeech/tf
 export DS_DSDIR=${DS_ROOT_TASK}/DeepSpeech/ds
 export DS_EXAMPLEDIR=${DS_ROOT_TASK}/DeepSpeech/examples

-export DS_VERSION="$(cat ${DS_DSDIR}/VERSION)"
+export DS_VERSION="$(cat ${DS_DSDIR}/training/deepspeech_training/VERSION)"

 export GRADLE_USER_HOME=${DS_ROOT_TASK}/gradle-cache
 export ANDROID_SDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/SDK/
--- a/taskcluster/tc-train-tests.sh
+++ b/taskcluster/tc-train-tests.sh
@ -8,6 +8,13 @@ extract_python_versions "$1" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pya

 bitrate=$2

+decoder_src=$3
+
+if [ "$decoder_src" = "--pypi" ]; then
+    # Disable automatically picking up decoder package built in this CI group
+    export DECODER_ARTIFACTS_ROOT=""
+fi
+
 mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
 mkdir -p /tmp/train || true
 mkdir -p /tmp/train_tflite || true
--- a/taskcluster/test-linux-opt-tag-base.tyml
+++ b/taskcluster/test-linux-opt-tag-base.tyml
@ -0,0 +1,65 @@
+$if: '(event.event in build.allowed) && ((event.event != "tag") || (build.ref_match in event.head.ref))'
+then:
+  taskId: ${taskcluster.taskId}
+  provisionerId: ${taskcluster.docker.provisionerId}
+  workerType: ${taskcluster.docker.workerType}
+  taskGroupId: ${taskcluster.taskGroupId}
+  schedulerId: ${taskcluster.schedulerId}
+  dependencies:
+    $map: { $eval: build.dependencies }
+    each(b):
+      $eval: as_slugid(b)
+  created: { $fromNow: '0 sec' }
+  deadline: { $fromNow: '1 day' }
+  expires: { $fromNow: '7 days' }
+
+  extra:
+    github:
+      { $eval: taskcluster.github_events.pull_request }
+
+  payload:
+    maxRunTime: { $eval: to_int(build.maxRunTime) }
+    image: ${build.docker_image}
+
+    env:
+      $let:
+        training: { $eval: as_slugid(build.test_model_task) }
+        linux_amd64_build: { $eval: as_slugid("linux-amd64-cpu-opt") }
+        linux_amd64_tflite: { $eval: as_slugid("linux-amd64-tflite-opt") }
+        linux_amd64_ctc: { $eval: as_slugid("linux-amd64-ctc-opt") }
+      in:
+        DEEPSPEECH_ARTIFACTS_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_build}/artifacts/public
+        DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_tflite}/artifacts/public
+        DEEPSPEECH_TEST_MODEL: https://community-tc.services.mozilla.com/api/queue/v1/task/${training}/artifacts/public/output_graph.pb
+        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb
+        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm
+        DECODER_ARTIFACTS_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_ctc}/artifacts/public
+        PIP_DEFAULT_TIMEOUT: "60"
+        EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
+
+    command:
+      - "/bin/bash"
+      - "--login"
+      - "-cxe"
+      - $let:
+          extraSystemSetup: { $eval: strip(str(build.system_setup)) }
+        in: >
+          ${aptEc2Mirrors} &&
+          apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox sudo wget && ${extraSystemSetup} &&
+          adduser --system --home ${system.homedir.linux} ${system.username} &&
+          cd ${system.homedir.linux} &&
+          echo -e "#!/bin/bash\nset -xe\n env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}&& mkdir -p ${system.homedir.linux}/pyenv-root/ && wget -O - ${system.pyenv.linux.url} | tar -C ${system.homedir.linux}/pyenv-root/ -xzf -" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
+          sudo -H -u ${system.username} /bin/bash /tmp/clone.sh &&
+          sudo -H -u ${system.username} --preserve-env /bin/bash ${build.args.tests_cmdline}
+
+    artifacts:
+      "public":
+        type: "directory"
+        path: "/tmp/artifacts/"
+        expires: { $fromNow: '7 days' }
+
+  metadata:
+    name: ${build.metadata.name}
+    description: ${build.metadata.description}
+    owner: ${event.head.user.email}
+    source: ${event.head.repo.url}
--- a/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml
+++ b/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml
@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 16kHz PyPI training Py3.5"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version, decoder package from PyPI"
--- a/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml
+++ b/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml
@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 16kHz PyPI training Py3.6"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version, decoder package from PyPI"
--- a/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml
+++ b/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml
@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 16kHz PyPI training Py3.7"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version, decoder package from PyPI"
--- a/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml
+++ b/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml
@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 8kHz PyPI training Py3.6"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version, decoder package from PyPI"
--- a/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml
+++ b/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml
@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k  --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 8kHz PyPI training Py3.7"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version, decoder package from PyPI"
--- a/training/deepspeech_training/GRAPH_VERSION
+++ b/training/deepspeech_training/GRAPH_VERSION
@ -1 +0,0 @@
-../../GRAPH_VERSION
--- a/training/deepspeech_training/GRAPH_VERSION
+++ b/training/deepspeech_training/GRAPH_VERSION
@ -0,0 +1 @@
+6
--- a/training/deepspeech_training/VERSION
+++ b/training/deepspeech_training/VERSION
@ -1 +0,0 @@
-../../VERSION
--- a/training/deepspeech_training/VERSION
+++ b/training/deepspeech_training/VERSION
@ -0,0 +1 @@
+0.8.0-alpha.2
--- a/training/deepspeech_training/evaluate.py
+++ b/training/deepspeech_training/evaluate.py
@ -61,7 +61,6 @@ def evaluate(test_csvs, create_model):
    # One rate per layer
    no_dropout = [None] * 6
    logits, _ = create_model(batch_x=batch_x,
-                             batch_size=FLAGS.test_batch_size,
                             seq_length=batch_x_len,
                             dropout=no_dropout)

--- a/training/deepspeech_training/util/feeding.py
+++ b/training/deepspeech_training/util/feeding.py
@ -151,7 +151,7 @@ def create_dataset(sources,
                              .map(process_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE))
    if enable_cache:
        dataset = dataset.cache(cache_path)
-    dataset = (dataset.window(batch_size, drop_remainder=True).flat_map(batch_fn)
+    dataset = (dataset.window(batch_size, drop_remainder=train_phase).flat_map(batch_fn)
                      .prefetch(len(Config.available_devices)))
    return dataset

@ -168,7 +168,7 @@ def split_audio_file(audio_path,
        segments = vad_split(frames, aggressiveness=aggressiveness)
        for segment in segments:
            segment_buffer, time_start, time_end = segment
-            samples = pcm_to_np(audio_format, segment_buffer)
+            samples = pcm_to_np(segment_buffer, audio_format)
            yield time_start, time_end, samples

    def to_mfccs(time_start, time_end, samples):