diff --git a/Dockerfile b/Dockerfile
index caf58247..8a03b3c4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -194,16 +194,17 @@ WORKDIR /DeepSpeech/
 RUN pip3 install tensorflow-gpu==1.15.0
 
-# Make DeepSpeech and install Python bindings
+# Build client.cc and install Python client and decoder bindings
 ENV TFDIR /tensorflow
 WORKDIR /DeepSpeech/native_client
 RUN make deepspeech
-WORKDIR /DeepSpeech/native_client/python
-RUN make bindings
-RUN pip3 install --upgrade dist/deepspeech*
-WORKDIR /DeepSpeech/native_client/ctcdecode
-RUN make bindings
-RUN pip3 install --upgrade dist/*.whl
+
+WORKDIR /DeepSpeech
+RUN cd native_client/python && make bindings
+RUN pip3 install --upgrade native_client/python/dist/*.whl
+
+RUN cd native_client/ctcdecode && make bindings
+RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
 
 # << END Build and bind
diff --git a/GRAPH_VERSION b/GRAPH_VERSION
deleted file mode 100644
index 1e8b3149..00000000
--- a/GRAPH_VERSION
+++ /dev/null
@@ -1 +0,0 @@
-6
diff --git a/GRAPH_VERSION b/GRAPH_VERSION
new file mode 120000
index 00000000..b9a65815
--- /dev/null
+++ b/GRAPH_VERSION
@@ -0,0 +1 @@
+training/deepspeech_training/GRAPH_VERSION
\ No newline at end of file
diff --git a/VERSION b/VERSION
deleted file mode 100644
index edcd751a..00000000
--- a/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.8.0-alpha.2
diff --git a/VERSION b/VERSION
new file mode 120000
index 00000000..8a3ed242
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+training/deepspeech_training/VERSION
\ No newline at end of file
diff --git a/bin/import_cv.py b/bin/import_cv.py
index e4251c1a..3754694f 100755
--- a/bin/import_cv.py
+++ b/bin/import_cv.py
@@ -127,7 +127,7 @@ def _maybe_convert_set(extracted_dir, source_csv, target_csv):
     pool.join()
 
     print('Writing "%s"...' % target_csv)
-    with open(target_csv, "w") as target_csv_file:
+    with open(target_csv, "w", encoding="utf-8", newline="") as target_csv_file:
         writer = csv.DictWriter(target_csv_file, fieldnames=FIELDNAMES)
         writer.writeheader()
         bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
diff --git a/bin/import_cv2.py b/bin/import_cv2.py
index 3b7e8fba..c2880a06 100755
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@@ -127,7 +127,7 @@ def _maybe_convert_set(dataset, tsv_dir, audio_dir, filter_obj, space_after_ever
     output_csv = os.path.join(os.path.abspath(audio_dir), dataset + ".csv")
     print("Saving new DeepSpeech-formatted CSV file to: ", output_csv)
 
-    with open(output_csv, "w", encoding="utf-8") as output_csv_file:
+    with open(output_csv, "w", encoding="utf-8", newline="") as output_csv_file:
         print("Writing CSV file for DeepSpeech.py as: ", output_csv)
         writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
         writer.writeheader()
diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py
index 6992d298..ec5047ba 100755
--- a/bin/import_lingua_libre.py
+++ b/bin/import_lingua_libre.py
@@ -136,9 +136,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
     pool.close()
     pool.join()
 
-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
diff --git a/bin/import_m-ailabs.py b/bin/import_m-ailabs.py
index e303feb3..963b2873 100755
--- a/bin/import_m-ailabs.py
+++ b/bin/import_m-ailabs.py
@@ -137,9 +137,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
     pool.close()
     pool.join()
 
-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
diff --git a/bin/import_slr57.py b/bin/import_slr57.py
index 1aa0e4d2..11e30fa4 100755
--- a/bin/import_slr57.py
+++ b/bin/import_slr57.py
@@ -159,9 +159,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
     pool.close()
     pool.join()
 
-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
diff --git a/bin/import_swc.py b/bin/import_swc.py
index d19f7c3e..0c0ceb64 100755
--- a/bin/import_swc.py
+++ b/bin/import_swc.py
@@ -461,7 +461,7 @@ def write_csvs(samples, language):
        base_dir = os.path.abspath(CLI_ARGS.base_dir)
        csv_path = os.path.join(base_dir, language + "-" + sub_set + ".csv")
        print('Writing "{}"...'.format(csv_path))
-        with open(csv_path, "w") as csv_file:
+        with open(csv_path, "w", encoding="utf-8", newline="") as csv_file:
            writer = csv.DictWriter(
                csv_file, fieldnames=FIELDNAMES_EXT if CLI_ARGS.add_meta else FIELDNAMES
            )
diff --git a/bin/import_ts.py b/bin/import_ts.py
index 86f5eab7..e6cdc1e8 100755
--- a/bin/import_ts.py
+++ b/bin/import_ts.py
@@ -131,9 +131,9 @@ def _maybe_convert_sets(target_dir, extracted_data, english_compatible=False):
     pool.close()
     pool.join()
 
-    with open(target_csv_template.format("train"), "w") as train_csv_file:  # 80%
-        with open(target_csv_template.format("dev"), "w") as dev_csv_file:  # 10%
-            with open(target_csv_template.format("test"), "w") as test_csv_file:  # 10%
+    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
+        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
+            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
diff --git a/bin/import_tuda.py b/bin/import_tuda.py
index fa48de61..5facd073 100755
--- a/bin/import_tuda.py
+++ b/bin/import_tuda.py
@@ -111,7 +111,7 @@ def write_csvs(extracted):
            CLI_ARGS.base_dir, "tuda-{}-{}.csv".format(TUDA_VERSION, sub_set)
        )
        print('Writing "{}"...'.format(csv_path))
-        with open(csv_path, "w") as csv_file:
+        with open(csv_path, "w", encoding="utf-8", newline="") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=FIELDNAMES)
            writer.writeheader()
            set_dir = os.path.join(extracted, sub_set)
diff --git a/doc/Makefile b/doc/Makefile
index 05819848..2b53edb8 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -14,34 +14,23 @@ PIP_INSTALL ?= pip3 install --user
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
-.PHONY: help pip3 npm Makefile doxygen-c doxygen-java
-
-doxygen-c:
-	cd ../ && doxygen doc/doxygen-c.conf
-
-doxygen-java:
-	cd ../ && doxygen doc/doxygen-java.conf
-
-doxygen-dotnet:
-	cd ../ && doxygen doc/doxygen-dotnet.conf
+.PHONY: help pip3 Makefile
 
 pip3:
 	$(PIP_INSTALL) -r ../taskcluster/docs-requirements.txt
 
-npm:
-	npm install typedoc@0.17.4 typescript@3.8.3 @types/node@13.9.x
-
 submodule:
 	git submodule update --init --remote
 
 # Add submodule update dependency to Sphinx's "html" target
-html: Makefile submodule pip3 npm doxygen-c doxygen-java doxygen-dotnet
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+html: Makefile submodule pip3
+	@PATH=$$HOME/.local/bin:`pwd`/../node_modules/.bin/:$$PATH \
+	$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
 dist: html
 	cd $(BUILDDIR)/html/ && zip -r9 ../../html.zip *
 
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile pip3 npm doxygen-c doxygen-java doxygen-dotnet
+%: Makefile pip3
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/doc/USING.rst b/doc/USING.rst
index 57ee279d..bafbc46f 100644
--- a/doc/USING.rst
+++ b/doc/USING.rst
@@ -37,8 +37,8 @@ If you want to use the pre-trained English model for performing speech-to-text,
 
 .. code-block:: bash
 
-   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.pbmm
-   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.scorer
+   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm
+   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer
 
 Model compatibility
 ^^^^^^^^^^^^^^^^^^^
@@ -113,7 +113,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio my_audio_file.wav
+   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio my_audio_file.wav
 
 The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio.
 
@@ -177,7 +177,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   ./deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio audio_input.wav
+   ./deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio_input.wav
 
 See the help output with ``./deepspeech -h`` for more details.
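The `bin/import_*.py` changes above all open their output CSVs with `encoding="utf-8"` and `newline=""`. A minimal, self-contained sketch (hypothetical file name and rows, not taken from the importers themselves) of what those two arguments buy when writing with `csv.DictWriter`:

```python
import csv

FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
rows = [{"wav_filename": "sample.wav", "wav_filesize": 1234, "transcript": "hällo wörld"}]

# newline="" lets the csv module control line endings itself; without it,
# Windows produces \r\r\n and readers see blank rows between records.
# encoding="utf-8" keeps non-ASCII transcripts from depending on the locale
# default (e.g. cp1252 on Windows), which would otherwise raise UnicodeEncodeError.
with open("samples.csv", "w", encoding="utf-8", newline="") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=FIELDNAMES)
    writer.writeheader()
    writer.writerows(rows)
```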
diff --git a/doc/conf.py b/doc/conf.py
index 3557c586..bb64d77e 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -26,12 +26,14 @@ sys.path.insert(0, os.path.abspath('../'))
 
 autodoc_mock_imports = ['deepspeech']
 
-read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
-if read_the_docs_build:
-    import subprocess
-    subprocess.call('cd ../ && doxygen doc/doxygen-c.conf', shell=True)
-    subprocess.call('cd ../ && doxygen doc/doxygen-java.conf', shell=True)
-    subprocess.call('cd ../ && doxygen doc/doxygen-dotnet.conf', shell=True)
+# This is in fact only relevant on ReadTheDocs, but we want to run the same way
+# on our CI as in RTD to avoid regressions on RTD that we would not catch on
+# TaskCluster
+import subprocess
+subprocess.check_call('cd ../ && npm install typedoc@0.17.4 typescript@3.8.3 @types/node@13.9.x', shell=True)
+subprocess.check_call('cd ../ && doxygen doc/doxygen-c.conf', shell=True)
+subprocess.check_call('cd ../ && doxygen doc/doxygen-java.conf', shell=True)
+subprocess.check_call('cd ../ && doxygen doc/doxygen-dotnet.conf', shell=True)
 
 # -- General configuration ------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index 6eca9ca3..008cbaa2 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -20,15 +20,15 @@ To install and use DeepSpeech all you have to do is:
 
    pip3 install deepspeech
 
    # Download pre-trained English model files
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.pbmm
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/deepspeech-0.7.0-models.scorer
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer
 
    # Download example audio files
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.0/audio-0.7.0.tar.gz
-   tar xvf audio-0.7.0.tar.gz
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/audio-0.7.3.tar.gz
+   tar xvf audio-0.7.3.tar.gz
 
    # Transcribe an audio file
-   deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio audio/2830-3980-0043.wav
+   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav
 
 A pre-trained English model is available for use and can be downloaded following the instructions in :ref:`the usage docs `. For the latest release, including pre-trained models and checkpoints, `see the GitHub releases page `_.
@@ -44,7 +44,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
    pip3 install deepspeech-gpu
 
    # Transcribe an audio file.
-   deepspeech --model deepspeech-0.7.0-models.pbmm --scorer deepspeech-0.7.0-models.scorer --audio audio/2830-3980-0043.wav
+   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav
 
 Please ensure you have the required :ref:`CUDA dependencies `.
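The doc/conf.py hunk above switches the doxygen and typedoc invocations from `subprocess.call` to `subprocess.check_call`, so a failing tool aborts the documentation build instead of being silently ignored. A small, illustrative sketch of the difference (using one of the commands from the diff):

```python
import subprocess

# call() only returns the exit status; the build keeps going even on failure.
status = subprocess.call('cd ../ && doxygen doc/doxygen-c.conf', shell=True)
print("doxygen exited with", status)

# check_call() raises CalledProcessError on a non-zero exit status, which makes
# the Sphinx/CI documentation build fail loudly instead of shipping broken docs.
try:
    subprocess.check_call('cd ../ && doxygen doc/doxygen-c.conf', shell=True)
except subprocess.CalledProcessError as err:
    raise SystemExit("doxygen failed with exit code {}".format(err.returncode))
```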
diff --git a/native_client/bazel_workspace_status_cmd.sh b/native_client/bazel_workspace_status_cmd.sh
index 7969b19a..a1a5a2a0 100755
--- a/native_client/bazel_workspace_status_cmd.sh
+++ b/native_client/bazel_workspace_status_cmd.sh
@@ -22,8 +22,8 @@ echo "STABLE_TF_GIT_VERSION ${tf_git_rev}"
 pushd $(dirname "$0")
 ds_git_rev=$(git describe --long --tags)
 echo "STABLE_DS_GIT_VERSION ${ds_git_rev}"
-ds_version=$(cat ../VERSION)
+ds_version=$(cat ../training/deepspeech_training/VERSION)
 echo "STABLE_DS_VERSION ${ds_version}"
-ds_graph_version=$(cat ../GRAPH_VERSION)
+ds_graph_version=$(cat ../training/deepspeech_training/GRAPH_VERSION)
 echo "STABLE_DS_GRAPH_VERSION ${ds_graph_version}"
 popd
diff --git a/native_client/ctcdecode/setup.py b/native_client/ctcdecode/setup.py
index 8a3876c9..82e702a8 100644
--- a/native_client/ctcdecode/setup.py
+++ b/native_client/ctcdecode/setup.py
@@ -51,7 +51,7 @@ def maybe_rebuild(srcs, out_name, build_dir):
                   num_parallel=known_args.num_processes,
                   debug=debug)
 
-project_version = read('../../VERSION').strip()
+project_version = read('../../training/deepspeech_training/VERSION').strip()
 
 build_dir = 'temp_build/temp_build'
diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i
index 64ed5f5a..ab5675be 100644
--- a/native_client/ctcdecode/swigwrapper.i
+++ b/native_client/ctcdecode/swigwrapper.i
@@ -43,7 +43,13 @@ namespace std {
 %template(OutputVectorVector) std::vector<std::vector<Output>>;
 
 // Import only the error code enum definitions from deepspeech.h
-%ignore "";
-%rename("%s", regexmatch$name="DS_ERR_") "";
-%rename("%s", regexmatch$name="DeepSpeech_Error_Codes") "";
+// We can't just do |%ignore "";| here because it affects this file globally (even
+// files %include'd above). That causes SWIG to lose destructor information and
+// leads to leaks of the wrapper objects.
+// Instead we ignore functions and classes (structs), which are the only other
+// things in deepspeech.h. If we add some new construct to deepspeech.h we need
+// to update the ignore rules here to avoid exposing unwanted APIs in the decoder
+// package.
+%rename("$ignore", %$isfunction) ""; +%rename("$ignore", %$isclass) ""; %include "../deepspeech.h" diff --git a/native_client/javascript/Makefile b/native_client/javascript/Makefile index 28b00046..ac5ee8ed 100644 --- a/native_client/javascript/Makefile +++ b/native_client/javascript/Makefile @@ -3,7 +3,7 @@ NODE_ABI_TARGET ?= NODE_BUILD_VERBOSE ?= --verbose NPM_TOOL ?= npm PROJECT_NAME ?= deepspeech -PROJECT_VERSION ?= $(shell cat ../../VERSION | tr -d '\n') +PROJECT_VERSION ?= $(shell cat ../../training/deepspeech_training/VERSION | tr -d '\n') NPM_ROOT ?= $(shell npm root) NODE_MODULES_BIN ?= $(NPM_ROOT)/.bin/ diff --git a/native_client/python/setup.py b/native_client/python/setup.py index b5a5d95b..0e1d0e62 100755 --- a/native_client/python/setup.py +++ b/native_client/python/setup.py @@ -31,7 +31,7 @@ def main(): sys.argv.remove('--project_name') sys.argv.pop(project_name_idx) - with open('../../VERSION', 'r') as ver: + with open('../../training/deepspeech_training/VERSION', 'r') as ver: project_version = ver.read().strip() class BuildExtFirst(build): diff --git a/setup.py b/setup.py index b7a4c132..6811b717 100644 --- a/setup.py +++ b/setup.py @@ -7,20 +7,8 @@ from pkg_resources import parse_version from setuptools import find_packages, setup -def get_decoder_pkg_url(version, artifacts_root=None): - is_arm = 'arm' in platform.machine() - is_mac = 'darwin' in sys.platform - is_win = 'win32' in sys.platform - is_64bit = sys.maxsize > (2**31 - 1) - - if is_arm: - tc_arch = 'arm64-ctc' if is_64bit else 'arm-ctc' - elif is_mac: - tc_arch = 'osx-ctc' - elif is_win: - tc_arch = 'win-ctc' - else: - tc_arch = 'cpu-ctc' +def get_tc_decoder_pkg_url(version, artifacts_root): + assert artifacts_root ds_version = parse_version(version) branch = "v{}".format(version) @@ -37,20 +25,15 @@ def get_decoder_pkg_url(version, artifacts_root=None): if plat == 'windows': plat = 'win' - is_ucs2 = sys.maxunicode < 0x10ffff - m_or_mu = 'mu' if is_ucs2 else 'm' - # ABI does not contain m / mu anymore after Python 3.8 if sys.version_info.major == 3 and sys.version_info.minor >= 8: m_or_mu = '' + else: + is_ucs2 = sys.maxunicode < 0x10ffff + m_or_mu = 'mu' if is_ucs2 else 'm' pyver = ''.join(str(i) for i in sys.version_info[0:2]) - if not artifacts_root: - artifacts_root = 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.{branch_name}.{tc_arch_string}/artifacts/public'.format( - branch_name=branch, - tc_arch_string=tc_arch) - return 'ds_ctcdecoder @ {artifacts_root}/ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl'.format( artifacts_root=artifacts_root, ds_version=ds_version, @@ -66,8 +49,6 @@ def main(): with open(str(version_file)) as fin: version = fin.read().strip() - decoder_pkg_url = get_decoder_pkg_url(version) - install_requires_base = [ 'tensorflow == 1.15.2', 'numpy', @@ -89,18 +70,22 @@ def main(): 'soundfile', ] + decoder_pypi_dep = [ + 'ds_ctcdecoder == {}'.format(version) + ] + # Due to pip craziness environment variables are the only consistent way to # get options into this script when doing `pip install`. tc_decoder_artifacts_root = os.environ.get('DECODER_ARTIFACTS_ROOT', '') if tc_decoder_artifacts_root: # We're running inside the TaskCluster environment, override the decoder # package URL with the one we just built. 
-        decoder_pkg_url = get_decoder_pkg_url(version, tc_decoder_artifacts_root)
+        decoder_pkg_url = get_tc_decoder_pkg_url(version, tc_decoder_artifacts_root)
         install_requires = install_requires_base + [decoder_pkg_url]
     elif os.environ.get('DS_NODECODER', ''):
         install_requires = install_requires_base
     else:
-        install_requires = install_requires_base + [decoder_pkg_url]
+        install_requires = install_requires_base + decoder_pypi_dep
 
     setup(
         name='deepspeech_training',
diff --git a/taskcluster/docs-build.sh b/taskcluster/docs-build.sh
index 27d207c9..04ed8a85 100644
--- a/taskcluster/docs-build.sh
+++ b/taskcluster/docs-build.sh
@@ -5,6 +5,5 @@ set -xe
 THIS=$(dirname "$0")
 
 pushd ${THIS}/../
-  export PATH=$HOME/.local/bin:${THIS}/../doc/node_modules/.bin/:$PATH
   make -C doc/ dist
 popd
diff --git a/taskcluster/scriptworker-task-github.yml b/taskcluster/scriptworker-task-github.yml
index 75799d40..3003baad 100644
--- a/taskcluster/scriptworker-task-github.yml
+++ b/taskcluster/scriptworker-task-github.yml
@@ -49,7 +49,9 @@ build:
       - "node-package-tflite"
     cpp:
       - "darwin-amd64-cpu-opt"
+      - "darwin-amd64-tflite-opt"
      - "linux-amd64-cpu-opt"
+      - "linux-amd64-tflite-opt"
      - "linux-amd64-gpu-opt"
      - "linux-rpi3-cpu-opt"
      - "linux-arm64-cpu-opt"
diff --git a/taskcluster/scriptworker-task-pypi.yml b/taskcluster/scriptworker-task-pypi.yml
index 55a5f8b2..988384c5 100644
--- a/taskcluster/scriptworker-task-pypi.yml
+++ b/taskcluster/scriptworker-task-pypi.yml
@@ -5,13 +5,16 @@ build:
 #  - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
   - "darwin-amd64-cpu-opt"
   - "darwin-amd64-tflite-opt"
+  - "darwin-amd64-ctc-opt"
   - "linux-amd64-cpu-opt"
   - "linux-amd64-tflite-opt"
   - "linux-amd64-gpu-opt"
+  - "linux-amd64-ctc-opt"
   - "linux-rpi3-cpu-opt"
   - "win-amd64-cpu-opt"
   - "win-amd64-tflite-opt"
   - "win-amd64-gpu-opt"
+  - "win-amd64-ctc-opt"
   allowed:
   - "tag"
   ref_match: "refs/tags/"
@@ -24,13 +27,16 @@ build:
   python:
   - "darwin-amd64-cpu-opt"
   - "darwin-amd64-tflite-opt"
+  - "darwin-amd64-ctc-opt"
   - "linux-amd64-cpu-opt"
   - "linux-amd64-tflite-opt"
   - "linux-amd64-gpu-opt"
+  - "linux-amd64-ctc-opt"
   - "linux-rpi3-cpu-opt"
   - "win-amd64-cpu-opt"
   - "win-amd64-tflite-opt"
   - "win-amd64-gpu-opt"
+  - "win-amd64-ctc-opt"
 #  - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
   nuget: []
 metadata:
diff --git a/taskcluster/tc-all-vars.sh b/taskcluster/tc-all-vars.sh
index 74887bdc..1a38e18b 100755
--- a/taskcluster/tc-all-vars.sh
+++ b/taskcluster/tc-all-vars.sh
@@ -57,7 +57,7 @@ export DS_TFDIR=${DS_ROOT_TASK}/DeepSpeech/tf
 export DS_DSDIR=${DS_ROOT_TASK}/DeepSpeech/ds
 export DS_EXAMPLEDIR=${DS_ROOT_TASK}/DeepSpeech/examples
 
-export DS_VERSION="$(cat ${DS_DSDIR}/VERSION)"
+export DS_VERSION="$(cat ${DS_DSDIR}/training/deepspeech_training/VERSION)"
 
 export GRADLE_USER_HOME=${DS_ROOT_TASK}/gradle-cache
 export ANDROID_SDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/SDK/
diff --git a/taskcluster/tc-train-tests.sh b/taskcluster/tc-train-tests.sh
index c8708f50..3b681dcd 100644
--- a/taskcluster/tc-train-tests.sh
+++ b/taskcluster/tc-train-tests.sh
@@ -8,6 +8,13 @@ extract_python_versions "$1" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pya
 
 bitrate=$2
 
+decoder_src=$3
+
+if [ "$decoder_src" = "--pypi" ]; then
+    # Disable automatically picking up decoder package built in this CI group
+    export DECODER_ARTIFACTS_ROOT=""
+fi
+
 mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
 
 mkdir -p /tmp/train || true
 mkdir -p /tmp/train_tflite || true
diff --git a/taskcluster/test-linux-opt-tag-base.tyml b/taskcluster/test-linux-opt-tag-base.tyml
new file mode 100644
index 00000000..1e6686b5
--- /dev/null
+++ b/taskcluster/test-linux-opt-tag-base.tyml
@@ -0,0 +1,65 @@
+$if: '(event.event in build.allowed) && ((event.event != "tag") || (build.ref_match in event.head.ref))'
+then:
+  taskId: ${taskcluster.taskId}
+  provisionerId: ${taskcluster.docker.provisionerId}
+  workerType: ${taskcluster.docker.workerType}
+  taskGroupId: ${taskcluster.taskGroupId}
+  schedulerId: ${taskcluster.schedulerId}
+  dependencies:
+    $map: { $eval: build.dependencies }
+    each(b):
+      $eval: as_slugid(b)
+  created: { $fromNow: '0 sec' }
+  deadline: { $fromNow: '1 day' }
+  expires: { $fromNow: '7 days' }
+
+  extra:
+    github:
+      { $eval: taskcluster.github_events.pull_request }
+
+  payload:
+    maxRunTime: { $eval: to_int(build.maxRunTime) }
+    image: ${build.docker_image}
+
+    env:
+      $let:
+        training: { $eval: as_slugid(build.test_model_task) }
+        linux_amd64_build: { $eval: as_slugid("linux-amd64-cpu-opt") }
+        linux_amd64_tflite: { $eval: as_slugid("linux-amd64-tflite-opt") }
+        linux_amd64_ctc: { $eval: as_slugid("linux-amd64-ctc-opt") }
+      in:
+        DEEPSPEECH_ARTIFACTS_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_build}/artifacts/public
+        DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_tflite}/artifacts/public
+        DEEPSPEECH_TEST_MODEL: https://community-tc.services.mozilla.com/api/queue/v1/task/${training}/artifacts/public/output_graph.pb
+        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb
+        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm
+        DECODER_ARTIFACTS_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_ctc}/artifacts/public
+        PIP_DEFAULT_TIMEOUT: "60"
+        EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
+
+    command:
+      - "/bin/bash"
+      - "--login"
+      - "-cxe"
+      - $let:
+          extraSystemSetup: { $eval: strip(str(build.system_setup)) }
+        in: >
+          ${aptEc2Mirrors} &&
+          apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox sudo wget && ${extraSystemSetup} &&
+          adduser --system --home ${system.homedir.linux} ${system.username} &&
+          cd ${system.homedir.linux} &&
+          echo -e "#!/bin/bash\nset -xe\n env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}&& mkdir -p ${system.homedir.linux}/pyenv-root/ && wget -O - ${system.pyenv.linux.url} | tar -C ${system.homedir.linux}/pyenv-root/ -xzf -" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
+          sudo -H -u ${system.username} /bin/bash /tmp/clone.sh &&
+          sudo -H -u ${system.username} --preserve-env /bin/bash ${build.args.tests_cmdline}
+
+    artifacts:
+      "public":
+        type: "directory"
+        path: "/tmp/artifacts/"
+        expires: { $fromNow: '7 days' }
+
+  metadata:
+    name: ${build.metadata.name}
+    description: ${build.metadata.description}
+    owner: ${event.head.user.email}
+    source: ${event.head.repo.url}
diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml
new file mode 100644
index 00000000..dc7f83d8
--- /dev/null
+++ b/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml
@@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 16kHz PyPI training Py3.5"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version, decoder package from PyPI"
diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml
new file mode 100644
index 00000000..ffac9c89
--- /dev/null
+++ b/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml
@@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 16kHz PyPI training Py3.6"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version, decoder package from PyPI"
diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml
new file mode 100644
index 00000000..a7624e73
--- /dev/null
+++ b/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml
@@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 16kHz PyPI training Py3.7"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version, decoder package from PyPI"
diff --git a/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml
new file mode 100644
index 00000000..48c52ea1
--- /dev/null
+++ b/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml
@@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k --pypi"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 8kHz PyPI training Py3.6"
+    description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version, decoder package from PyPI"
diff --git a/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml
new file mode 100644
index 00000000..18f4d51f
--- /dev/null
+++ b/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml
@@ -0,0 +1,15 @@
+build:
+  template_file: test-linux-opt-tag-base.tyml
+  dependencies:
+    - "scriptworker-task-pypi"
+  allowed:
+    - "tag"
+  ref_match: "refs/tags/"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k --pypi"
"${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k --pypi" + metadata: + name: "DeepSpeech Linux AMD64 CPU 8kHz PyPI training Py3.7" + description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version, decoder package from PyPI" diff --git a/training/deepspeech_training/GRAPH_VERSION b/training/deepspeech_training/GRAPH_VERSION deleted file mode 120000 index bef0a41b..00000000 --- a/training/deepspeech_training/GRAPH_VERSION +++ /dev/null @@ -1 +0,0 @@ -../../GRAPH_VERSION \ No newline at end of file diff --git a/training/deepspeech_training/GRAPH_VERSION b/training/deepspeech_training/GRAPH_VERSION new file mode 100644 index 00000000..1e8b3149 --- /dev/null +++ b/training/deepspeech_training/GRAPH_VERSION @@ -0,0 +1 @@ +6 diff --git a/training/deepspeech_training/VERSION b/training/deepspeech_training/VERSION deleted file mode 120000 index 558194c5..00000000 --- a/training/deepspeech_training/VERSION +++ /dev/null @@ -1 +0,0 @@ -../../VERSION \ No newline at end of file diff --git a/training/deepspeech_training/VERSION b/training/deepspeech_training/VERSION new file mode 100644 index 00000000..edcd751a --- /dev/null +++ b/training/deepspeech_training/VERSION @@ -0,0 +1 @@ +0.8.0-alpha.2 diff --git a/training/deepspeech_training/evaluate.py b/training/deepspeech_training/evaluate.py index d0144ddb..716b5f93 100755 --- a/training/deepspeech_training/evaluate.py +++ b/training/deepspeech_training/evaluate.py @@ -61,7 +61,6 @@ def evaluate(test_csvs, create_model): # One rate per layer no_dropout = [None] * 6 logits, _ = create_model(batch_x=batch_x, - batch_size=FLAGS.test_batch_size, seq_length=batch_x_len, dropout=no_dropout) diff --git a/training/deepspeech_training/util/feeding.py b/training/deepspeech_training/util/feeding.py index 5cbd4833..31dafcfb 100644 --- a/training/deepspeech_training/util/feeding.py +++ b/training/deepspeech_training/util/feeding.py @@ -151,7 +151,7 @@ def create_dataset(sources, .map(process_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)) if enable_cache: dataset = dataset.cache(cache_path) - dataset = (dataset.window(batch_size, drop_remainder=True).flat_map(batch_fn) + dataset = (dataset.window(batch_size, drop_remainder=train_phase).flat_map(batch_fn) .prefetch(len(Config.available_devices))) return dataset @@ -168,7 +168,7 @@ def split_audio_file(audio_path, segments = vad_split(frames, aggressiveness=aggressiveness) for segment in segments: segment_buffer, time_start, time_end = segment - samples = pcm_to_np(audio_format, segment_buffer) + samples = pcm_to_np(segment_buffer, audio_format) yield time_start, time_end, samples def to_mfccs(time_start, time_end, samples):