From 32ad25b088d1e88a2d374a11d791b8b4cc13bfd7 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Mon, 31 Aug 2020 17:12:27 +0200 Subject: [PATCH] Fix #3129: Add CI coverage for transcribe.py --- taskcluster/tc-all-utils.sh | 29 +++++++++++ taskcluster/tc-train-tests.sh | 4 ++ taskcluster/tc-transcribe-tests.sh | 49 +++++++++++++++++++ ...t-transcribe_16k-linux-amd64-py35m-opt.yml | 13 +++++ ...t-transcribe_16k-linux-amd64-py36m-opt.yml | 13 +++++ ...t-transcribe_16k-linux-amd64-py37m-opt.yml | 13 +++++ ...st-transcribe_8k-linux-amd64-py36m-opt.yml | 13 +++++ ...st-transcribe_8k-linux-amd64-py37m-opt.yml | 13 +++++ 8 files changed, 147 insertions(+) create mode 100755 taskcluster/tc-transcribe-tests.sh create mode 100644 taskcluster/test-transcribe_16k-linux-amd64-py35m-opt.yml create mode 100644 taskcluster/test-transcribe_16k-linux-amd64-py36m-opt.yml create mode 100644 taskcluster/test-transcribe_16k-linux-amd64-py37m-opt.yml create mode 100644 taskcluster/test-transcribe_8k-linux-amd64-py36m-opt.yml create mode 100644 taskcluster/test-transcribe_8k-linux-amd64-py37m-opt.yml diff --git a/taskcluster/tc-all-utils.sh b/taskcluster/tc-all-utils.sh index 4ab2c9b5..defc7350 100755 --- a/taskcluster/tc-all-utils.sh +++ b/taskcluster/tc-all-utils.sh @@ -43,6 +43,35 @@ set_ldc_sample_filename() esac } +get_dependency_url() +{ + local _file=$1 + all_deps="$(curl -s https://community-tc.services.mozilla.com/api/queue/v1/task/${TASK_ID} | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')" + + for dep in ${all_deps}; do + local has_artifact=$(curl -s https://community-tc.services.mozilla.com/api/queue/v1/task/${dep}/artifacts | python -c 'import json; import sys; has_artifact = True in [ e["name"].find("'${_file}'") > 0 for e in json.loads(sys.stdin.read())["artifacts"] ]; print(has_artifact)') + if [ "${has_artifact}" = "True" ]; then + echo 
"https://community-tc.services.mozilla.com/api/queue/v1/task/${dep}/artifacts/public/${_file}" + exit 0 + fi; + done; + + echo "" +} + +download_dependency_file() +{ + local _file=$1 + url=$(get_dependency_url "${_file}") + + if [ -z "${url}" ]; then + echo "Unable to find an URL for ${_file}" + exit 1 + fi; + + ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${url}" +} + download_data() { ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}" diff --git a/taskcluster/tc-train-tests.sh b/taskcluster/tc-train-tests.sh index f9394935..8751c061 100644 --- a/taskcluster/tc-train-tests.sh +++ b/taskcluster/tc-train-tests.sh @@ -52,6 +52,10 @@ pushd ${HOME}/DeepSpeech/ds/ time ./bin/run-tc-ldc93s1_tflite.sh "${sample_rate}" popd +tar -cf - \ + -C /tmp/ckpt/ . \ + | ${XZ} > ${TASKCLUSTER_ARTIFACTS}/checkpoint.tar.xz + cp /tmp/train/output_graph.pb ${TASKCLUSTER_ARTIFACTS} cp /tmp/train_tflite/output_graph.tflite ${TASKCLUSTER_ARTIFACTS} diff --git a/taskcluster/tc-transcribe-tests.sh b/taskcluster/tc-transcribe-tests.sh new file mode 100755 index 00000000..8b4f33bf --- /dev/null +++ b/taskcluster/tc-transcribe-tests.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/tc-tests-utils.sh + +extract_python_versions "$1" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pyalias" + +bitrate=$2 + +mkdir -p ${TASKCLUSTER_ARTIFACTS} || true +mkdir -p ${TASKCLUSTER_TMP_DIR}/ckpt || true + +download_dependency_file "checkpoint.tar.xz" +cd ${TASKCLUSTER_TMP_DIR} && ${UNXZ} checkpoint.tar.xz +cd ${TASKCLUSTER_TMP_DIR}/ckpt/ && tar -xf ${TASKCLUSTER_TMP_DIR}/checkpoint.tar + +virtualenv_activate "${pyalias}" "deepspeech" + +set -o pipefail +pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat +pushd ${HOME}/DeepSpeech/ds + pip install --upgrade . 
| cat + pip install --upgrade -r requirements_transcribe.txt | cat +popd +set +o pipefail + +# Prepare correct arguments for transcribing +case "${bitrate}" in + 8k) + sample_rate=8000 + sample_name='LDC93S1_pcms16le_1_8000.wav' + ;; + 16k) + sample_rate=16000 + sample_name='LDC93S1_pcms16le_1_16000.wav' + ;; +esac + +pushd ${HOME}/DeepSpeech/ds/ + python transcribe.py \ + --src "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/${sample_name}" \ + --dst ${TASKCLUSTER_ARTIFACTS}/transcribe.log \ + --n_hidden 100 \ + --checkpoint_dir ${TASKCLUSTER_TMP_DIR}/ckpt/ \ + --scorer "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer" +popd + +virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/test-transcribe_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-transcribe_16k-linux-amd64-py35m-opt.yml new file mode 100644 index 00000000..e9412704 --- /dev/null +++ b/taskcluster/test-transcribe_16k-linux-amd64-py35m-opt.yml @@ -0,0 +1,13 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.5.8:m 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU 16kHz transcribe Py3.5" + description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version" diff --git a/taskcluster/test-transcribe_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-transcribe_16k-linux-amd64-py36m-opt.yml new file mode 100644 index 00000000..8fc04511 --- /dev/null +++ b/taskcluster/test-transcribe_16k-linux-amd64-py36m-opt.yml @@ -0,0 +1,13 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install 
${training.packages_xenial.apt} ${python.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.6.10:m 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU 16kHz transcribe Py3.6" + description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-transcribe_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-transcribe_16k-linux-amd64-py37m-opt.yml new file mode 100644 index 00000000..053e329b --- /dev/null +++ b/taskcluster/test-transcribe_16k-linux-amd64-py37m-opt.yml @@ -0,0 +1,13 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.7.6:m 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU 16kHz transcribe Py3.7" + description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-transcribe_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-transcribe_8k-linux-amd64-py36m-opt.yml new file mode 100644 index 00000000..af365b17 --- /dev/null +++ b/taskcluster/test-transcribe_8k-linux-amd64-py36m-opt.yml @@ -0,0 +1,13 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.6.10:m 8k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU 8kHz transcribe Py3.6" + description: "Transcribe 
a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-transcribe_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-transcribe_8k-linux-amd64-py37m-opt.yml new file mode 100644 index 00000000..5214b2cc --- /dev/null +++ b/taskcluster/test-transcribe_8k-linux-amd64-py37m-opt.yml @@ -0,0 +1,13 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.7.6:m 8k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU 8kHz transcribe Py3.7" + description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version"