Merge pull request #3296 from lissyx/transcribe-ci

Fix #3129: Add CI coverage for transcribe.py
This commit is contained in:
lissyx 2020-09-01 19:13:28 +02:00 committed by GitHub
commit 9377aaf3a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 147 additions and 0 deletions

View File

@ -43,6 +43,35 @@ set_ldc_sample_filename()
esac esac
} }
# Print the URL of artifact "public/<file>" taken from the first dependency of
# the current task (${TASK_ID}) whose artifact list contains <file>.
# Prints an empty line when no dependency exposes such an artifact.
#
# Arguments:
#   $1  artifact file name, relative to the "public/" artifact prefix
# Outputs:
#   the artifact URL (or an empty line) on stdout
# Returns:
#   always 0; callers detect "not found" by testing for empty output
get_dependency_url()
{
  local _file=$1
  local queue="https://community-tc.services.mozilla.com/api/queue/v1/task"
  # Keep the scratch variables local so they do not leak into the caller.
  local all_deps dep has_artifact

  all_deps="$(curl -s "${queue}/${TASK_ID}" | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')"

  # Intentionally unquoted: all_deps is a space-separated list of task ids.
  for dep in ${all_deps}; do
    # The file name is handed to Python through argv rather than spliced into
    # the program text, so quotes in the name cannot break the Python source.
    # A failed probe (network error, unexpected JSON) just means "no artifact
    # here": `|| true` keeps the loop going to the next dependency.
    has_artifact=$(curl -s "${queue}/${dep}/artifacts" | python -c 'import json; import sys; print(any(e["name"].find(sys.argv[1]) > 0 for e in json.loads(sys.stdin.read())["artifacts"]))' "${_file}") || true
    if [ "${has_artifact}" = "True" ]; then
      echo "${queue}/${dep}/artifacts/public/${_file}"
      # return, not exit: exit would terminate the whole script if this
      # function were ever called outside of a $(...) command substitution.
      return 0
    fi
  done

  echo ""
}
# Download artifact <file> published by one of this task's dependencies into
# ${TASKCLUSTER_TMP_DIR}.
#
# Arguments:
#   $1  artifact file name, as accepted by get_dependency_url
# Exits the script with status 1 when no dependency provides the file.
download_dependency_file()
{
  local _file=$1
  # Local so that a caller's own "url" variable is not clobbered.
  local url

  url=$(get_dependency_url "${_file}")
  if [ -z "${url}" ]; then
    echo "Unable to find an URL for ${_file}"
    # Deliberately fatal: the calling test cannot run without this file.
    exit 1
  fi

  ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${url}"
}
download_data() download_data()
{ {
${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}" ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}"

View File

@ -52,6 +52,10 @@ pushd ${HOME}/DeepSpeech/ds/
time ./bin/run-tc-ldc93s1_tflite.sh "${sample_rate}" time ./bin/run-tc-ldc93s1_tflite.sh "${sample_rate}"
popd popd
# Stream the contents of /tmp/ckpt/ as a tar archive through ${XZ} and store
# the result as checkpoint.tar.xz in the artifacts directory.
# NOTE(review): ${XZ} is defined outside this hunk; presumably an xz
# compression command -- confirm.
tar -cf - \
-C /tmp/ckpt/ . \
| ${XZ} > ${TASKCLUSTER_ARTIFACTS}/checkpoint.tar.xz
cp /tmp/train/output_graph.pb ${TASKCLUSTER_ARTIFACTS} cp /tmp/train/output_graph.pb ${TASKCLUSTER_ARTIFACTS}
cp /tmp/train_tflite/output_graph.tflite ${TASKCLUSTER_ARTIFACTS} cp /tmp/train_tflite/output_graph.tflite ${TASKCLUSTER_ARTIFACTS}

View File

@ -0,0 +1,49 @@
#!/bin/bash
# CI test: run transcribe.py on a smoke-test sample using a checkpoint
# published by a dependency task.
#
# Usage: tc-transcribe-tests.sh <python-version-spec> <bitrate>
#   <python-version-spec>  forwarded to extract_python_versions
#   <bitrate>              "8k" or "16k"; selects the smoke-test WAV sample

set -xe

source "$(dirname "$0")/tc-tests-utils.sh"

extract_python_versions "$1" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pyalias"

bitrate=$2

mkdir -p "${TASKCLUSTER_ARTIFACTS}" || true
mkdir -p "${TASKCLUSTER_TMP_DIR}/ckpt" || true

# Fetch and unpack the checkpoint archive into ${TASKCLUSTER_TMP_DIR}/ckpt/.
download_dependency_file "checkpoint.tar.xz"

# Each step is its own statement: in a `cd dir && cmd` list a failing `cd`
# is exempt from `set -e`, so the script would silently carry on.
cd "${TASKCLUSTER_TMP_DIR}"
${UNXZ} checkpoint.tar.xz
cd "${TASKCLUSTER_TMP_DIR}/ckpt/"
tar -xf "${TASKCLUSTER_TMP_DIR}/checkpoint.tar"

virtualenv_activate "${pyalias}" "deepspeech"

# pip output is piped through cat; pipefail makes a pip failure fail the
# pipeline (and, with set -e, the script) instead of being masked by cat.
set -o pipefail
pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
pushd "${HOME}/DeepSpeech/ds"
    pip install --upgrade . | cat
    pip install --upgrade -r requirements_transcribe.txt | cat
popd
set +o pipefail

# Prepare correct arguments for transcribing
case "${bitrate}" in
    8k)
        sample_rate=8000
        sample_name='LDC93S1_pcms16le_1_8000.wav'
    ;;
    16k)
        sample_rate=16000
        sample_name='LDC93S1_pcms16le_1_16000.wav'
    ;;
    *)
        # Without this branch an unknown bitrate would leave sample_name
        # empty and transcribe.py would be pointed at the data directory.
        echo "Unsupported bitrate '${bitrate}': expected 8k or 16k" >&2
        exit 1
    ;;
esac

pushd "${HOME}/DeepSpeech/ds/"
    python transcribe.py \
        --src "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/${sample_name}" \
        --dst "${TASKCLUSTER_ARTIFACTS}/transcribe.log" \
        --n_hidden 100 \
        --checkpoint_dir "${TASKCLUSTER_TMP_DIR}/ckpt/" \
        --scorer "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer"
popd

virtualenv_deactivate "${pyalias}" "deepspeech"

View File

@ -0,0 +1,13 @@
# CI task definition: runs tc-transcribe-tests.sh with Python 3.5.8 and the
# "16k" bitrate argument, built on the test-linux-opt-base.tyml template.
build:
template_file: test-linux-opt-base.tyml
# The test script downloads checkpoint.tar.xz from one of its dependencies;
# presumably this training task is the one publishing it -- confirm.
dependencies:
- "test-training_16k-linux-amd64-py36m-opt"
system_setup:
>
apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt}
# Script arguments: <python version spec> <bitrate>
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.5.8:m 16k"
workerType: "${docker.dsTests}"
metadata:
name: "DeepSpeech Linux AMD64 CPU 16kHz transcribe Py3.5"
description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version"

View File

@ -0,0 +1,13 @@
# CI task definition: runs tc-transcribe-tests.sh with Python 3.6.10 and the
# "16k" bitrate argument, built on the test-linux-opt-base.tyml template.
build:
template_file: test-linux-opt-base.tyml
# The test script downloads checkpoint.tar.xz from one of its dependencies;
# presumably this training task is the one publishing it -- confirm.
dependencies:
- "test-training_16k-linux-amd64-py36m-opt"
system_setup:
>
apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt}
# Script arguments: <python version spec> <bitrate>
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.6.10:m 16k"
workerType: "${docker.dsTests}"
metadata:
name: "DeepSpeech Linux AMD64 CPU 16kHz transcribe Py3.6"
description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"

View File

@ -0,0 +1,13 @@
# CI task definition: runs tc-transcribe-tests.sh with Python 3.7.6 and the
# "16k" bitrate argument, built on the test-linux-opt-base.tyml template.
build:
template_file: test-linux-opt-base.tyml
# The test script downloads checkpoint.tar.xz from one of its dependencies;
# presumably this training task is the one publishing it -- confirm.
dependencies:
- "test-training_16k-linux-amd64-py36m-opt"
system_setup:
>
apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt}
# Script arguments: <python version spec> <bitrate>
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.7.6:m 16k"
workerType: "${docker.dsTests}"
metadata:
name: "DeepSpeech Linux AMD64 CPU 16kHz transcribe Py3.7"
description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version"

View File

@ -0,0 +1,13 @@
# CI task definition: runs tc-transcribe-tests.sh with Python 3.6.10 and the
# "8k" bitrate argument, built on the test-linux-opt-base.tyml template.
build:
template_file: test-linux-opt-base.tyml
# The test script downloads checkpoint.tar.xz from one of its dependencies.
# NOTE(review): this 8k job depends on the 16k training task -- confirm that
# pairing an 8k sample with the 16k training checkpoint is intended.
dependencies:
- "test-training_16k-linux-amd64-py36m-opt"
system_setup:
>
apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt}
# Script arguments: <python version spec> <bitrate>
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.6.10:m 8k"
workerType: "${docker.dsTests}"
metadata:
name: "DeepSpeech Linux AMD64 CPU 8kHz transcribe Py3.6"
description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version"

View File

@ -0,0 +1,13 @@
# CI task definition: runs tc-transcribe-tests.sh with Python 3.7.6 and the
# "8k" bitrate argument, built on the test-linux-opt-base.tyml template.
build:
template_file: test-linux-opt-base.tyml
# The test script downloads checkpoint.tar.xz from one of its dependencies.
# NOTE(review): this 8k job depends on the 16k training task -- confirm that
# pairing an 8k sample with the 16k training checkpoint is intended.
dependencies:
- "test-training_16k-linux-amd64-py36m-opt"
system_setup:
>
apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} ${python.packages_xenial.apt}
# Script arguments: <python version spec> <bitrate>
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transcribe-tests.sh 3.7.6:m 8k"
workerType: "${docker.dsTests}"
metadata:
name: "DeepSpeech Linux AMD64 CPU 8kHz transcribe Py3.7"
description: "Transcribe a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version"