Add some coverage for training and inference in bytes output mode

This commit is contained in:
Reuben Morais 2020-10-05 16:04:20 +02:00
parent 2fd11dd74a
commit fb4f5b6a84
8 changed files with 148 additions and 0 deletions

View File

@ -0,0 +1,31 @@
#!/bin/bash
# Smoke test: run one more epoch of bytes-output-mode (--utf8) training on the
# single-sample LDC93S1 dataset using the checkpoint directory /tmp/ckpt_bytes,
# then verify from the training log that a checkpoint was actually loaded.
#
# Paths are relative to the current directory (presumably the repository
# root -- confirm against the caller).
#
# Exit status: 0 when the log contains the checkpoint-loading message;
# non-zero when training fails or the message is missing.
#
# bash (rather than sh) is required for `pipefail`: without it the status of
# `python ... | tee` is tee's status, so a crashed training run would be
# masked and the script could still report success.
set -xe -o pipefail

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Import the sample data only when it is not already present.
if [ ! -f "${ldc93s1_csv}" ]; then
  echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
  python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

# Keep a copy of stdout in /tmp/resume.log so it can be inspected below.
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
  --n_hidden 100 --epochs 1 \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' --utf8 \
  --learning_rate 0.001 --dropout_rate 0.05 \
  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' | tee /tmp/resume.log

# The run only counts as a resume if the checkpoint-loading message was logged.
if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
  echo "Did not resume training from checkpoint"
  exit 1
else
  exit 0
fi

30
bin/run-tc-ldc93s1_new_bytes.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/sh
# Smoke test: train a bytes-output-mode (--utf8) model on the single-sample
# LDC93S1 dataset, writing checkpoints to /tmp/ckpt_bytes and the exported
# model to /tmp/train_bytes.
#
# Usage: <script> <epoch_count> <audio_sample_rate>
#   epoch_count        value passed to DeepSpeech.py --epochs
#   audio_sample_rate  value passed to DeepSpeech.py --audio_sample_rate
#
# Paths are relative to the current directory (presumably the repository
# root -- confirm against the caller).
set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Fail early with a usage message: an empty value would otherwise make the
# flag swallow the next option on the DeepSpeech.py command line.
epoch_count=${1:?usage: $0 <epoch_count> <audio_sample_rate>}
audio_sample_rate=${2:?usage: $0 <epoch_count> <audio_sample_rate>}

# Import the sample data only when it is not already present.
if [ ! -f "${ldc93s1_csv}" ]; then
  echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
  python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
  --feature_cache '/tmp/ldc93s1_cache' \
  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
  --n_hidden 100 --epochs "${epoch_count}" \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' \
  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_bytes' \
  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
  --audio_sample_rate "${audio_sample_rate}" \
  --utf8

View File

@ -0,0 +1,26 @@
#!/bin/sh
# Smoke test: export a TFLite model (--export_tflite) in bytes output mode
# (--utf8) using the checkpoint directory /tmp/ckpt_bytes, writing the result
# to /tmp/train_bytes_tflite. No training files are passed to DeepSpeech.py.
#
# Usage: <script> <audio_sample_rate>
#   audio_sample_rate  value passed to DeepSpeech.py --audio_sample_rate
#
# Paths are relative to the current directory (presumably the repository
# root -- confirm against the caller).
set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Fail early with a usage message instead of passing an empty flag value.
audio_sample_rate=${1:?usage: $0 <audio_sample_rate>}

# Import the sample data only when it is not already present.
if [ ! -f "${ldc93s1_csv}" ]; then
  echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
  python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

python -u DeepSpeech.py --noshow_progressbar \
  --n_hidden 100 \
  --checkpoint_dir '/tmp/ckpt_bytes' \
  --export_dir '/tmp/train_bytes_tflite' \
  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
  --utf8 \
  --audio_sample_rate "${audio_sample_rate}" \
  --export_tflite

View File

@ -98,6 +98,7 @@ download_data()
${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.bytes.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.bytes.scorer
cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
}

View File

@ -0,0 +1,16 @@
#!/bin/bash
# C++ client smoke test for a model trained in bytes output mode.
#
# Usage: <script> <bitrate>
#   bitrate  forwarded to set_ldc_sample_filename (the task definitions that
#            invoke this script pass "16k")
#
# set_ldc_sample_filename, download_material and check_versions are not
# defined here; presumably they come from tc-tests-utils.sh -- confirm there.
set -xe

# Quoted so that a checkout path containing whitespace still resolves.
source "$(dirname "$0")/tc-tests-utils.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"

download_material "${TASKCLUSTER_TMP_DIR}/ds"

# Put the downloaded directory first on PATH.
export PATH="${TASKCLUSTER_TMP_DIR}/ds/:${PATH}"

# Bytes output mode with LDC93S1 takes too long to converge so we simply test
# that loading the model won't crash
check_versions

View File

@ -54,10 +54,30 @@ pushd ${HOME}/DeepSpeech/ds/
# Test --metrics_files training argument
time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}"

# Test training with bytes output mode: 200 epochs of training, then a
# TFLite export using the same sample rate
time ./bin/run-tc-ldc93s1_new_bytes.sh 200 "${sample_rate}"
time ./bin/run-tc-ldc93s1_new_bytes_tflite.sh "${sample_rate}"
popd

# Save exported model artifacts from bytes output mode training
# (destination quoted so an artifacts path with whitespace is not word-split)
cp /tmp/train_bytes/output_graph.pb "${TASKCLUSTER_ARTIFACTS}/output_graph.pb"
cp /tmp/train_bytes_tflite/output_graph.tflite "${TASKCLUSTER_ARTIFACTS}/output_graph.tflite"

# Fetch the convert_graphdef_memmapped_format tool into /tmp/
pushd "${HOME}/DeepSpeech/ds/"
python util/taskcluster.py --source tensorflow --artifact convert_graphdef_memmapped_format --branch r1.15 --target /tmp/
popd

# Convert the exported .pb graph to .pbmm and publish it as an artifact too
/tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train_bytes/output_graph.pb --out_graph=/tmp/train_bytes/output_graph.pbmm
cp /tmp/train_bytes/output_graph.pbmm "${TASKCLUSTER_ARTIFACTS}"

# Test resuming from checkpoints created above
pushd "${HOME}/DeepSpeech/ds/"
# SDB, resuming from checkpoint
time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh

# Bytes output mode, resuming from checkpoint
time ./bin/run-tc-ldc93s1_checkpoint_bytes.sh
popd

virtualenv_deactivate "${pyalias}" "deepspeech"

View File

@ -0,0 +1,12 @@
# Task definition: C++ tests on OSX/AMD64 (CPU, optimized build) for the
# bytes output model at 16kHz.
build:
  template_file: test-darwin-opt-base.tyml
  # Tasks named here are declared as dependencies of this task.
  dependencies:
    - "darwin-amd64-cpu-opt"
    - "test-training-extra_16k-linux-amd64-py36m-opt"
    - "homebrew_tests-darwin-amd64"
  # Presumably the task whose exported model is consumed by the tests --
  # confirm against the template.
  test_model_task: "test-training-extra_16k-linux-amd64-py36m-opt"
  args:
    tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tc-cpp-bytes-ds-tests.sh 16k"
  metadata:
    name: "DeepSpeech OSX AMD64 CPU C++ tests (Bytes Output Model, 16kHz)"
    description: "Testing DeepSpeech C++ for OSX/AMD64, CPU only, optimized version (Bytes Output Model, 16kHz)"

View File

@ -0,0 +1,12 @@
# Task definition: C++ tests on Linux/AMD64 (CPU, optimized build) for the
# bytes output model at 16kHz.
build:
  template_file: test-linux-opt-base.tyml
  # Tasks named here are declared as dependencies of this task.
  dependencies:
    - "linux-amd64-cpu-opt"
    - "test-training-extra_16k-linux-amd64-py36m-opt"
  # Presumably the task whose exported model is consumed by the tests --
  # confirm against the template.
  test_model_task: "test-training-extra_16k-linux-amd64-py36m-opt"
  args:
    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-cpp-bytes-ds-tests.sh 16k"
  workerType: "${docker.dsTests}"
  metadata:
    name: "DeepSpeech Linux AMD64 CPU C++ tests (Bytes Output Model, 16kHz)"
    description: "Testing DeepSpeech C++ for Linux/AMD64, CPU only, optimized version (Bytes Output Model, 16kHz)"