Add transfer learning test
This commit is contained in:
parent
5bba9ea5d1
commit
f32fd7a33f
|
@ -23,7 +23,7 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
|||
--learning_rate 0.001 --dropout_rate 0.05 \
|
||||
--scorer_path 'data/smoke_test/pruned_lm.scorer' | tee /tmp/resume.log
|
||||
|
||||
if ! grep "Restored variables from most recent checkpoint" /tmp/resume.log; then
|
||||
if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
|
||||
echo "Did not resume training from checkpoint"
|
||||
exit 1
|
||||
else
|
||||
|
|
|
@ -1,13 +1,19 @@
|
|||
#!/bin/sh
|
||||
'''
|
||||
This bash script is for running minimum working examples
|
||||
of transfer learning for continuous integration tests
|
||||
to be run on Taskcluster.
|
||||
'''
|
||||
# This bash script is for running minimum working examples
|
||||
# of transfer learning for continuous integration tests
|
||||
# to be run on Taskcluster.
|
||||
set -xe
|
||||
|
||||
ru_csv="data/smoke_test/russian_sample_data/ru.csv"
|
||||
epoch_count=$1
|
||||
ru_dir="./data/smoke_test/russian_sample_data"
|
||||
ru_csv="${ru_dir}/ru.csv"
|
||||
|
||||
ldc93s1_dir="./data/smoke_test"
|
||||
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
|
||||
|
||||
if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then
|
||||
echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
|
||||
python -u bin/import_ldc93s1.py ${ldc93s1_dir}
|
||||
fi;
|
||||
|
||||
# Force only one visible device because we have a single-sample dataset
|
||||
# and when trying to run on multiple devices (like GPUs), this will break
|
||||
|
@ -20,101 +26,98 @@ for LOAD in 'init' 'last' 'auto'; do
|
|||
echo "########################################################"
|
||||
echo "#### Train ENGLISH model with just --checkpoint_dir ####"
|
||||
echo "########################################################"
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--alphabet_config_path "./data/alphabet.txt" \
|
||||
--load "$LOAD" \
|
||||
--train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
|
||||
--dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
|
||||
--test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
|
||||
--checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--train_files "${ldc93s1_csv}" --train_batch_size 1 \
|
||||
--dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
|
||||
--test_files "${ldc93s1_csv}" --test_batch_size 1 \
|
||||
--scorer_path '' \
|
||||
--checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--n_hidden 100 \
|
||||
--epochs 10 \
|
||||
"$@"
|
||||
--epochs 10
|
||||
|
||||
echo "##############################################################################"
|
||||
echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
|
||||
echo "##############################################################################"
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--alphabet_config_path "./data/alphabet.txt" \
|
||||
--load "$LOAD" \
|
||||
--train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
|
||||
--dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
|
||||
--test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--train_files "${ldc93s1_csv}" --train_batch_size 1 \
|
||||
--dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
|
||||
--test_files "${ldc93s1_csv}" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--scorer_path '' \
|
||||
--n_hidden 100 \
|
||||
--epochs 10 \
|
||||
"$@"
|
||||
--epochs 10
|
||||
|
||||
echo "#################################################################################"
|
||||
echo "#### Transfer Russian model with --save_checkpoint_dir --load_checkpoint_dir ####"
|
||||
echo "#################################################################################"
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--drop_source_layers 1 \
|
||||
--alphabet_config_path "${ru_dir}/alphabet.ru" \
|
||||
--load 'last' \
|
||||
--train_files "${ru_dir}/ru.csv" --train_batch_size 1 \
|
||||
--dev_files "${ru_dir}/ru.csv" --dev_batch_size 1 \
|
||||
--test_files "${ru_dir}/ru.csv" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--train_files "${ru_csv}" --train_batch_size 1 \
|
||||
--dev_files "${ru_csv}" --dev_batch_size 1 \
|
||||
--test_files "${ru_csv}" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--scorer_path '' \
|
||||
--n_hidden 100 \
|
||||
--epochs 10 \
|
||||
"$@"
|
||||
--epochs 10
|
||||
done
|
||||
|
||||
echo "#######################################################"
|
||||
echo "##### Train ENGLISH model and transfer to RUSSIAN #####"
|
||||
echo "##### while iterating over loading logic with CUDNN ###"
|
||||
echo "##### while iterating over loading logic #####"
|
||||
echo "#######################################################"
|
||||
|
||||
for LOAD in 'init' 'last' 'auto'; do
|
||||
echo "########################################################"
|
||||
echo "#### Train ENGLISH model with just --checkpoint_dir ####"
|
||||
echo "########################################################"
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
|
||||
--train_cudnn\
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--alphabet_config_path "./data/alphabet.txt" \
|
||||
--load "$LOAD" \
|
||||
--train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
|
||||
--dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
|
||||
--test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
|
||||
--checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--train_files "${ldc93s1_csv}" --train_batch_size 1 \
|
||||
--dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
|
||||
--test_files "${ldc93s1_csv}" --test_batch_size 1 \
|
||||
--checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--scorer_path '' \
|
||||
--n_hidden 100 \
|
||||
--epochs 10 \
|
||||
"$@"
|
||||
--epochs 10
|
||||
|
||||
|
||||
echo "##############################################################################"
|
||||
echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
|
||||
echo "##############################################################################"
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
|
||||
--train_cudnn\
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--alphabet_config_path "./data/alphabet.txt" \
|
||||
--load "$LOAD" \
|
||||
--train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
|
||||
--dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
|
||||
--test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--train_files "${ldc93s1_csv}" --train_batch_size 1 \
|
||||
--dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
|
||||
--test_files "${ldc93s1_csv}" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--scorer_path '' \
|
||||
--n_hidden 100 \
|
||||
--epochs 10 \
|
||||
"$@"
|
||||
--epochs 10
|
||||
|
||||
echo "####################################################################################"
|
||||
echo "#### Transfer to RUSSIAN model with --save_checkpoint_dir --load_checkpoint_dir ####"
|
||||
echo "####################################################################################"
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
|
||||
--load_cudnn\
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--drop_source_layers 1 \
|
||||
--alphabet_config_path "${ru_dir}/alphabet.ru" \
|
||||
--load 'last' \
|
||||
--train_files "${ru_dir}/ru.csv" --train_batch_size 1 \
|
||||
--dev_files "${ru_dir}/ru.csv" --dev_batch_size 1 \
|
||||
--test_files "${ru_dir}/ru.csv" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
|
||||
--train_files "${ru_csv}" --train_batch_size 1 \
|
||||
--dev_files "${ru_csv}" --dev_batch_size 1 \
|
||||
--test_files "${ru_csv}" --test_batch_size 1 \
|
||||
--save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
|
||||
--load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
|
||||
--scorer_path '' \
|
||||
--n_hidden 100 \
|
||||
--epochs 10 \
|
||||
"$@"
|
||||
--epochs 10
|
||||
done
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/tc-tests-utils.sh
|
||||
|
||||
pyver_full=$1
|
||||
|
||||
if [ -z "${pyver_full}" ]; then
|
||||
echo "No python version given, aborting."
|
||||
exit 1
|
||||
fi;
|
||||
|
||||
pyver=$(echo "${pyver_full}" | cut -d':' -f1)
|
||||
|
||||
# 2.7.x => 27
|
||||
pyver_pkg=$(echo "${pyver}" | cut -d'.' -f1,2 | tr -d '.')
|
||||
|
||||
py_unicode_type=$(echo "${pyver_full}" | cut -d':' -f2)
|
||||
if [ "${py_unicode_type}" = "m" ]; then
|
||||
pyconf="ucs2"
|
||||
elif [ "${py_unicode_type}" = "mu" ]; then
|
||||
pyconf="ucs4"
|
||||
fi;
|
||||
|
||||
unset PYTHON_BIN_PATH
|
||||
unset PYTHONPATH
|
||||
export PYENV_ROOT="${HOME}/ds-train/.pyenv"
|
||||
export PATH="${PYENV_ROOT}/bin:${HOME}/bin:$PATH"
|
||||
|
||||
mkdir -p ${PYENV_ROOT} || true
|
||||
mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
|
||||
mkdir -p /tmp/train || true
|
||||
mkdir -p /tmp/train_tflite || true
|
||||
|
||||
install_pyenv "${PYENV_ROOT}"
|
||||
install_pyenv_virtualenv "$(pyenv root)/plugins/pyenv-virtualenv"
|
||||
|
||||
PYENV_NAME=deepspeech-train
|
||||
PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf}" pyenv install ${pyver}
|
||||
pyenv virtualenv ${pyver} ${PYENV_NAME}
|
||||
source ${PYENV_ROOT}/versions/${pyver}/envs/${PYENV_NAME}/bin/activate
|
||||
|
||||
set -o pipefail
|
||||
pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
|
||||
pip install --upgrade -r ${HOME}/DeepSpeech/ds/requirements.txt | cat
|
||||
set +o pipefail
|
||||
|
||||
pushd ${HOME}/DeepSpeech/ds/
|
||||
verify_ctcdecoder_url
|
||||
popd
|
||||
|
||||
platform=$(python -c 'import sys; import platform; plat = platform.system().lower(); arch = platform.machine().lower(); plat = "manylinux1" if plat == "linux" and arch == "x86_64" else plat; plat = "macosx_10_10" if plat == "darwin" else plat; sys.stdout.write("%s_%s" % (plat, platform.machine()));')
|
||||
whl_ds_version="$(python -c 'from pkg_resources import parse_version; print(parse_version("'${DS_VERSION}'"))')"
|
||||
decoder_pkg="ds_ctcdecoder-${whl_ds_version}-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}-${platform}.whl"
|
||||
|
||||
decoder_pkg_url=${DECODER_ARTIFACTS_ROOT}/${decoder_pkg}
|
||||
|
||||
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat
|
||||
|
||||
pushd ${HOME}/DeepSpeech/ds/
|
||||
time ./bin/run-tc-transfer.sh
|
||||
popd
|
||||
|
||||
deactivate
|
|
@ -0,0 +1,12 @@
|
|||
build:
|
||||
template_file: test-linux-opt-base.tyml
|
||||
dependencies:
|
||||
- "linux-amd64-ctc-opt"
|
||||
system_setup:
|
||||
>
|
||||
apt-get -qq -y install ${python.packages_trusty.apt}
|
||||
args:
|
||||
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transfer-tests.sh 3.6.4:m"
|
||||
metadata:
|
||||
name: "DeepSpeech Linux AMD64 CPU transfer learning Py3.6"
|
||||
description: "Training a DeepSpeech LDC93S1 model with transfer learning for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"
|
|
@ -56,11 +56,11 @@ def _load_checkpoint(session, checkpoint_path):
|
|||
init_vars.add(v)
|
||||
load_vars -= init_vars
|
||||
|
||||
for v in load_vars:
|
||||
for v in sorted(load_vars, key=lambda v: v.op.name):
|
||||
log_info('Loading variable from checkpoint: %s' % (v.op.name))
|
||||
v.load(ckpt.get_tensor(v.op.name), session=session)
|
||||
|
||||
for v in init_vars:
|
||||
for v in sorted(init_vars, key=lambda v: v.op.name):
|
||||
log_info('Initializing variable: %s' % (v.op.name))
|
||||
session.run(v.initializer)
|
||||
|
||||
|
|
Loading…
Reference in New Issue