Add transfer learning test
parent 5bba9ea5d1
commit f32fd7a33f
@@ -23,7 +23,7 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
   --learning_rate 0.001 --dropout_rate 0.05 \
   --scorer_path 'data/smoke_test/pruned_lm.scorer' | tee /tmp/resume.log
 
-if ! grep "Restored variables from most recent checkpoint" /tmp/resume.log; then
+if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
     echo "Did not resume training from checkpoint"
     exit 1
 else
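The hunk above only swaps the log message the resume smoke test expects. For context, the full pattern this check relies on looks roughly like the sketch below; the log path and expected message come from the hunk itself, while the abbreviated training flags are illustrative, not part of the commit.

    # Minimal sketch: run training, tee output to a log, then fail the job
    # unless the expected checkpoint-restore message shows up in the log.
    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
      --epochs 1 | tee /tmp/resume.log    # remaining flags omitted in this sketch

    if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
        echo "Did not resume training from checkpoint"
        exit 1
    fi
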
@@ -1,13 +1,19 @@
 #!/bin/sh
-'''
-This bash script is for running minimum working examples
-of transfer learning for continuous integration tests
-to be run on Taskcluster.
-'''
+# This bash script is for running minimum working examples
+# of transfer learning for continuous integration tests
+# to be run on Taskcluster.
 
 set -xe
 
-ru_csv="data/smoke_test/russian_sample_data/ru.csv"
-epoch_count=$1
+ru_dir="./data/smoke_test/russian_sample_data"
+ru_csv="${ru_dir}/ru.csv"
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+
+if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py ${ldc93s1_dir}
+fi;
 
 # Force only one visible device because we have a single-sample dataset
 # and when trying to run on multiple devices (like GPUs), this will break
@@ -23,13 +29,13 @@ for LOAD in 'init' 'last' 'auto'; do
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --scorer_path '' \
+    --checkpoint_dir '/tmp/ckpt/transfer/eng' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
   echo "##############################################################################"
   echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
@@ -37,14 +43,14 @@ for LOAD in 'init' 'last' 'auto'; do
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
   echo "#################################################################################"
   echo "#### Transfer Russian model with --save_checkpoint_dir --load_checkpoint_dir ####"
@@ -53,19 +59,19 @@ for LOAD in 'init' 'last' 'auto'; do
     --drop_source_layers 1 \
     --alphabet_config_path "${ru_dir}/alphabet.ru" \
     --load 'last' \
-    --train_files "${ru_dir}/ru.csv" --train_batch_size 1 \
-    --dev_files "${ru_dir}/ru.csv" --dev_batch_size 1 \
-    --test_files "${ru_dir}/ru.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ru_csv}" --train_batch_size 1 \
+    --dev_files "${ru_csv}" --dev_batch_size 1 \
+    --test_files "${ru_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 done
 
 echo "#######################################################"
 echo "##### Train ENGLISH model and transfer to RUSSIAN #####"
-echo "##### while iterating over loading logic with CUDNN ###"
+echo "##### while iterating over loading logic #####"
 echo "#######################################################"
 
 for LOAD in 'init' 'last' 'auto'; do
@@ -73,48 +79,45 @@ for LOAD in 'init' 'last' 'auto'; do
   echo "#### Train ENGLISH model with just --checkpoint_dir ####"
   echo "########################################################"
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
-    --train_cudnn\
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
 
   echo "##############################################################################"
   echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
   echo "##############################################################################"
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
-    --train_cudnn\
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
   echo "####################################################################################"
   echo "#### Transfer to RUSSIAN model with --save_checkpoint_dir --load_checkpoint_dir ####"
   echo "####################################################################################"
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
-    --load_cudnn\
     --drop_source_layers 1 \
     --alphabet_config_path "${ru_dir}/alphabet.ru" \
     --load 'last' \
-    --train_files "${ru_dir}/ru.csv" --train_batch_size 1 \
-    --dev_files "${ru_dir}/ru.csv" --dev_batch_size 1 \
-    --test_files "${ru_dir}/ru.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ru_csv}" --train_batch_size 1 \
+    --dev_files "${ru_csv}" --dev_batch_size 1 \
+    --test_files "${ru_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 done
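Taken together, the hunks above boil the transfer test down to a CPU-only flow. The condensed sketch below is assembled only from flags and variables that appear in the diff; batch-size flags are omitted for brevity, and the epoch count and checkpoint paths are illustrative rather than prescriptive.

    # 1) Train a small ENGLISH model on the LDC93S1 sample, with no external scorer.
    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
      --alphabet_config_path "./data/alphabet.txt" \
      --train_files "${ldc93s1_csv}" --dev_files "${ldc93s1_csv}" --test_files "${ldc93s1_csv}" \
      --scorer_path '' --n_hidden 100 --epochs 10 \
      --save_checkpoint_dir '/tmp/ckpt/transfer/eng'

    # 2) Transfer to RUSSIAN: load the English checkpoint, drop the top source layer,
    #    and continue training with the Russian alphabet and data.
    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
      --drop_source_layers 1 \
      --alphabet_config_path "${ru_dir}/alphabet.ru" \
      --load 'last' \
      --train_files "${ru_csv}" --dev_files "${ru_csv}" --test_files "${ru_csv}" \
      --scorer_path '' --n_hidden 100 --epochs 10 \
      --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
      --save_checkpoint_dir '/tmp/ckpt/transfer/ru'
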
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+set -xe
+
+source $(dirname "$0")/tc-tests-utils.sh
+
+pyver_full=$1
+
+if [ -z "${pyver_full}" ]; then
+    echo "No python version given, aborting."
+    exit 1
+fi;
+
+pyver=$(echo "${pyver_full}" | cut -d':' -f1)
+
+# 2.7.x => 27
+pyver_pkg=$(echo "${pyver}" | cut -d'.' -f1,2 | tr -d '.')
+
+py_unicode_type=$(echo "${pyver_full}" | cut -d':' -f2)
+if [ "${py_unicode_type}" = "m" ]; then
+    pyconf="ucs2"
+elif [ "${py_unicode_type}" = "mu" ]; then
+    pyconf="ucs4"
+fi;
+
+unset PYTHON_BIN_PATH
+unset PYTHONPATH
+export PYENV_ROOT="${HOME}/ds-train/.pyenv"
+export PATH="${PYENV_ROOT}/bin:${HOME}/bin:$PATH"
+
+mkdir -p ${PYENV_ROOT} || true
+mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
+mkdir -p /tmp/train || true
+mkdir -p /tmp/train_tflite || true
+
+install_pyenv "${PYENV_ROOT}"
+install_pyenv_virtualenv "$(pyenv root)/plugins/pyenv-virtualenv"
+
+PYENV_NAME=deepspeech-train
+PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf}" pyenv install ${pyver}
+pyenv virtualenv ${pyver} ${PYENV_NAME}
+source ${PYENV_ROOT}/versions/${pyver}/envs/${PYENV_NAME}/bin/activate
+
+set -o pipefail
+pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
+pip install --upgrade -r ${HOME}/DeepSpeech/ds/requirements.txt | cat
+set +o pipefail
+
+pushd ${HOME}/DeepSpeech/ds/
+    verify_ctcdecoder_url
+popd
+
+platform=$(python -c 'import sys; import platform; plat = platform.system().lower(); arch = platform.machine().lower(); plat = "manylinux1" if plat == "linux" and arch == "x86_64" else plat; plat = "macosx_10_10" if plat == "darwin" else plat; sys.stdout.write("%s_%s" % (plat, platform.machine()));')
+whl_ds_version="$(python -c 'from pkg_resources import parse_version; print(parse_version("'${DS_VERSION}'"))')"
+decoder_pkg="ds_ctcdecoder-${whl_ds_version}-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}-${platform}.whl"
+
+decoder_pkg_url=${DECODER_ARTIFACTS_ROOT}/${decoder_pkg}
+
+LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat
+
+pushd ${HOME}/DeepSpeech/ds/
+    time ./bin/run-tc-transfer.sh
+popd
+
+deactivate
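For reference, the version argument that the Taskcluster definition below passes ("3.6.4:m") is split by the cut/tr pipeline above roughly as follows. This is a sketch of what those commands produce, not output captured from CI:

    pyver_full="3.6.4:m"
    pyver=$(echo "${pyver_full}" | cut -d':' -f1)                 # 3.6.4
    pyver_pkg=$(echo "${pyver}" | cut -d'.' -f1,2 | tr -d '.')    # 36
    py_unicode_type=$(echo "${pyver_full}" | cut -d':' -f2)       # m  -> pyconf="ucs2"
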
@@ -0,0 +1,12 @@
+build:
+  template_file: test-linux-opt-base.tyml
+  dependencies:
+    - "linux-amd64-ctc-opt"
+  system_setup:
+    >
+      apt-get -qq -y install ${python.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transfer-tests.sh 3.6.4:m"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU transfer learning Py3.6"
+    description: "Training a DeepSpeech LDC93S1 model with transfer learning for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"
@@ -56,11 +56,11 @@ def _load_checkpoint(session, checkpoint_path):
         init_vars.add(v)
     load_vars -= init_vars
 
-    for v in load_vars:
+    for v in sorted(load_vars, key=lambda v: v.op.name):
         log_info('Loading variable from checkpoint: %s' % (v.op.name))
         v.load(ckpt.get_tensor(v.op.name), session=session)
 
-    for v in init_vars:
+    for v in sorted(init_vars, key=lambda v: v.op.name):
         log_info('Initializing variable: %s' % (v.op.name))
         session.run(v.initializer)
 
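The sorted() change above only affects iteration order, so its visible effect is that the per-variable log lines now come out in the same alphabetical order on every run. A hypothetical way to eyeball that from two identical runs (log paths are made up, not part of the commit):

    # Compare the variable-loading lines from two runs of the same training command.
    grep "Loading variable from checkpoint" /tmp/run1.log > /tmp/vars1.txt
    grep "Loading variable from checkpoint" /tmp/run2.log > /tmp/vars2.txt
    diff /tmp/vars1.txt /tmp/vars2.txt && echo "variable load order is deterministic"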