Add transfer learning test

2020-02-16 19:22:57 +01:00 · 2020-02-16 19:22:57 +01:00 · f32fd7a33f
commit f32fd7a33f
parent 5bba9ea5d1
5 changed files with 140 additions and 60 deletions
--- a/bin/run-tc-ldc93s1_checkpoint.sh
+++ b/bin/run-tc-ldc93s1_checkpoint.sh
@ -23,7 +23,7 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --learning_rate 0.001 --dropout_rate 0.05 \
  --scorer_path 'data/smoke_test/pruned_lm.scorer' | tee /tmp/resume.log

-if ! grep "Restored variables from most recent checkpoint" /tmp/resume.log; then
+if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
  echo "Did not resume training from checkpoint"
  exit 1
 else
--- a/bin/run-tc-transfer.sh
+++ b/bin/run-tc-transfer.sh
@ -1,13 +1,19 @@
 #!/bin/sh
-'''
-This bash script is for running minimum working examples
-of transfer learning for continuous integration tests
-to be run on Taskcluster.
-'''
+# This sh script runs minimum working examples
+# of transfer learning as continuous integration tests
+# on Taskcluster.
 set -xe

-ru_csv="data/smoke_test/russian_sample_data/ru.csv"
-epoch_count=$1
+ru_dir="./data/smoke_test/russian_sample_data"
+ru_csv="${ru_dir}/ru.csv"
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+
+if [ ! -f "${ldc93s1_csv}" ]; then  # run the importer only when the sample CSV is missing
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
+fi

 # Force only one visible device because we have a single-sample dataset
 # and when trying to run on multiple devices (like GPUs), this will break
@ -20,101 +26,98 @@ for LOAD in 'init' 'last' 'auto'; do
    echo "########################################################"
    echo "#### Train ENGLISH model with just --checkpoint_dir ####"
    echo "########################################################"
-    python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
+    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
       --alphabet_config_path "./data/alphabet.txt" \
       --load "$LOAD" \
-       --train_files  "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1  \
-       --dev_files  "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-       --test_files  "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-       --checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+       --train_files  "${ldc93s1_csv}" --train_batch_size 1  \
+       --dev_files  "${ldc93s1_csv}" --dev_batch_size 1 \
+       --test_files  "${ldc93s1_csv}" --test_batch_size 1 \
+       --scorer_path '' \
+       --checkpoint_dir '/tmp/ckpt/transfer/eng' \
       --n_hidden 100 \
-       --epochs 10 \
-       "$@"
+       --epochs 10

    echo "##############################################################################"
    echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
    echo "##############################################################################"
-    python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
+    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
           --alphabet_config_path "./data/alphabet.txt" \
           --load "$LOAD" \
-           --train_files  "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1  \
-           --dev_files  "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-           --test_files  "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-           --save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
-           --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+           --train_files  "${ldc93s1_csv}" --train_batch_size 1  \
+           --dev_files  "${ldc93s1_csv}" --dev_batch_size 1 \
+           --test_files  "${ldc93s1_csv}" --test_batch_size 1 \
+           --save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+           --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+           --scorer_path '' \
           --n_hidden 100 \
-           --epochs 10 \
-           "$@"
+           --epochs 10

    echo "#################################################################################"
    echo "#### Transfer Russian model with --save_checkpoint_dir --load_checkpoint_dir ####"
    echo "#################################################################################"
-    python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
+    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
           --drop_source_layers 1 \
           --alphabet_config_path "${ru_dir}/alphabet.ru" \
           --load 'last' \
-           --train_files  "${ru_dir}/ru.csv" --train_batch_size 1  \
-           --dev_files  "${ru_dir}/ru.csv" --dev_batch_size 1 \
-           --test_files  "${ru_dir}/ru.csv" --test_batch_size 1 \
-           --save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
-           --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+           --train_files  "${ru_csv}" --train_batch_size 1  \
+           --dev_files  "${ru_csv}" --dev_batch_size 1 \
+           --test_files  "${ru_csv}" --test_batch_size 1 \
+           --save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
+           --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+           --scorer_path '' \
           --n_hidden 100 \
-           --epochs 10 \
-           "$@"
+           --epochs 10
 done

 echo "#######################################################"
 echo "##### Train ENGLISH model and transfer to RUSSIAN #####"
-echo "##### while iterating over loading logic with CUDNN ###"
+echo "##### while iterating over loading logic          #####"
 echo "#######################################################"

 for LOAD in 'init' 'last' 'auto'; do
    echo "########################################################"
    echo "#### Train ENGLISH model with just --checkpoint_dir ####"
    echo "########################################################"
-    python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
-       --train_cudnn\
+    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
       --alphabet_config_path "./data/alphabet.txt" \
       --load "$LOAD" \
-       --train_files  "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1  \
-       --dev_files  "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-       --test_files  "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-       --checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+       --train_files  "${ldc93s1_csv}" --train_batch_size 1  \
+       --dev_files  "${ldc93s1_csv}" --dev_batch_size 1 \
+       --test_files  "${ldc93s1_csv}" --test_batch_size 1 \
+       --checkpoint_dir '/tmp/ckpt/transfer/eng' \
+       --scorer_path '' \
       --n_hidden 100 \
-       --epochs 10 \
-       "$@"
+       --epochs 10


    echo "##############################################################################"
    echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
    echo "##############################################################################"
-    python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
-           --train_cudnn\
+    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
           --alphabet_config_path "./data/alphabet.txt" \
           --load "$LOAD" \
-           --train_files  "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1  \
-           --dev_files  "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-           --test_files  "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-           --save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
-           --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+           --train_files  "${ldc93s1_csv}" --train_batch_size 1  \
+           --dev_files  "${ldc93s1_csv}" --dev_batch_size 1 \
+           --test_files  "${ldc93s1_csv}" --test_batch_size 1 \
+           --save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+           --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+           --scorer_path '' \
           --n_hidden 100 \
-           --epochs 10 \
-           "$@"
+           --epochs 10

    echo "####################################################################################"
    echo "#### Transfer to RUSSIAN model with --save_checkpoint_dir --load_checkpoint_dir ####"
    echo "####################################################################################"
-    python -u DeepSpeech.py --noshow_progressbar --noearly_stop\
-           --load_cudnn\
+    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
           --drop_source_layers 1 \
           --alphabet_config_path "${ru_dir}/alphabet.ru" \
           --load 'last' \
-           --train_files  "${ru_dir}/ru.csv" --train_batch_size 1  \
-           --dev_files  "${ru_dir}/ru.csv" --dev_batch_size 1 \
-           --test_files  "${ru_dir}/ru.csv" --test_batch_size 1 \
-           --save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
-           --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+           --train_files  "${ru_csv}" --train_batch_size 1  \
+           --dev_files  "${ru_csv}" --dev_batch_size 1 \
+           --test_files  "${ru_csv}" --test_batch_size 1 \
+           --save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
+           --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+           --scorer_path '' \
           --n_hidden 100 \
-           --epochs 10 \
-           "$@"
+           --epochs 10
 done
--- a/taskcluster/tc-transfer-tests.sh
+++ b/taskcluster/tc-transfer-tests.sh
@ -0,0 +1,65 @@
+#!/bin/bash
+# Taskcluster driver: creates a pyenv virtualenv, installs the training deps and the ds_ctcdecoder wheel, then runs bin/run-tc-transfer.sh.
+set -xe
+
+source "$(dirname "$0")/tc-tests-utils.sh"
+# $1 has the form <python-version>:<abi-suffix>, e.g. "3.6.4:m"
+pyver_full=$1
+
+if [ -z "${pyver_full}" ]; then
+    echo "No python version given, aborting."
+    exit 1
+fi
+
+pyver=$(echo "${pyver_full}" | cut -d':' -f1)
+
+# 2.7.x => 27
+pyver_pkg=$(echo "${pyver}" | cut -d'.' -f1,2 | tr -d '.')
+# "m" => ucs2, "mu" => ucs4; any other suffix leaves pyconf unset (empty --enable-unicode= below).
+py_unicode_type=$(echo "${pyver_full}" | cut -d':' -f2)
+if [ "${py_unicode_type}" = "m" ]; then
+  pyconf="ucs2"
+elif [ "${py_unicode_type}" = "mu" ]; then
+  pyconf="ucs4"
+fi
+
+unset PYTHON_BIN_PATH
+unset PYTHONPATH
+export PYENV_ROOT="${HOME}/ds-train/.pyenv"
+export PATH="${PYENV_ROOT}/bin:${HOME}/bin:$PATH"
+
+mkdir -p "${PYENV_ROOT}" || true
+mkdir -p "${TASKCLUSTER_ARTIFACTS}" || true
+mkdir -p /tmp/train || true
+mkdir -p /tmp/train_tflite || true
+
+install_pyenv "${PYENV_ROOT}"
+install_pyenv_virtualenv "$(pyenv root)/plugins/pyenv-virtualenv"
+
+PYENV_NAME=deepspeech-train
+PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf}" pyenv install "${pyver}"
+pyenv virtualenv "${pyver}" "${PYENV_NAME}"
+source "${PYENV_ROOT}/versions/${pyver}/envs/${PYENV_NAME}/bin/activate"
+# pipefail: a failing pip must abort the run (set -e) even though its output is piped through cat.
+set -o pipefail
+pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
+pip install --upgrade -r "${HOME}/DeepSpeech/ds/requirements.txt" | cat
+set +o pipefail
+
+pushd "${HOME}/DeepSpeech/ds/"
+    verify_ctcdecoder_url
+popd
+
+platform=$(python -c 'import sys; import platform; plat = platform.system().lower(); arch = platform.machine().lower(); plat = "manylinux1" if plat == "linux" and arch == "x86_64" else plat; plat = "macosx_10_10" if plat == "darwin" else plat; sys.stdout.write("%s_%s" % (plat, platform.machine()));')
+whl_ds_version="$(python -c 'from pkg_resources import parse_version; print(parse_version("'${DS_VERSION}'"))')"
+decoder_pkg="ds_ctcdecoder-${whl_ds_version}-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}-${platform}.whl"
+
+decoder_pkg_url="${DECODER_ARTIFACTS_ROOT}/${decoder_pkg}"
+# Subshell with pipefail: otherwise "| cat" hides a failed wheel install from set -e. PY37_SOURCE_PACKAGE stays unquoted so an empty value adds no argument.
+(set -o pipefail; LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} "${decoder_pkg_url}" | cat)
+
+pushd "${HOME}/DeepSpeech/ds/"
+    time ./bin/run-tc-transfer.sh
+popd
+
+deactivate
--- a/taskcluster/test-transfer-linux-amd64-py36m-opt.yml
+++ b/taskcluster/test-transfer-linux-amd64-py36m-opt.yml
@ -0,0 +1,12 @@
+build:
+  template_file: test-linux-opt-base.tyml
+  dependencies:
+    - "linux-amd64-ctc-opt"
+  system_setup:
+    >
+      apt-get -qq -y install ${python.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transfer-tests.sh 3.6.4:m"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU transfer learning Py3.6"
+    description: "Training a DeepSpeech LDC93S1 model with transfer learning for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"
--- a/util/checkpoints.py
+++ b/util/checkpoints.py
@ -56,11 +56,11 @@ def _load_checkpoint(session, checkpoint_path):
                init_vars.add(v)
        load_vars -= init_vars

-    for v in load_vars:
+    for v in sorted(load_vars, key=lambda v: v.op.name):
        log_info('Loading variable from checkpoint: %s' % (v.op.name))
        v.load(ckpt.get_tensor(v.op.name), session=session)

-    for v in init_vars:
+    for v in sorted(init_vars, key=lambda v: v.op.name):
        log_info('Initializing variable: %s' % (v.op.name))
        session.run(v.initializer)