Add transfer learning test
parent 5bba9ea5d1
commit f32fd7a33f
@@ -23,7 +23,7 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
   --learning_rate 0.001 --dropout_rate 0.05 \
   --scorer_path 'data/smoke_test/pruned_lm.scorer' | tee /tmp/resume.log
 
-if ! grep "Restored variables from most recent checkpoint" /tmp/resume.log; then
+if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
     echo "Did not resume training from checkpoint"
     exit 1
 else
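The hunk above only swaps the log message the resume smoke test expects. For context, the full pattern this check relies on looks roughly like the sketch below; the log path and expected message come from the hunk itself, while the abbreviated training flags are illustrative, not part of the commit.

    # Minimal sketch: run training, tee output to a log, then fail the job
    # unless the expected checkpoint-restore message shows up in the log.
    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
      --epochs 1 | tee /tmp/resume.log    # remaining flags omitted in this sketch

    if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
        echo "Did not resume training from checkpoint"
        exit 1
    fi
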
@@ -1,13 +1,19 @@
 #!/bin/sh
-'''
-This bash script is for running minimum working examples
-of transfer learning for continuous integration tests
-to be run on Taskcluster.
-'''
+# This bash script is for running minimum working examples
+# of transfer learning for continuous integration tests
+# to be run on Taskcluster.
 
 set -xe
 
-ru_csv="data/smoke_test/russian_sample_data/ru.csv"
-epoch_count=$1
+ru_dir="./data/smoke_test/russian_sample_data"
+ru_csv="${ru_dir}/ru.csv"
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+
+if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py ${ldc93s1_dir}
+fi;
 
 # Force only one visible device because we have a single-sample dataset
 # and when trying to run on multiple devices (like GPUs), this will break
@@ -23,13 +29,13 @@ for LOAD in 'init' 'last' 'auto'; do
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --scorer_path '' \
+    --checkpoint_dir '/tmp/ckpt/transfer/eng' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
   echo "##############################################################################"
   echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
@@ -37,14 +43,14 @@ for LOAD in 'init' 'last' 'auto'; do
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
   echo "#################################################################################"
   echo "#### Transfer Russian model with --save_checkpoint_dir --load_checkpoint_dir ####"
@@ -53,19 +59,19 @@ for LOAD in 'init' 'last' 'auto'; do
     --drop_source_layers 1 \
     --alphabet_config_path "${ru_dir}/alphabet.ru" \
     --load 'last' \
-    --train_files "${ru_dir}/ru.csv" --train_batch_size 1 \
-    --dev_files "${ru_dir}/ru.csv" --dev_batch_size 1 \
-    --test_files "${ru_dir}/ru.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ru_csv}" --train_batch_size 1 \
+    --dev_files "${ru_csv}" --dev_batch_size 1 \
+    --test_files "${ru_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 done
 
 echo "#######################################################"
 echo "##### Train ENGLISH model and transfer to RUSSIAN #####"
-echo "##### while iterating over loading logic with CUDNN ###"
+echo "##### while iterating over loading logic #####"
 echo "#######################################################"
 
 for LOAD in 'init' 'last' 'auto'; do
@@ -73,48 +79,45 @@ for LOAD in 'init' 'last' 'auto'; do
   echo "#### Train ENGLISH model with just --checkpoint_dir ####"
   echo "########################################################"
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
-    --train_cudnn\
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
 
   echo "##############################################################################"
   echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####"
   echo "##############################################################################"
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
-    --train_cudnn\
     --alphabet_config_path "./data/alphabet.txt" \
     --load "$LOAD" \
-    --train_files "./data/ldc93s1/ldc93s1.csv" --train_batch_size 1 \
-    --dev_files "./data/ldc93s1/ldc93s1.csv" --dev_batch_size 1 \
-    --test_files "./data/ldc93s1/ldc93s1.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 
   echo "####################################################################################"
   echo "#### Transfer to RUSSIAN model with --save_checkpoint_dir --load_checkpoint_dir ####"
   echo "####################################################################################"
   python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
-    --load_cudnn\
     --drop_source_layers 1 \
     --alphabet_config_path "${ru_dir}/alphabet.ru" \
     --load 'last' \
-    --train_files "${ru_dir}/ru.csv" --train_batch_size 1 \
-    --dev_files "${ru_dir}/ru.csv" --dev_batch_size 1 \
-    --test_files "${ru_dir}/ru.csv" --test_batch_size 1 \
-    --save_checkpoint_dir '/tmp/ckpt/transfer/ru-cudnn' \
-    --load_checkpoint_dir '/tmp/ckpt/transfer/eng-cudnn' \
+    --train_files "${ru_csv}" --train_batch_size 1 \
+    --dev_files "${ru_csv}" --dev_batch_size 1 \
+    --test_files "${ru_csv}" --test_batch_size 1 \
+    --save_checkpoint_dir '/tmp/ckpt/transfer/ru' \
+    --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
+    --scorer_path '' \
     --n_hidden 100 \
-    --epochs 10 \
-    "$@"
+    --epochs 10
 done
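Taken together, the hunks above boil the transfer test down to a CPU-only flow. The condensed sketch below is assembled only from flags and variables that appear in the diff; batch-size flags are omitted for brevity, and the epoch count and checkpoint paths are illustrative rather than prescriptive.

    # 1) Train a small ENGLISH model on the LDC93S1 sample, with no external scorer.
    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
      --alphabet_config_path "./data/alphabet.txt" \
      --train_files "${ldc93s1_csv}" --dev_files "${ldc93s1_csv}" --test_files "${ldc93s1_csv}" \
      --scorer_path '' --n_hidden 100 --epochs 10 \
      --save_checkpoint_dir '/tmp/ckpt/transfer/eng'

    # 2) Transfer to RUSSIAN: load the English checkpoint, drop the top source layer,
    #    and continue training with the Russian alphabet and data.
    python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
      --drop_source_layers 1 \
      --alphabet_config_path "${ru_dir}/alphabet.ru" \
      --load 'last' \
      --train_files "${ru_csv}" --dev_files "${ru_csv}" --test_files "${ru_csv}" \
      --scorer_path '' --n_hidden 100 --epochs 10 \
      --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \
      --save_checkpoint_dir '/tmp/ckpt/transfer/ru'
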
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+set -xe
+
+source $(dirname "$0")/tc-tests-utils.sh
+
+pyver_full=$1
+
+if [ -z "${pyver_full}" ]; then
+    echo "No python version given, aborting."
+    exit 1
+fi;
+
+pyver=$(echo "${pyver_full}" | cut -d':' -f1)
+
+# 2.7.x => 27
+pyver_pkg=$(echo "${pyver}" | cut -d'.' -f1,2 | tr -d '.')
+
+py_unicode_type=$(echo "${pyver_full}" | cut -d':' -f2)
+if [ "${py_unicode_type}" = "m" ]; then
+    pyconf="ucs2"
+elif [ "${py_unicode_type}" = "mu" ]; then
+    pyconf="ucs4"
+fi;
+
+unset PYTHON_BIN_PATH
+unset PYTHONPATH
+export PYENV_ROOT="${HOME}/ds-train/.pyenv"
+export PATH="${PYENV_ROOT}/bin:${HOME}/bin:$PATH"
+
+mkdir -p ${PYENV_ROOT} || true
+mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
+mkdir -p /tmp/train || true
+mkdir -p /tmp/train_tflite || true
+
+install_pyenv "${PYENV_ROOT}"
+install_pyenv_virtualenv "$(pyenv root)/plugins/pyenv-virtualenv"
+
+PYENV_NAME=deepspeech-train
+PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf}" pyenv install ${pyver}
+pyenv virtualenv ${pyver} ${PYENV_NAME}
+source ${PYENV_ROOT}/versions/${pyver}/envs/${PYENV_NAME}/bin/activate
+
+set -o pipefail
+pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
+pip install --upgrade -r ${HOME}/DeepSpeech/ds/requirements.txt | cat
+set +o pipefail
+
+pushd ${HOME}/DeepSpeech/ds/
+    verify_ctcdecoder_url
+popd
+
+platform=$(python -c 'import sys; import platform; plat = platform.system().lower(); arch = platform.machine().lower(); plat = "manylinux1" if plat == "linux" and arch == "x86_64" else plat; plat = "macosx_10_10" if plat == "darwin" else plat; sys.stdout.write("%s_%s" % (plat, platform.machine()));')
+whl_ds_version="$(python -c 'from pkg_resources import parse_version; print(parse_version("'${DS_VERSION}'"))')"
+decoder_pkg="ds_ctcdecoder-${whl_ds_version}-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}-${platform}.whl"
+
+decoder_pkg_url=${DECODER_ARTIFACTS_ROOT}/${decoder_pkg}
+
+LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat
+
+pushd ${HOME}/DeepSpeech/ds/
+    time ./bin/run-tc-transfer.sh
+popd
+
+deactivate
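For reference, the version argument that the Taskcluster definition below passes ("3.6.4:m") is split by the cut/tr pipeline above roughly as follows. This is a sketch of what those commands produce, not output captured from CI:

    pyver_full="3.6.4:m"
    pyver=$(echo "${pyver_full}" | cut -d':' -f1)                 # 3.6.4
    pyver_pkg=$(echo "${pyver}" | cut -d'.' -f1,2 | tr -d '.')    # 36
    py_unicode_type=$(echo "${pyver_full}" | cut -d':' -f2)       # m  -> pyconf="ucs2"
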
@@ -0,0 +1,12 @@
+build:
+  template_file: test-linux-opt-base.tyml
+  dependencies:
+    - "linux-amd64-ctc-opt"
+  system_setup:
+    >
+      apt-get -qq -y install ${python.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-transfer-tests.sh 3.6.4:m"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU transfer learning Py3.6"
+    description: "Training a DeepSpeech LDC93S1 model with transfer learning for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"
@@ -56,11 +56,11 @@ def _load_checkpoint(session, checkpoint_path):
         init_vars.add(v)
     load_vars -= init_vars
 
-    for v in load_vars:
+    for v in sorted(load_vars, key=lambda v: v.op.name):
         log_info('Loading variable from checkpoint: %s' % (v.op.name))
         v.load(ckpt.get_tensor(v.op.name), session=session)
 
-    for v in init_vars:
+    for v in sorted(init_vars, key=lambda v: v.op.name):
         log_info('Initializing variable: %s' % (v.op.name))
         session.run(v.initializer)
 
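The sorted() change above only affects iteration order, so its visible effect is that the per-variable log lines now come out in the same alphabetical order on every run. A hypothetical way to eyeball that from two identical runs (log paths are made up, not part of the commit):

    # Compare the variable-loading lines from two runs of the same training command.
    grep "Loading variable from checkpoint" /tmp/run1.log > /tmp/vars1.txt
    grep "Loading variable from checkpoint" /tmp/run2.log > /tmp/vars2.txt
    diff /tmp/vars1.txt /tmp/vars2.txt && echo "variable load order is deterministic"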