STT/bin/run-ci-transfer.sh
2021-05-19 20:19:36 +02:00

81 lines
3.4 KiB
Bash
Executable File

#!/bin/sh
# This shell script runs minimum working examples
# of transfer learning as smoke tests
# for continuous integration (CI).
# Trace every command (-x) and abort on the first failing one (-e).
set -xe

# Location of the Russian sample data and its CSV manifest.
ru_dir="./data/smoke_test/russian_sample_data"
ru_csv="${ru_dir}/ru.csv"

# Location of the LDC93S1 sample data and its CSV manifest.
ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Run the LDC93S1 importer only when its CSV is not already on disk.
if [ ! -f "${ldc93s1_csv}" ]; then
  echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
  # Quoted so the directory is always passed as a single argument.
  python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

# Force UTF-8 output
export PYTHONIOENCODING=utf-8
# Paths reused by several invocations below.
eng_ckpt_dir='/tmp/ckpt/transfer/eng'
ru_ckpt_dir='/tmp/ckpt/transfer/ru'
en_alphabet="./data/alphabet.txt"
ru_alphabet="${ru_dir}/alphabet.ru"

printf '%s\n' \
  "##### Train ENGLISH model and transfer to RUSSIAN #####" \
  "##### while iterating over loading logic #####"

# Run the full train -> train -> transfer -> evaluate sequence once per
# --load_train value used for the English runs.
for load_mode in 'init' 'last' 'auto'; do
  printf '%s\n' \
    "########################################################" \
    "#### Train ENGLISH model with just --checkpoint_dir ####" \
    "########################################################"
  # English run that passes a single --checkpoint_dir.
  python -u train.py \
    --show_progressbar false \
    --early_stop false \
    --alphabet_config_path "${en_alphabet}" \
    --load_train "${load_mode}" \
    --train_files "${ldc93s1_csv}" \
    --train_batch_size 1 \
    --dev_files "${ldc93s1_csv}" \
    --dev_batch_size 1 \
    --test_files "${ldc93s1_csv}" \
    --test_batch_size 1 \
    --scorer_path '' \
    --checkpoint_dir "${eng_ckpt_dir}" \
    --n_hidden 100 \
    --epochs 10

  printf '%s\n' \
    "##############################################################################" \
    "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####" \
    "##############################################################################"
  # English run that passes the same directory as separate save/load flags.
  python -u train.py \
    --show_progressbar false \
    --early_stop false \
    --alphabet_config_path "${en_alphabet}" \
    --load_train "${load_mode}" \
    --train_files "${ldc93s1_csv}" \
    --train_batch_size 1 \
    --dev_files "${ldc93s1_csv}" \
    --dev_batch_size 1 \
    --test_files "${ldc93s1_csv}" \
    --test_batch_size 1 \
    --save_checkpoint_dir "${eng_ckpt_dir}" \
    --load_checkpoint_dir "${eng_ckpt_dir}" \
    --scorer_path '' \
    --n_hidden 100 \
    --epochs 10

  printf '%s\n' \
    "####################################################################################" \
    "#### Transfer to RUSSIAN model with --save_checkpoint_dir --load_checkpoint_dir ####" \
    "####################################################################################"
  # Russian run: loads from the English checkpoint directory, saves to the
  # Russian one, and passes --drop_source_layers 1 with the Russian alphabet.
  # --load_train is fixed to 'last' here; it does not follow the loop value.
  python -u train.py \
    --show_progressbar false \
    --early_stop false \
    --drop_source_layers 1 \
    --alphabet_config_path "${ru_alphabet}" \
    --load_train 'last' \
    --train_files "${ru_csv}" \
    --train_batch_size 1 \
    --dev_files "${ru_csv}" \
    --dev_batch_size 1 \
    --save_checkpoint_dir "${ru_ckpt_dir}" \
    --load_checkpoint_dir "${eng_ckpt_dir}" \
    --scorer_path '' \
    --n_hidden 100 \
    --epochs 10

  # Evaluate the checkpoint produced by the transfer run.
  python -u evaluate.py \
    --show_progressbar false \
    --test_files "${ru_csv}" \
    --test_batch_size 1 \
    --alphabet_config_path "${ru_alphabet}" \
    --load_checkpoint_dir "${ru_ckpt_dir}" \
    --scorer_path '' \
    --n_hidden 100
done