Add training test with --metrics_files

2020-06-08 15:50:12 +02:00 · 2020-06-08 15:50:12 +02:00 · ecd79531c8
commit ecd79531c8
parent 07d2c39138
2 changed files with 32 additions and 0 deletions
--- a/bin/run-tc-ldc93s1_new_metrics.sh
+++ b/bin/run-tc-ldc93s1_new_metrics.sh
@ -0,0 +1,29 @@
 #!/bin/sh
 # Runs DeepSpeech.py on the LDC93S1 smoke-test sample, using the same CSV
 # for --train_files, --dev_files, --test_files and --metrics_files.
 #
 # Usage: run-tc-ldc93s1_new_metrics.sh <epoch_count> <audio_sample_rate>
 #   $1  value passed to --epochs
 #   $2  value passed to --audio_sample_rate
 set -xe
 ldc93s1_dir="./data/smoke_test"
 ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
 epoch_count=$1
 audio_sample_rate=$2
 # Run the importer only when the CSV is not already on disk.
 if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
 fi
 # Force only one visible device because we have a single-sample dataset
 # and when trying to run on multiple devices (like GPUs), this will break
 export CUDA_VISIBLE_DEVICES=0
 # Expansions are quoted so that each flag always receives exactly one
 # argument, even if a value is empty or contains whitespace.
 python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
  --metrics_files "${ldc93s1_csv}" \
  --n_hidden 100 --epochs "${epoch_count}" \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_metrics' \
  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_metrics' \
  --scorer_path 'data/smoke_test/pruned_lm.scorer' \
  --audio_sample_rate "${audio_sample_rate}"
--- a/taskcluster/tc-train-extra-tests.sh
+++ b/taskcluster/tc-train-extra-tests.sh
@ -51,6 +51,9 @@ pushd ${HOME}/DeepSpeech/ds/
    # Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features
    time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}"
    time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}"
    # Test --metrics_files training argument.
    # Arguments are the epoch count (2) and the audio sample rate.
    time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}"
 popd
 pushd ${HOME}/DeepSpeech/ds/