Merge pull request #3051 from mozilla/add-metrics-tracking

Add read-only metrics tracking
This commit is contained in:
Reuben Morais 2020-06-09 13:14:50 +02:00 committed by GitHub
commit bfaa68945a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 189 additions and 15 deletions

View File

@ -0,0 +1,29 @@
#!/bin/sh
# Smoke-trains DeepSpeech on the single-sample LDC93S1 data set with the
# --metrics_files flag enabled. The sample data is imported first when its CSV
# is not present yet.
#
# Arguments:
#   $1 - number of epochs to train (forwarded to --epochs)
#   $2 - audio sample rate (forwarded to --audio_sample_rate)

set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Fail fast with a usage message instead of handing DeepSpeech.py a malformed
# command line when an argument is missing.
epoch_count="${1:?usage: $0 <epoch_count> <audio_sample_rate>}"
audio_sample_rate="${2:?usage: $0 <epoch_count> <audio_sample_rate>}"

if [ ! -f "${ldc93s1_csv}" ]; then
  echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
  python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

# The same CSV is used for train, dev, test and metrics: this run only checks
# that the metrics code path executes, not model quality.
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
  --metrics_files "${ldc93s1_csv}" \
  --n_hidden 100 --epochs "${epoch_count}" \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_metrics' \
  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_metrics' \
  --scorer_path 'data/smoke_test/pruned_lm.scorer' \
  --audio_sample_rate "${audio_sample_rate}"

View File

@ -0,0 +1,63 @@
#!/bin/bash
# Taskcluster entry point for the "extra" training tests: SDB sources,
# interleaved SDB+CSV sources, the --metrics_files flag and SDB checkpoints.
#
# Arguments:
#   $1 - Python version spec handed to extract_python_versions (e.g. "3.6.10:m")
#   $2 - bitrate of the smoke-test sample to train on: "8k" or "16k"
#   $3 - optional; "--pypi" stops the decoder package built in this CI group
#        from being picked up automatically

set -xe

source "$(dirname "$0")/tc-tests-utils.sh"

extract_python_versions "$1" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pyalias"

bitrate=$2
decoder_src=$3

if [ "$decoder_src" = "--pypi" ]; then
  # Disable automatically picking up decoder package built in this CI group
  export DECODER_ARTIFACTS_ROOT=""
fi

# Directory creation is best-effort here, exactly as before.
mkdir -p "${TASKCLUSTER_ARTIFACTS}" || true
mkdir -p /tmp/train || true
mkdir -p /tmp/train_tflite || true

virtualenv_activate "${pyalias}" "deepspeech"

# pip output is piped through cat; pipefail makes a pip failure abort the
# script (set -e) instead of being masked by cat's exit status.
set -o pipefail
pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
pushd "${HOME}/DeepSpeech/ds"
  pip install --upgrade . | cat
popd
set +o pipefail

# Prepare correct arguments for training
case "${bitrate}" in
  8k)
    sample_rate=8000
    sample_name='LDC93S1_pcms16le_1_8000.wav'
    ;;
  16k)
    sample_rate=16000
    sample_name='LDC93S1_pcms16le_1_16000.wav'
    ;;
  *)
    # Without this arm sample_name stays empty and the mv below would try to
    # move the whole smoke_test directory.
    echo "Unsupported bitrate '${bitrate}' (expected 8k or 16k)" >&2
    exit 1
    ;;
esac

# Easier to rename so that we can exercise the LDC93S1 importer code to
# generate the CSV file.
echo "Moving ${sample_name} to LDC93S1.wav"
mv "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/${sample_name}" "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/LDC93S1.wav"

pushd "${HOME}/DeepSpeech/ds/"
  # Testing single SDB source
  time ./bin/run-tc-ldc93s1_new_sdb.sh 220 "${sample_rate}"
  # Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features
  time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}"
  time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}"
  # Test --metrics_files training argument
  time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}"
popd

pushd "${HOME}/DeepSpeech/ds/"
  time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh
popd

virtualenv_deactivate "${pyalias}" "deepspeech"

View File

@ -50,11 +50,6 @@ pushd ${HOME}/DeepSpeech/ds/
time ./bin/run-tc-ldc93s1_new.sh 249 "${sample_rate}" time ./bin/run-tc-ldc93s1_new.sh 249 "${sample_rate}"
time ./bin/run-tc-ldc93s1_new.sh 1 "${sample_rate}" time ./bin/run-tc-ldc93s1_new.sh 1 "${sample_rate}"
time ./bin/run-tc-ldc93s1_tflite.sh "${sample_rate}" time ./bin/run-tc-ldc93s1_tflite.sh "${sample_rate}"
# Testing single SDB source
time ./bin/run-tc-ldc93s1_new_sdb.sh 220 "${sample_rate}"
# Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features
time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}"
time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}"
popd popd
cp /tmp/train/output_graph.pb ${TASKCLUSTER_ARTIFACTS} cp /tmp/train/output_graph.pb ${TASKCLUSTER_ARTIFACTS}
@ -69,7 +64,6 @@ cp /tmp/train/output_graph.pbmm ${TASKCLUSTER_ARTIFACTS}
pushd ${HOME}/DeepSpeech/ds/ pushd ${HOME}/DeepSpeech/ds/
time ./bin/run-tc-ldc93s1_checkpoint.sh time ./bin/run-tc-ldc93s1_checkpoint.sh
time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh
popd popd
virtualenv_deactivate "${pyalias}" "deepspeech" virtualenv_deactivate "${pyalias}" "deepspeech"

View File

@ -0,0 +1,12 @@
# Taskcluster task: runs tc-train-extra-tests.sh with Python 3.5.8 on the 16k sample.
# The metadata below must agree with the version and bitrate in tests_cmdline.
build:
  template_file: test-linux-opt-base.tyml
  dependencies:
    - "linux-amd64-ctc-opt"
  system_setup:
    >
      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
  args:
    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.5.8:m 16k"
  metadata:
    name: "DeepSpeech Linux AMD64 CPU 16kHz all training features Py3.5"
    description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version"

View File

@ -0,0 +1,12 @@
# Taskcluster task: runs tc-train-extra-tests.sh with Python 3.6.10 on the 16k sample.
# The metadata below must agree with the version and bitrate in tests_cmdline.
build:
  template_file: test-linux-opt-base.tyml
  dependencies:
    - "linux-amd64-ctc-opt"
  system_setup:
    >
      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
  args:
    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 16k"
  metadata:
    name: "DeepSpeech Linux AMD64 CPU 16kHz all training features Py3.6"
    description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"

View File

@ -0,0 +1,12 @@
# Taskcluster task: runs tc-train-extra-tests.sh with Python 3.7.6 on the 16k sample.
# The metadata below must agree with the version and bitrate in tests_cmdline.
build:
  template_file: test-linux-opt-base.tyml
  dependencies:
    - "linux-amd64-ctc-opt"
  system_setup:
    >
      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
  args:
    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 16k"
  metadata:
    name: "DeepSpeech Linux AMD64 CPU 16kHz all training features Py3.7"
    description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version"

View File

@ -0,0 +1,12 @@
# Taskcluster task: runs tc-train-extra-tests.sh with Python 3.6.10 on the 8k sample.
# The metadata below must agree with the version and bitrate in tests_cmdline.
build:
  template_file: test-linux-opt-base.tyml
  dependencies:
    - "linux-amd64-ctc-opt"
  system_setup:
    >
      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
  args:
    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 8k"
  metadata:
    name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.6"
    description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version"

View File

@ -0,0 +1,12 @@
# Taskcluster task: runs tc-train-extra-tests.sh with Python 3.7.6 on the 8k sample.
# Based on test-linux-opt-base.tyml and depends on the "linux-amd64-ctc-opt" task.
# The metadata name/description must agree with the version and bitrate in tests_cmdline.
build:
template_file: test-linux-opt-base.tyml
dependencies:
- "linux-amd64-ctc-opt"
system_setup:
>
apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 8k"
metadata:
name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.7"
description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version"

View File

@ -8,5 +8,5 @@ build:
args: args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k" tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k"
metadata: metadata:
name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.5" name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.5"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version"

View File

@ -8,5 +8,5 @@ build:
args: args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k" tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k"
metadata: metadata:
name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.6" name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.6"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"

View File

@ -8,5 +8,5 @@ build:
args: args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k" tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k"
metadata: metadata:
name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.7" name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.7"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version"

View File

@ -8,5 +8,5 @@ build:
args: args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k" tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k"
metadata: metadata:
name: "DeepSpeech Linux AMD64 CPU 8kHz training Py3.6" name: "DeepSpeech Linux AMD64 CPU 8kHz basic training Py3.6"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version"

View File

@ -8,5 +8,5 @@ build:
args: args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k" tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k"
metadata: metadata:
name: "DeepSpeech Linux AMD64 CPU 8kHz training Py3.7" name: "DeepSpeech Linux AMD64 CPU 8kHz basic training Py3.7"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version"

View File

@ -450,6 +450,16 @@ def train():
buffering=FLAGS.read_buffer) for source in dev_sources] buffering=FLAGS.read_buffer) for source in dev_sources]
dev_init_ops = [iterator.make_initializer(dev_set) for dev_set in dev_sets] dev_init_ops = [iterator.make_initializer(dev_set) for dev_set in dev_sets]
if FLAGS.metrics_files:
metrics_sources = FLAGS.metrics_files.split(',')
metrics_sets = [create_dataset([source],
batch_size=FLAGS.dev_batch_size,
train_phase=False,
exception_box=exception_box,
process_ahead=len(Config.available_devices) * FLAGS.dev_batch_size * 2,
buffering=FLAGS.read_buffer) for source in metrics_sources]
metrics_init_ops = [iterator.make_initializer(metrics_set) for metrics_set in metrics_sets]
# Dropout # Dropout
dropout_rates = [tfv1.placeholder(tf.float32, name='dropout_{}'.format(i)) for i in range(6)] dropout_rates = [tfv1.placeholder(tf.float32, name='dropout_{}'.format(i)) for i in range(6)]
dropout_feed_dict = { dropout_feed_dict = {
@ -488,7 +498,14 @@ def train():
step_summaries_op = tfv1.summary.merge_all('step_summaries') step_summaries_op = tfv1.summary.merge_all('step_summaries')
step_summary_writers = { step_summary_writers = {
'train': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'train'), max_queue=120), 'train': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'train'), max_queue=120),
'dev': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'dev'), max_queue=120) 'dev': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'dev'), max_queue=120),
'metrics': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'metrics'), max_queue=120),
}
human_readable_set_names = {
'train': 'Training',
'dev': 'Validation',
'metrics': 'Metrics',
} }
# Checkpointing # Checkpointing
@ -533,7 +550,7 @@ def train():
data['mean_loss'] = total_loss / step_count if step_count else 0.0 data['mean_loss'] = total_loss / step_count if step_count else 0.0
return progressbar.widgets.FormatLabel.__call__(self, progress, data, **kwargs) return progressbar.widgets.FormatLabel.__call__(self, progress, data, **kwargs)
prefix = 'Epoch {} | {:>10}'.format(epoch, 'Training' if is_train else 'Validation') prefix = 'Epoch {} | {:>10}'.format(epoch, human_readable_set_names[set_name])
widgets = [' | ', progressbar.widgets.Timer(), widgets = [' | ', progressbar.widgets.Timer(),
' | Steps: ', progressbar.widgets.Counter(), ' | Steps: ', progressbar.widgets.Counter(),
' | ', LossWidget()] ' | ', LossWidget()]
@ -635,6 +652,16 @@ def train():
log_info('Encountered a plateau, reducing learning rate to {}'.format( log_info('Encountered a plateau, reducing learning rate to {}'.format(
current_learning_rate)) current_learning_rate))
if FLAGS.metrics_files:
# Read only metrics, not affecting best validation loss tracking
for source, init_op in zip(metrics_sources, metrics_init_ops):
log_progress('Metrics for epoch %d on %s...' % (epoch, source))
set_loss, _ = run_set('metrics', epoch, init_op, dataset=source)
log_progress('Metrics for epoch %d on %s - loss: %f' % (epoch, source, set_loss))
print('-' * 80)
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
log_info('FINISHED optimization in {}'.format(datetime.utcnow() - train_start_time)) log_info('FINISHED optimization in {}'.format(datetime.utcnow() - train_start_time))

View File

@ -13,8 +13,9 @@ def create_flags():
f = absl.flags f = absl.flags
f.DEFINE_string('train_files', '', 'comma separated list of files specifying the dataset used for training. Multiple files will get merged. If empty, training will not be run.') f.DEFINE_string('train_files', '', 'comma separated list of files specifying the dataset used for training. Multiple files will get merged. If empty, training will not be run.')
f.DEFINE_string('dev_files', '', 'comma separated list of files specifying the dataset used for validation. Multiple files will get merged. If empty, validation will not be run.') f.DEFINE_string('dev_files', '', 'comma separated list of files specifying the datasets used for validation. Multiple files will get reported separately. If empty, validation will not be run.')
f.DEFINE_string('test_files', '', 'comma separated list of files specifying the dataset used for testing. Multiple files will get merged. If empty, the model will not be tested.') f.DEFINE_string('test_files', '', 'comma separated list of files specifying the datasets used for testing. Multiple files will get reported separately. If empty, the model will not be tested.')
f.DEFINE_string('metrics_files', '', 'comma separated list of files specifying the datasets used for tracking of metrics (after validation step). Currently the only metric is the CTC loss but without affecting the tracking of best validation loss. Multiple files will get reported separately. If empty, metrics will not be computed.')
f.DEFINE_string('read_buffer', '1MB', 'buffer-size for reading samples from datasets (supports file-size suffixes KB, MB, GB, TB)') f.DEFINE_string('read_buffer', '1MB', 'buffer-size for reading samples from datasets (supports file-size suffixes KB, MB, GB, TB)')
f.DEFINE_string('feature_cache', '', 'cache MFCC features to disk to speed up future training runs on the same data. This flag specifies the path where cached features extracted from --train_files will be saved. If empty, or if online augmentation flags are enabled, caching will be disabled.') f.DEFINE_string('feature_cache', '', 'cache MFCC features to disk to speed up future training runs on the same data. This flag specifies the path where cached features extracted from --train_files will be saved. If empty, or if online augmentation flags are enabled, caching will be disabled.')