From 7121ca5a2b1456539034dd115401f39032e9dec0 Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Tue, 17 Nov 2020 13:40:35 -0800
Subject: [PATCH 1/6] Add a dockerignore for slightly faster local docker builds

---
 .dockerignore | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..7fbf26f3
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+tensorflow/
+data/

From ffe2155733e257df587d78fd70e10df1c512877b Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Tue, 17 Nov 2020 13:47:55 -0800
Subject: [PATCH 2/6] Undo remote edits for taskcluster as this is all local

---
 training/deepspeech_training/util/taskcluster.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/training/deepspeech_training/util/taskcluster.py b/training/deepspeech_training/util/taskcluster.py
index 4471659d..13829bdf 100644
--- a/training/deepspeech_training/util/taskcluster.py
+++ b/training/deepspeech_training/util/taskcluster.py
@@ -14,8 +14,6 @@ import sys
 
 from pkg_resources import parse_version
 
-from .io import isdir_remote, open_remote, is_remote_path
-
 DEFAULT_SCHEMES = {
     'deepspeech': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s',
     'tensorflow': 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.%(branch_name)s.%(arch_string)s/artifacts/public/%(artifact_name)s'
@@ -43,7 +41,7 @@ def maybe_download_tc(target_dir, tc_url, progress=True):
 
     assert target_dir is not None
 
-    if not is_remote_path(target_dir):
+    if not os.path.isdir(target_dir):
         try:
             os.makedirs(target_dir)
         except OSError as e:
@@ -62,7 +60,7 @@ def maybe_download_tc(target_dir, tc_url, progress=True):
         print('File already exists: %s' % target_file)
 
     if is_gzip:
-        with open_remote(target_file, "r+b") as frw:
+        with open(target_file, "r+b") as frw:
             decompressed = gzip.decompress(frw.read())
             frw.seek(0)
             frw.write(decompressed)

From 8bf1e9ddb79bc59225d2f9949f6269f76b4cdddf Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Tue, 17 Nov 2020 14:21:31 -0800
Subject: [PATCH 3/6] Fix too aggressive F&R

---
 training/deepspeech_training/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py
index 3428598d..94ca7c04 100644
--- a/training/deepspeech_training/train.py
+++ b/training/deepspeech_training/train.py
@@ -811,7 +811,7 @@ def export():
         load_graph_for_evaluation(session)
 
         output_filename = FLAGS.export_file_name + '.pb'
-        if FLAGS.remove_remote_export:
+        if FLAGS.remove_export:
             if isdir_remote(FLAGS.export_dir):
                 log_info('Removing old export')
                 remove_remote(FLAGS.export_dir)

From 9aaa0e406bd77969a024aaa8f2e4b9ec031059cf Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Tue, 17 Nov 2020 14:31:48 -0800
Subject: [PATCH 4/6] Make sure to unpack samples now

---
 bin/compare_samples.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/compare_samples.py b/bin/compare_samples.py
index 94108a7a..27898cd1 100755
--- a/bin/compare_samples.py
+++ b/bin/compare_samples.py
@@ -15,8 +15,8 @@ def fail(message):
 
 
 def compare_samples():
-    sample1 = load_sample(CLI_ARGS.sample1)
-    sample2 = load_sample(CLI_ARGS.sample2)
+    sample1 = load_sample(CLI_ARGS.sample1).unpack()
+    sample2 = load_sample(CLI_ARGS.sample2).unpack()
     if sample1.audio_format != sample2.audio_format:
         fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format))
     if sample1.duration != sample2.duration:

From 24e9e6777c112373792bd137e591d1bd1d8626bf Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Tue, 17 Nov 2020 14:44:26 -0800
Subject: [PATCH 5/6] Make sure we properly unpack samples when changing audio types

---
 training/deepspeech_training/util/audio.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py
index 05ceba38..04e99fbd 100644
--- a/training/deepspeech_training/util/audio.py
+++ b/training/deepspeech_training/util/audio.py
@@ -118,15 +118,16 @@ class Sample:
         self.audio_type = new_audio_type
 
 
-def _change_audio_type(sample_and_audio_type):
-    sample, audio_type, bitrate = sample_and_audio_type
+def _unpack_and_change_audio_type(sample_and_audio_type):
+    packed_sample, audio_type, bitrate = sample_and_audio_type
+    sample = packed_sample.unpack()
     sample.change_audio_type(audio_type, bitrate=bitrate)
     return sample
 
 
-def change_audio_types(samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
+def change_audio_types(packed_samples, audio_type=AUDIO_TYPE_PCM, bitrate=None, processes=None, process_ahead=None):
     with LimitingPool(processes=processes, process_ahead=process_ahead) as pool:
-        yield from pool.imap(_change_audio_type, map(lambda s: (s, audio_type, bitrate), samples))
+        yield from pool.imap(_unpack_and_change_audio_type, map(lambda s: (s, audio_type, bitrate), packed_samples))
 
 
 def get_audio_type_from_extension(ext):

From 6cb638211efc604f1285fd800fa481b1680b7c04 Mon Sep 17 00:00:00 2001
From: CatalinVoss
Date: Tue, 17 Nov 2020 16:55:49 -0800
Subject: [PATCH 6/6] Only unpack when we need to, to make things work with SDBs

---
Review note: in audio.py the hasattr() guard must test `packed_sample`.
`sample` is a local that is only assigned inside the branches below, so
testing it raises UnboundLocalError on every call.

 training/deepspeech_training/util/audio.py         | 5 ++++-
 training/deepspeech_training/util/augmentations.py | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py
index 04e99fbd..15c7743c 100644
--- a/training/deepspeech_training/util/audio.py
+++ b/training/deepspeech_training/util/audio.py
@@ -120,7 +120,10 @@ class Sample:
 
 def _unpack_and_change_audio_type(sample_and_audio_type):
     packed_sample, audio_type, bitrate = sample_and_audio_type
-    sample = packed_sample.unpack()
+    if hasattr(packed_sample, 'unpack'):
+        sample = packed_sample.unpack()
+    else:
+        sample = packed_sample
     sample.change_audio_type(audio_type, bitrate=bitrate)
     return sample
 
diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py
index 0934fbd5..79aa5750 100644
--- a/training/deepspeech_training/util/augmentations.py
+++ b/training/deepspeech_training/util/augmentations.py
@@ -152,7 +152,10 @@ def _init_augmentation_worker(preparation_context):
 
 def _load_and_augment_sample(timed_sample, context=None):
     sample, clock = timed_sample
-    realized_sample = sample.unpack()
+    if hasattr(sample, 'unpack'):
+        realized_sample = sample.unpack()
+    else:
+        realized_sample = sample
     return _augment_sample((realized_sample, clock), context)
 
 