Resolves #3144 - Add augmentation support to build_sdb.py

2020-07-09 11:55:26 +02:00 · 2020-07-09 11:55:26 +02:00 · 61bd5dd88d
parent 48cd53e474
commit 61bd5dd88d
1 changed files with 18 additions and 1 deletions
--- a/bin/build_sdb.py
+++ b/bin/build_sdb.py
@ -8,6 +8,7 @@ import argparse
 import progressbar

 from deepspeech_training.util.audio import (
+    AUDIO_TYPE_PCM,
    AUDIO_TYPE_OPUS,
    AUDIO_TYPE_WAV,
    change_audio_types,
@ -17,17 +18,28 @@ from deepspeech_training.util.sample_collections import (
    DirectSDBWriter,
    samples_from_sources,
 )
+from deepspeech_training.util.augmentations import (
+    parse_augmentations,
+    apply_sample_augmentations,
+    SampleAugmentation
+)

 AUDIO_TYPE_LOOKUP = {"wav": AUDIO_TYPE_WAV, "opus": AUDIO_TYPE_OPUS}


 def build_sdb():
    audio_type = AUDIO_TYPE_LOOKUP[CLI_ARGS.audio_type]
+    augmentations = parse_augmentations(CLI_ARGS.augment)
+    if any(not isinstance(a, SampleAugmentation) for a in augmentations):
+        print("Warning: Some of the augmentations cannot be applied by this command.")
    with DirectSDBWriter(
        CLI_ARGS.target, audio_type=audio_type, labeled=not CLI_ARGS.unlabeled
    ) as sdb_writer:
        samples = samples_from_sources(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled)
-        bar = progressbar.ProgressBar(max_value=len(samples), widgets=SIMPLE_BAR)
+        num_samples = len(samples)
+        if augmentations:
+            samples = apply_sample_augmentations(samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations)
+        bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
        for sample in bar(
            change_audio_types(samples, audio_type=audio_type, bitrate=CLI_ARGS.bitrate, processes=CLI_ARGS.workers)
        ):
@ -67,6 +79,11 @@ def handle_args():
        help="If to build an SDB with unlabeled (audio only) samples - "
        "typically used for building noise augmentation corpora",
    )
+    parser.add_argument(
+        "--augment",
+        action='append',
+        help="Add an augmentation operation",
+    )
    return parser.parse_args()