From cfc79799ecae723194e95aeb6a9b677cee322964 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Wed, 10 Jun 2020 13:12:15 +0200 Subject: [PATCH] Report imported vs total audio time --- bin/import_cv.py | 1 + bin/import_cv2.py | 1 + bin/import_lingua_libre.py | 1 + bin/import_m-ailabs.py | 1 + bin/import_slr57.py | 1 + bin/import_ts.py | 1 + training/deepspeech_training/util/importers.py | 4 ++-- 7 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bin/import_cv.py b/bin/import_cv.py index 3754694f..e7dab564 100755 --- a/bin/import_cv.py +++ b/bin/import_cv.py @@ -93,6 +93,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames return (counter, rows) diff --git a/bin/import_cv2.py b/bin/import_cv2.py index c2880a06..9db63656 100755 --- a/bin/import_cv2.py +++ b/bin/import_cv2.py @@ -78,6 +78,7 @@ def one_sample(args): else: # This one is good - keep it for the target CSV rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2])) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py index ec5047ba..2273aae6 100755 --- a/bin/import_lingua_libre.py +++ b/bin/import_lingua_libre.py @@ -91,6 +91,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/bin/import_m-ailabs.py b/bin/import_m-ailabs.py index 963b2873..63bb1f4f 100755 --- a/bin/import_m-ailabs.py +++ b/bin/import_m-ailabs.py @@ -91,6 +91,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames return (counter, rows) diff --git a/bin/import_slr57.py b/bin/import_slr57.py index 11e30fa4..4edb3654 100755 --- a/bin/import_slr57.py +++ b/bin/import_slr57.py @@ -86,6 +86,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/bin/import_ts.py b/bin/import_ts.py index e6cdc1e8..e0130130 100755 --- a/bin/import_ts.py +++ b/bin/import_ts.py @@ -93,6 +93,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/training/deepspeech_training/util/importers.py b/training/deepspeech_training/util/importers.py index a4c3c326..61f2342d 100644 --- a/training/deepspeech_training/util/importers.py +++ b/training/deepspeech_training/util/importers.py @@ -8,7 +8,7 @@ from .helpers import secs_to_hours from collections import Counter def get_counter(): - return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'total_time': 0}) + return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'imported_time': 0, 'total_time': 0}) def get_imported_samples(counter): return counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long'] - counter['invalid_label'] @@ -23,7 +23,7 @@ def print_import_report(counter, sample_rate, max_secs): print('Skipped %d samples that were too short to match the transcript.' % counter['too_short']) if counter['too_long'] > 0: print('Skipped %d samples that were longer than %d seconds.' % (counter['too_long'], max_secs)) - print('Final amount of imported audio: %s.' % secs_to_hours(counter['total_time'] / sample_rate)) + print('Final amount of imported audio: %s from %s.' % (secs_to_hours(counter['imported_time'] / sample_rate), secs_to_hours(counter['total_time'] / sample_rate))) def get_importers_parser(description): parser = argparse.ArgumentParser(description=description)