From cfc79799ecae723194e95aeb6a9b677cee322964 Mon Sep 17 00:00:00 2001
From: Alexandre Lissy <lissyx@lissyx.dyndns.org>
Date: Wed, 10 Jun 2020 13:12:15 +0200
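Subject: [PATCH] Report imported vs total audio time

Add an `imported_time` entry to the importer counter and increment it
only when a sample is kept for the target CSV. `total_time` is still
accumulated for every counted sample, so the final report line now
prints the imported duration next to the total duration instead of
labelling the total as "imported audio".
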
---
 bin/import_cv.py                               | 1 +
 bin/import_cv2.py                              | 1 +
 bin/import_lingua_libre.py                     | 1 +
 bin/import_m-ailabs.py                         | 1 +
 bin/import_slr57.py                            | 1 +
 bin/import_ts.py                               | 1 +
 training/deepspeech_training/util/importers.py | 4 ++--
 7 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/bin/import_cv.py b/bin/import_cv.py
index 3754694f..e7dab564 100755
--- a/bin/import_cv.py
+++ b/bin/import_cv.py
@@ -93,6 +93,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
     return (counter, rows)
diff --git a/bin/import_cv2.py b/bin/import_cv2.py
index c2880a06..9db63656 100755
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@@ -78,6 +78,7 @@ def one_sample(args):
     else:
         # This one is good - keep it for the target CSV
         rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2]))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py
index ec5047ba..2273aae6 100755
--- a/bin/import_lingua_libre.py
+++ b/bin/import_lingua_libre.py
@@ -91,6 +91,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
diff --git a/bin/import_m-ailabs.py b/bin/import_m-ailabs.py
index 963b2873..63bb1f4f 100755
--- a/bin/import_m-ailabs.py
+++ b/bin/import_m-ailabs.py
@@ -91,6 +91,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
     return (counter, rows)
diff --git a/bin/import_slr57.py b/bin/import_slr57.py
index 11e30fa4..4edb3654 100755
--- a/bin/import_slr57.py
+++ b/bin/import_slr57.py
@@ -86,6 +86,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
diff --git a/bin/import_ts.py b/bin/import_ts.py
index e6cdc1e8..e0130130 100755
--- a/bin/import_ts.py
+++ b/bin/import_ts.py
@@ -93,6 +93,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
diff --git a/training/deepspeech_training/util/importers.py b/training/deepspeech_training/util/importers.py
index a4c3c326..61f2342d 100644
--- a/training/deepspeech_training/util/importers.py
+++ b/training/deepspeech_training/util/importers.py
@@ -8,7 +8,7 @@ from .helpers import secs_to_hours
 from collections import Counter
 
 def get_counter():
-    return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'total_time': 0})
+    return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'imported_time': 0, 'total_time': 0})
 
 def get_imported_samples(counter):
     return counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long'] - counter['invalid_label']
@@ -23,7 +23,7 @@ def print_import_report(counter, sample_rate, max_secs):
         print('Skipped %d samples that were too short to match the transcript.' % counter['too_short'])
     if counter['too_long'] > 0:
         print('Skipped %d samples that were longer than %d seconds.' % (counter['too_long'], max_secs))
-    print('Final amount of imported audio: %s.' % secs_to_hours(counter['total_time'] / sample_rate))
+    print('Final amount of imported audio: %s from %s.' % (secs_to_hours(counter['imported_time'] / sample_rate), secs_to_hours(counter['total_time'] / sample_rate)))
 
 def get_importers_parser(description):
     parser = argparse.ArgumentParser(description=description)