Merge pull request #3054 from lissyx/import-time

Report imported vs total audio time
This commit is contained in:
lissyx 2020-06-10 13:35:01 +02:00 committed by GitHub
commit e99b938ebf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 8 additions and 2 deletions

View File

@ -93,6 +93,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames
return (counter, rows)

View File

@ -78,6 +78,7 @@ def one_sample(args):
else:
# This one is good - keep it for the target CSV
rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2]))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames

View File

@ -91,6 +91,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames

View File

@ -91,6 +91,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames
return (counter, rows)

View File

@ -86,6 +86,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames

View File

@ -93,6 +93,7 @@ def one_sample(sample):
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames

View File

@ -8,7 +8,7 @@ from .helpers import secs_to_hours
from collections import Counter
def get_counter():
    """Return a fresh ``Counter`` with every import statistic preset to zero.

    Keys:
        all: samples seen.
        failed, invalid_label, too_short, too_long: skip categories.
        imported_time: audio length of the samples that were kept.
        total_time: audio length of all samples seen.

    Both ``*_time`` values are divided by the sample rate when the import
    report is printed.
    """
    # Exactly one return: the key set that includes 'imported_time', so the
    # key is present (as 0) even before any sample has been imported.
    return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'imported_time': 0, 'total_time': 0})
def get_imported_samples(counter):
    """Return the number of samples that were kept for import.

    Args:
        counter: mapping holding the ``all``, ``failed``, ``too_short``,
            ``too_long`` and ``invalid_label`` counts.

    Returns:
        ``counter['all']`` minus the four skip categories.
    """
    skipped = (
        counter['failed']
        + counter['too_short']
        + counter['too_long']
        + counter['invalid_label']
    )
    return counter['all'] - skipped
@ -23,7 +23,7 @@ def print_import_report(counter, sample_rate, max_secs):
print('Skipped %d samples that were too short to match the transcript.' % counter['too_short'])
if counter['too_long'] > 0:
print('Skipped %d samples that were longer than %d seconds.' % (counter['too_long'], max_secs))
print('Final amount of imported audio: %s.' % secs_to_hours(counter['total_time'] / sample_rate))
print('Final amount of imported audio: %s from %s.' % (secs_to_hours(counter['imported_time'] / sample_rate), secs_to_hours(counter['total_time'] / sample_rate)))
def get_importers_parser(description):
parser = argparse.ArgumentParser(description=description)