From ce71ec0c8978ba4568c919cfb6bd7dcabc8565e6 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 4 May 2021 19:06:18 +0200 Subject: [PATCH] Include missing changes in MLS English importer --- bin/import_mls_english.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bin/import_mls_english.py b/bin/import_mls_english.py index 6e21c0ca..5ff83e3d 100644 --- a/bin/import_mls_english.py +++ b/bin/import_mls_english.py @@ -29,7 +29,11 @@ def read_ogg_opus_duration(ogg_file_path): def main(root_dir): - for subset in ("test",): + for subset in ( + "train", + "dev", + "test", + ): print("Processing {} subset...".format(subset)) with open(Path(root_dir) / subset / "transcripts.txt") as fin: subset_entries = [] @@ -45,7 +49,7 @@ def main(root_dir): / audio_id_parts[1] / "{}.opus".format(audio_id) ) - audio_filesize = os.path.getsize(audio_path) + audio_duration = read_ogg_opus_duration(audio_path) # TODO: support other languages transcript = ( transcript.strip() @@ -76,7 +80,7 @@ def main(root_dir): subset_entries.append( ( audio_path.relative_to(root_dir), - audio_filesize, + audio_duration, transcript.strip(), ) )