Include missing changes in MLS English importer
This commit is contained in:
parent
ad4025af7d
commit
ce71ec0c89
@@ -29,7 +29,11 @@ def read_ogg_opus_duration(ogg_file_path):
|
|||||||
|
|
||||||
|
|
||||||
def main(root_dir):
|
def main(root_dir):
|
||||||
for subset in ("test",):
|
for subset in (
|
||||||
|
"train",
|
||||||
|
"dev",
|
||||||
|
"test",
|
||||||
|
):
|
||||||
print("Processing {} subset...".format(subset))
|
print("Processing {} subset...".format(subset))
|
||||||
with open(Path(root_dir) / subset / "transcripts.txt") as fin:
|
with open(Path(root_dir) / subset / "transcripts.txt") as fin:
|
||||||
subset_entries = []
|
subset_entries = []
|
||||||
@@ -45,7 +49,7 @@ def main(root_dir):
|
|||||||
/ audio_id_parts[1]
|
/ audio_id_parts[1]
|
||||||
/ "{}.opus".format(audio_id)
|
/ "{}.opus".format(audio_id)
|
||||||
)
|
)
|
||||||
audio_filesize = os.path.getsize(audio_path)
|
audio_duration = read_ogg_opus_duration(audio_path)
|
||||||
# TODO: support other languages
|
# TODO: support other languages
|
||||||
transcript = (
|
transcript = (
|
||||||
transcript.strip()
|
transcript.strip()
|
||||||
@@ -76,7 +80,7 @@ def main(root_dir):
|
|||||||
subset_entries.append(
|
subset_entries.append(
|
||||||
(
|
(
|
||||||
audio_path.relative_to(root_dir),
|
audio_path.relative_to(root_dir),
|
||||||
audio_filesize,
|
audio_duration,
|
||||||
transcript.strip(),
|
transcript.strip(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user