From 521842deea720713176bcafabc8674d40b6f5878 Mon Sep 17 00:00:00 2001 From: Anas Abou Allaban Date: Thu, 15 Oct 2020 11:22:39 -0400 Subject: [PATCH] Convert channels for CV2 dataset Running a training session on the CV2 dataset can fail with the following error: ``` ValueError: Mono-channel audio required ``` This makes the [pysox Transformer](https://pysox.readthedocs.io/en/latest/api.html#sox.transform.Transformer.convert) convert the audio to a single channel (`n_channels=1`) in addition to resampling it, so that the WAV files produced by the importer are mono. --- bin/import_cv2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/import_cv2.py b/bin/import_cv2.py index d6c8c192..19a5741c 100755 --- a/bin/import_cv2.py +++ b/bin/import_cv2.py @@ -27,6 +27,7 @@ from ds_ctcdecoder import Alphabet FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"] SAMPLE_RATE = 16000 +CHANNELS = 1 MAX_SECS = 10 PARAMS = None FILTER_OBJ = None @@ -179,7 +180,7 @@ def _preprocess_data(tsv_dir, audio_dir, space_after_every_character=False): def _maybe_convert_wav(mp3_filename, wav_filename): if not os.path.exists(wav_filename): transformer = sox.Transformer() - transformer.convert(samplerate=SAMPLE_RATE) + transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS) try: transformer.build(mp3_filename, wav_filename) except sox.core.SoxError: