Convert channels for CV2 dataset

When running a training session on the CV2 dataset, it is possible to get the following error:

```
ValueError: Mono-channel audio required
```

This commit fixes the error by making the [pysox Transformer](https://pysox.readthedocs.io/en/latest/api.html#sox.transform.Transformer.convert) convert the number of channels (to mono) in addition to the sample rate.
This commit is contained in:
Anas Abou Allaban 2020-10-15 11:22:39 -04:00 committed by GitHub
parent e508cd30b7
commit 521842deea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -27,6 +27,7 @@ from ds_ctcdecoder import Alphabet
# Column names for the generated dataset rows.
# NOTE(review): presumably used as the CSV header — confirm against the writer.
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
# Target sample rate handed to the sox transformer when converting MP3 to WAV.
SAMPLE_RATE = 16000
# Target channel count handed to the sox transformer (1 = mono).
CHANNELS = 1
# NOTE(review): presumably the maximum clip duration in seconds; its use is
# not visible in this excerpt — confirm.
MAX_SECS = 10
# Module-level placeholders, unset at import time.
# NOTE(review): presumably populated elsewhere before use — confirm.
PARAMS = None
FILTER_OBJ = None
@ -179,7 +180,7 @@ def _preprocess_data(tsv_dir, audio_dir, space_after_every_character=False):
def _maybe_convert_wav(mp3_filename, wav_filename):
if not os.path.exists(wav_filename):
transformer = sox.Transformer()
transformer.convert(samplerate=SAMPLE_RATE)
transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS)
try:
transformer.build(mp3_filename, wav_filename)
except sox.core.SoxError: