Convert channels for CV2 dataset
When running a training session on the CV2 dataset, the following error can occur: `ValueError: Mono-channel audio required`. This change passes `n_channels=CHANNELS` (with `CHANNELS = 1`) to the [pysox Transformer](https://pysox.readthedocs.io/en/latest/api.html#sox.transform.Transformer.convert) `convert` call, so that the audio is converted to a single channel in addition to being resampled.
This commit is contained in:
parent
e508cd30b7
commit
521842deea
@ -27,6 +27,7 @@ from ds_ctcdecoder import Alphabet
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
CHANNELS = 1
|
||||
MAX_SECS = 10
|
||||
PARAMS = None
|
||||
FILTER_OBJ = None
|
||||
@ -179,7 +180,7 @@ def _preprocess_data(tsv_dir, audio_dir, space_after_every_character=False):
|
||||
def _maybe_convert_wav(mp3_filename, wav_filename):
|
||||
if not os.path.exists(wav_filename):
|
||||
transformer = sox.Transformer()
|
||||
transformer.convert(samplerate=SAMPLE_RATE)
|
||||
transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS)
|
||||
try:
|
||||
transformer.build(mp3_filename, wav_filename)
|
||||
except sox.core.SoxError:
|
||||
|
Loading…
x
Reference in New Issue
Block a user