Fix import_cv2.py binary file + permissions

This commit is contained in:
Quentin Brunet 2019-03-03 17:27:50 +01:00
parent 3c3401ec60
commit 179ba1b533

4
bin/import_cv2.py Normal file → Executable file
View File

@@ -20,7 +20,7 @@ from util.downloader import SIMPLE_BAR
''' '''
Broadly speaking, this script takes the audio downloaded from Common Voice Broadly speaking, this script takes the audio downloaded from Common Voice
for a certain language, in addition to the *.tsv files output by CorporaCeator, for a certain language, in addition to the *.tsv files output by CorporaCreator,
and the script formats the data and transcripts to be in a state usable by and the script formats the data and transcripts to be in a state usable by
DeepSpeech.py DeepSpeech.py
@@ -70,6 +70,8 @@ def _maybe_convert_set(audio_dir, input_tsv):
def one_sample(sample): def one_sample(sample):
""" Take a audio file, and optionally convert it to 16kHz WAV """ """ Take a audio file, and optionally convert it to 16kHz WAV """
mp3_filename = path.join(audio_dir, sample[0]) mp3_filename = path.join(audio_dir, sample[0])
if not path.splitext(mp3_filename.lower())[1] == '.mp3':
mp3_filename += ".mp3"
# Storing wav files next to the mp3 ones - just with a different suffix # Storing wav files next to the mp3 ones - just with a different suffix
wav_filename = path.splitext(mp3_filename)[0] + ".wav" wav_filename = path.splitext(mp3_filename)[0] + ".wav"
_maybe_convert_wav(mp3_filename, wav_filename) _maybe_convert_wav(mp3_filename, wav_filename)