Merge pull request #1928 from qboot/master

Fix import_cv2 binary file
2019-03-04 13:36:39 +01:00 · 2019-03-04 13:36:39 +01:00 · 223b2e82ab
commit 223b2e82ab
parent dca8c40ae9 179ba1b533
1 changed files with 11 additions and 9 deletions
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@@ -20,7 +20,7 @@ from util.downloader import SIMPLE_BAR

 '''
 Broadly speaking, this script takes the audio downloaded from Common Voice
-for a certain language, in addition to the *.tsv files output by CorporaCeator,
+for a certain language, in addition to the *.tsv files output by CorporaCreator,
 and the script formats the data and transcripts to be in a state usable by
 DeepSpeech.py

@@ -70,6 +70,8 @@ def _maybe_convert_set(audio_dir, input_tsv):
    def one_sample(sample):
        """ Take a audio file, and optionally convert it to 16kHz WAV """
        mp3_filename = path.join(audio_dir, sample[0])
+        if not path.splitext(mp3_filename.lower())[1] == '.mp3':
+            mp3_filename += ".mp3"
        # Storing wav files next to the mp3 ones - just with a different suffix
        wav_filename = path.splitext(mp3_filename)[0] + ".wav"
        _maybe_convert_wav(mp3_filename, wav_filename)