Update import_cv2.py

Requires UTF-8 encoding; without this, Python tries to read the file as ASCII and fails.
2019-10-18 12:49:33 +11:00 · 2019-10-18 12:49:33 +11:00 · 9055d49b47
commit 9055d49b47
parent 336daa1641
1 changed files with 2 additions and 2 deletions
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@@ -51,7 +51,7 @@ def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_cha

    # Get audiofile path and transcript for each sentence in tsv
    samples = []
-    with open(input_tsv) as input_tsv_file:
+    with open(input_tsv, encoding='utf-8') as input_tsv_file:
        reader = csv.DictReader(input_tsv_file, delimiter='\t')
        for row in reader:
            samples.append((row['path'], row['sentence']))
@@ -104,7 +104,7 @@ def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_cha
    pool.close()
    pool.join()

-    with open(output_csv, 'w') as output_csv_file:
+    with open(output_csv, 'w', encoding='utf-8') as output_csv_file:
        print('Writing CSV file for DeepSpeech.py as: ', output_csv)
        writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
        writer.writeheader()