From 9055d49b47e002b4b60e370158a947e6cf534561 Mon Sep 17 00:00:00 2001 From: Murcurio <35284740+Murcurio@users.noreply.github.com> Date: Fri, 18 Oct 2019 12:49:33 +1100 Subject: [PATCH] Update import_cv2.py Requires UTF-8 encoding; without it, Python tries to read the file as ASCII and fails. --- bin/import_cv2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/import_cv2.py b/bin/import_cv2.py index 8bbeee7a..083d2176 100755 --- a/bin/import_cv2.py +++ b/bin/import_cv2.py @@ -51,7 +51,7 @@ def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_cha # Get audiofile path and transcript for each sentence in tsv samples = [] - with open(input_tsv) as input_tsv_file: + with open(input_tsv, encoding='utf-8') as input_tsv_file: reader = csv.DictReader(input_tsv_file, delimiter='\t') for row in reader: samples.append((row['path'], row['sentence'])) @@ -104,7 +104,7 @@ def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_cha pool.close() pool.join() - with open(output_csv, 'w') as output_csv_file: + with open(output_csv, 'w', encoding='utf-8') as output_csv_file: print('Writing CSV file for DeepSpeech.py as: ', output_csv) writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES) writer.writeheader()