Update import_cv2.py

Requires UTF-8 encoding: without an explicit encoding, Python tries to read the TSV file as ASCII and fails.
This commit is contained in:
Murcurio 2019-10-18 12:49:33 +11:00 committed by GitHub
parent 336daa1641
commit 9055d49b47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -51,7 +51,7 @@ def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_cha
# Get audiofile path and transcript for each sentence in tsv
samples = []
with open(input_tsv) as input_tsv_file:
with open(input_tsv, encoding='utf-8') as input_tsv_file:
reader = csv.DictReader(input_tsv_file, delimiter='\t')
for row in reader:
samples.append((row['path'], row['sentence']))
@@ -104,7 +104,7 @@ def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_cha
pool.close()
pool.join()
with open(output_csv, 'w') as output_csv_file:
with open(output_csv, 'w', encoding='utf-8') as output_csv_file:
print('Writing CSV file for DeepSpeech.py as: ', output_csv)
writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
writer.writeheader()