From cef7c45f03230038e9f0306e0e6bdc5c62143a5b Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Mon, 28 Oct 2019 12:25:12 +0100 Subject: [PATCH] Making sample paths relative; additional sub-sets --- bin/import_cv2.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bin/import_cv2.py b/bin/import_cv2.py index 122755a4..c96cd445 100755 --- a/bin/import_cv2.py +++ b/bin/import_cv2.py @@ -36,13 +36,11 @@ MAX_SECS = 10 def _preprocess_data(tsv_dir, audio_dir, label_filter, space_after_every_character=False): - for dataset in ['train', 'test', 'dev']: + for dataset in ['train', 'test', 'dev', 'validated', 'other']: input_tsv = path.join(path.abspath(tsv_dir), dataset+".tsv") if os.path.isfile(input_tsv): print("Loading TSV file: ", input_tsv) _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_character) - else: - print("ERROR: no TSV file found: ", input_tsv) def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_character=None): @@ -91,7 +89,7 @@ def _maybe_convert_set(input_tsv, audio_dir, label_filter, space_after_every_cha counter['too_long'] += 1 else: # This one is good - keep it for the target CSV - rows.append((wav_filename, file_size, label)) + rows.append((os.path.split(wav_filename)[-1], file_size, label)) counter['all'] += 1 counter['total_time'] += frames