From d38a3f13f73ae6d4e782fd4cb4a36f9eaf9a16e4 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Wed, 30 Oct 2019 16:10:51 +0100 Subject: [PATCH] Removing exclamation-marks, colons and semi-colons from labels --- util/text.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/util/text.py b/util/text.py index 8af360ec..cf07977f 100644 --- a/util/text.py +++ b/util/text.py @@ -65,7 +65,7 @@ def text_to_char_array(series, alphabet): """ try: transcript = np.asarray(alphabet.encode(series['transcript'])) - if not len(transcript): + if len(transcript) == 0: raise ValueError('While processing: {}\nFound an empty transcript! You must include a transcript for all training data.'.format(series['wav_filename'])) return transcript except KeyError as e: @@ -115,7 +115,10 @@ def validate_label(label): label = re.sub("[ ]{2,}", " ", label) label = label.replace(".", "") label = label.replace(",", "") + label = label.replace(";", "") label = label.replace("?", "") + label = label.replace("!", "") + label = label.replace(":", "") label = label.replace("\"", "") label = label.strip() label = label.lower()