Checking for empty transcripts during character encoding

This way we get a plain-English exception early, rather than a matrix-shape error during training.
2019-08-01 11:19:21 -07:00 · 2019-08-01 11:19:21 -07:00 · 8ec6ac8079
commit 8ec6ac8079
parent daa6167829
1 changed file with 4 additions and 1 deletion
--- a/util/text.py
+++ b/util/text.py
@@ -52,7 +52,10 @@ def text_to_char_array(original, alphabet):
    Given a Python string ``original``, remove unsupported characters, map characters
    to integers and return a numpy array representing the processed string.
    """
-    return np.asarray([alphabet.label_from_string(c) for c in original])
+    characters = np.asarray([alphabet.label_from_string(c) for c in original])
+    if characters.shape[0] == 0:
+        raise Exception("Found an empty transcript! You must include a transcript for all training data.")
+    return characters


 # The following code is from: http://hetland.org/coding/python/levenshtein.py