Checking for empty transcripts during character encoding

This way we get a plain-English exception early, rather than a matrix-shape error during training.
This commit is contained in:
Robert Gale 2019-08-01 11:19:21 -07:00
parent daa6167829
commit 8ec6ac8079

View File

@@ -52,7 +52,10 @@ def text_to_char_array(original, alphabet):
Given a Python string ``original``, remove unsupported characters, map characters
to integers and return a numpy array representing the processed string.
"""
return np.asarray([alphabet.label_from_string(c) for c in original])
characters = np.asarray([alphabet.label_from_string(c) for c in original])
if characters.shape[0] == 0:
raise Exception("Found an empty transcript! You must include a transcript for all training data.")
return characters
# The following code is from: http://hetland.org/coding/python/levenshtein.py