Checking for empty transcripts during character encoding
This way we get a plain-English exception early, rather than a matrix-shape error during training.
This commit is contained in:
parent
daa6167829
commit
8ec6ac8079
@ -52,7 +52,10 @@ def text_to_char_array(original, alphabet):
|
||||
Given a Python string ``original``, remove unsupported characters, map characters
|
||||
to integers and return a numpy array representing the processed string.
|
||||
"""
|
||||
return np.asarray([alphabet.label_from_string(c) for c in original])
|
||||
characters = np.asarray([alphabet.label_from_string(c) for c in original])
|
||||
if characters.shape[0] == 0:
|
||||
raise Exception("Found an empty transcript! You must include a transcript for all training data.")
|
||||
return characters
|
||||
|
||||
|
||||
# The following code is from: http://hetland.org/coding/python/levenshtein.py
|
||||
|
Loading…
Reference in New Issue
Block a user