Checking for empty transcripts during character encoding
This way we get a plain-English exception early, rather than a matrix shape error later during training.
This commit is contained in:
parent
daa6167829
commit
8ec6ac8079
@@ -52,7 +52,10 @@ def text_to_char_array(original, alphabet):
|
|||||||
Given a Python string ``original``, remove unsupported characters, map characters
|
Given a Python string ``original``, remove unsupported characters, map characters
|
||||||
to integers and return a numpy array representing the processed string.
|
to integers and return a numpy array representing the processed string.
|
||||||
"""
|
"""
|
||||||
return np.asarray([alphabet.label_from_string(c) for c in original])
|
characters = np.asarray([alphabet.label_from_string(c) for c in original])
|
||||||
|
if characters.shape[0] == 0:
|
||||||
|
raise Exception("Found an empty transcript! You must include a transcript for all training data.")
|
||||||
|
return characters
|
||||||
|
|
||||||
|
|
||||||
# The following code is from: http://hetland.org/coding/python/levenshtein.py
|
# The following code is from: http://hetland.org/coding/python/levenshtein.py
|
||||||
|
Loading…
Reference in New Issue
Block a user