From 010f24578f96c2edab6ea1c38dd3b851eefd90b1 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:10:08 +0200 Subject: [PATCH] Better alphabet access --- bin/import_swc.py | 10 ++-------- util/text.py | 3 +++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/bin/import_swc.py b/bin/import_swc.py index 94016529..2046c584 100755 --- a/bin/import_swc.py +++ b/bin/import_swc.py @@ -159,13 +159,7 @@ def read_token(token): def in_alphabet(alphabet, c): - if alphabet is None: - return False - try: - alphabet.label_from_string(c) - return True - except KeyError: - return False + return True if alphabet is None else alphabet.has_char(c) alphabets = {} @@ -200,7 +194,7 @@ def label_filter(label, language): .encode("ascii", "ignore") .decode("ascii", "ignore")) for sc in c: - if alphabet is not None and not in_alphabet(alphabet, sc): + if not in_alphabet(alphabet, sc): return None, 'illegal character' chars.append(sc) label = ''.join(chars) diff --git a/util/text.py b/util/text.py index 0db0bb25..72eb71b8 100644 --- a/util/text.py +++ b/util/text.py @@ -36,6 +36,9 @@ class Alphabet(object): 'then add all these to data/alphabet.txt.'.format(string) ).with_traceback(e.__traceback__) + def has_char(self, char): + return char in self._str_to_label + def encode(self, string): res = [] for char in string: