Merge pull request #2813 from lissyx/enforce-newline-removal

Enforce proper line ending removal when reading alphabet
This commit is contained in:
lissyx 2020-03-06 15:52:21 +01:00 committed by GitHub
commit 43b93f3164
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 61 additions and 10 deletions

View File

@@ -7,11 +7,20 @@ before_cache:
python:
- "3.6"
install:
jobs:
include:
- stage: cardboard linter
install:
- pip install --upgrade cardboardlint pylint
script:
script:
# Run cardboardlinter, in case of pull requests
- if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
cardboardlinter --refspec $TRAVIS_BRANCH -n auto;
fi
- stage: python unit tests
install:
- pip install --upgrade -r requirements_tests.txt
script:
- if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
python -m unittest;
fi

1
requirements_tests.txt Normal file
View File

@@ -0,0 +1 @@
absl-py

View File

@@ -0,0 +1 @@
a b c

View File

@@ -0,0 +1,3 @@
a
b
c

View File

@@ -0,0 +1,4 @@
a
b
c

34
util/test_text.py Normal file
View File

@@ -0,0 +1,34 @@
import unittest
import os
from .text import Alphabet
class TestAlphabetParsing(unittest.TestCase):
    """Check that ``Alphabet`` loads fixture files written with different line endings."""

    def _ending_tester(self, file, expected):
        """Load an alphabet fixture and verify label <-> id lookups in both directions.

        Args:
            file: Name of the fixture inside the ``test_data`` directory that
                sits next to this module.
            expected: Iterable of ``(label, label_id)`` pairs. For each pair,
                ``alphabet.encode(label)`` must equal ``[label_id]`` and
                ``alphabet.decode([label_id])`` must equal ``label``.
        """
        alphabet = Alphabet(os.path.join(os.path.dirname(__file__), 'test_data', file))
        for expected_label, expected_label_id in expected:
            # One sub-test per pair so a failure on one label does not hide
            # failures on the remaining ones.
            with self.subTest(label=expected_label, label_id=expected_label_id):
                # Reset the sentinels for every pair: a swallowed KeyError must
                # be compared against the sentinel, never against a value left
                # over from the previous iteration.
                label = ''
                label_id = -1
                try:
                    label_id = alphabet.encode(expected_label)
                except KeyError:
                    # Leave the sentinel in place; the assertion below reports it.
                    pass
                self.assertEqual(label_id, [expected_label_id])
                try:
                    label = alphabet.decode([expected_label_id])
                except KeyError:
                    # Leave the sentinel in place; the assertion below reports it.
                    pass
                self.assertEqual(label, expected_label)

    def test_macos_ending(self):
        """The ``alphabet_macos.txt`` fixture yields a, b, c as ids 0, 1, 2."""
        self._ending_tester('alphabet_macos.txt', [('a', 0), ('b', 1), ('c', 2)])

    def test_unix_ending(self):
        """The ``alphabet_unix.txt`` fixture yields a, b, c as ids 0, 1, 2."""
        self._ending_tester('alphabet_unix.txt', [('a', 0), ('b', 1), ('c', 2)])

    def test_windows_ending(self):
        """The ``alphabet_windows.txt`` fixture yields a, b, c as ids 0, 1, 2."""
        self._ending_tester('alphabet_windows.txt', [('a', 0), ('b', 1), ('c', 2)])
# Run this module's test cases when the file is executed as the main module.
if __name__ == "__main__":
    unittest.main()

View File

@@ -1,6 +1,5 @@
from __future__ import absolute_import, division, print_function
import codecs
import numpy as np
import re
import struct
@@ -15,7 +14,7 @@ class Alphabet(object):
self._str_to_label = {}
self._size = 0
if config_file:
with codecs.open(config_file, 'r', 'utf-8') as fin:
with open(config_file, 'r', encoding='utf-8') as fin:
for line in fin:
if line[0:2] == '\\#':
line = '#\n'