From d65422c8ab4a4ec9261ac42892483b0117ecb490 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Wed, 15 Jan 2020 23:25:59 +0100
Subject: [PATCH 01/16] Update KenLM to b9f35777d112ce2fc10bd3986302517a16dc3883

---
 native_client/kenlm/.gitignore                |   3 +
 native_client/kenlm/BUILDING                  |   4 +
 native_client/kenlm/GIT_REVISION              |   2 +-
 native_client/kenlm/README.md                 |  16 +-
 native_client/kenlm/README.mozilla            |  22 +-
 native_client/kenlm/lm/build_binary_main.cc   |   9 +-
 native_client/kenlm/lm/max_order.hh           |   2 +-
 native_client/kenlm/lm/query_main.cc          |   4 +-
 native_client/kenlm/lm/read_arpa.cc           |  24 +-
 native_client/kenlm/lm/vocab.cc               |   4 +-
 native_client/kenlm/lm/vocab.hh               |   6 +-
 native_client/kenlm/setup.py                  |  24 +-
 native_client/kenlm/util/bit_packing.hh       |   4 +-
 .../util/double-conversion/bignum-dtoa.cc     |  13 +-
 .../kenlm/util/double-conversion/bignum.cc    |  26 ++-
 .../kenlm/util/double-conversion/bignum.h     |   1 -
 .../util/double-conversion/cached-powers.cc   |  10 +-
 .../kenlm/util/double-conversion/diy-fp.h     |  22 +-
 .../double-conversion/double-conversion.cc    | 217 +++++++++++++-----
 .../double-conversion/double-conversion.h     |  33 +--
 .../kenlm/util/double-conversion/fast-dtoa.cc |  19 +-
 .../util/double-conversion/fixed-dtoa.cc      |  19 +-
 .../kenlm/util/double-conversion/ieee.h       |   6 +-
 .../kenlm/util/double-conversion/strtod.cc    |  13 +-
 .../kenlm/util/double-conversion/utils.h      |  58 +++--
 native_client/kenlm/util/exception.hh         |   2 +-
 native_client/kenlm/util/file.cc              |   2 +-
 native_client/kenlm/util/file_piece.hh        |   2 +-
 native_client/kenlm/util/mmap.cc              |   2 +-
 .../kenlm/util/probing_hash_table.hh          |   4 +-
 native_client/kenlm/util/tokenize_piece.hh    |  32 ++-
 31 files changed, 386 insertions(+), 219 deletions(-)

diff --git a/native_client/kenlm/.gitignore b/native_client/kenlm/.gitignore
index 2e28eaf4..c921fff8 100644
--- a/native_client/kenlm/.gitignore
+++ b/native_client/kenlm/.gitignore
@@ -3,6 +3,9 @@ util/file_piece.cc.gz
 *.o
 doc/
 build/
+/bin
+/lib
+/tests
 ._*
 windows/Win32
 windows/x64
diff --git a/native_client/kenlm/BUILDING b/native_client/kenlm/BUILDING
index f6a10812..da36b87f 100644
--- a/native_client/kenlm/BUILDING
+++ b/native_client/kenlm/BUILDING
@@ -12,3 +12,7 @@ If you only want the query code and do not care about compression (.gz, .bz2, an
 Windows:
   The windows directory has visual studio files. Note that you need to compile
   the kenlm project before build_binary and ngram_query projects.
+
+OSX:
+  Missing dependencies can be remedied with brew.
+  brew install cmake boost eigen
diff --git a/native_client/kenlm/GIT_REVISION b/native_client/kenlm/GIT_REVISION
index 36ed3dfd..d2243f52 100644
--- a/native_client/kenlm/GIT_REVISION
+++ b/native_client/kenlm/GIT_REVISION
@@ -1 +1 @@
-cdd794598ea15dc23a7daaf7a8cf89423c97f7e6
+b9f35777d112ce2fc10bd3986302517a16dc3883
diff --git a/native_client/kenlm/README.md b/native_client/kenlm/README.md
index 2cef6588..45965c03 100644
--- a/native_client/kenlm/README.md
+++ b/native_client/kenlm/README.md
@@ -2,9 +2,9 @@
 
 Language model inference code by Kenneth Heafield (kenlm at kheafield.com)
 
-I do development in master on https://github.com/kpu/kenlm/. Normally, it works, but I do not guarantee it will compile, give correct answers, or generate non-broken binary files. For a more stable release, get http://kheafield.com/code/kenlm.tar.gz .
+I do development in master on https://github.com/kpu/kenlm/. Normally, it works, but I do not guarantee it will compile, give correct answers, or generate non-broken binary files.
For a more stable release, get https://kheafield.com/code/kenlm.tar.gz . -The website http://kheafield.com/code/kenlm/ has more documentation. If you're a decoder developer, please download the latest version from there instead of copying from another decoder. +The website https://kheafield.com/code/kenlm/ has more documentation. If you're a decoder developer, please download the latest version from there instead of copying from another decoder. ## Compiling Use cmake, see [BUILDING](BUILDING) for more detail. @@ -33,7 +33,7 @@ lmplz estimates unpruned language models with modified Kneser-Ney smoothing. Af ```bash bin/lmplz -o 5 text.arpa ``` -The algorithm is on-disk, using an amount of memory that you specify. See http://kheafield.com/code/kenlm/estimation/ for more. +The algorithm is on-disk, using an amount of memory that you specify. See https://kheafield.com/code/kenlm/estimation/ for more. MT Marathon 2012 team members Ivan Pouzyrevsky and Mohammed Mediani contributed to the computation design and early implementation. Jon Clark contributed to the design, clarified points about smoothing, and added logging. @@ -43,15 +43,15 @@ filter takes an ARPA or count file and removes entries that will never be querie ```bash bin/filter ``` -and see http://kheafield.com/code/kenlm/filter/ for more documentation. +and see https://kheafield.com/code/kenlm/filter/ for more documentation. ## Querying -Two data structures are supported: probing and trie. Probing is a probing hash table with keys that are 64-bit hashes of n-grams and floats as values. Trie is a fairly standard trie but with bit-level packing so it uses the minimum number of bits to store word indices and pointers. The trie node entries are sorted by word index. Probing is the fastest and uses the most memory. Trie uses the least memory and a bit slower. +Two data structures are supported: probing and trie. Probing is a probing hash table with keys that are 64-bit hashes of n-grams and floats as values. Trie is a fairly standard trie but with bit-level packing so it uses the minimum number of bits to store word indices and pointers. The trie node entries are sorted by word index. Probing is the fastest and uses the most memory. Trie uses the least memory and is a bit slower. As is the custom in language modeling, all probabilities are log base 10. -With trie, resident memory is 58% of IRST's smallest version and 21% of SRI's compact version. Simultaneously, trie CPU's use is 81% of IRST's fastest version and 84% of SRI's fast version. KenLM's probing hash table implementation goes even faster at the expense of using more memory. See http://kheafield.com/code/kenlm/benchmark/. +With trie, resident memory is 58% of IRST's smallest version and 21% of SRI's compact version. Simultaneously, trie CPU's use is 81% of IRST's fastest version and 84% of SRI's fast version. KenLM's probing hash table implementation goes even faster at the expense of using more memory. See https://kheafield.com/code/kenlm/benchmark/. Binary format via mmap is supported. Run `./build_binary` to make one then pass the binary file name to the appropriate Model constructor. @@ -71,7 +71,7 @@ Hideo Okuma and Tomoyuki Yoshimura from NICT contributed ports to ARM and MinGW. - Select the macros you want, listed in the previous section. -- There are two build systems: compile.sh and Jamroot+Jamfile. They're pretty simple and are intended to be reimplemented in your build system. +- There are two build systems: compile.sh and cmake. 
They're pretty simple and are intended to be reimplemented in your build system. - Use either the interface in `lm/model.hh` or `lm/virtual_interface.hh`. Interface documentation is in comments of `lm/virtual_interface.hh` and `lm/model.hh`. @@ -101,4 +101,4 @@ See [python/example.py](python/example.py) and [python/kenlm.pyx](python/kenlm.p --- -The name was Hieu Hoang's idea, not mine. +The name was Hieu Hoang's idea, not mine. diff --git a/native_client/kenlm/README.mozilla b/native_client/kenlm/README.mozilla index 7bad32fd..f5badcbb 100644 --- a/native_client/kenlm/README.mozilla +++ b/native_client/kenlm/README.mozilla @@ -1,7 +1,7 @@ -KenLM source downloaded from http://kheafield.com/code/kenlm.tar.gz on 2017/08/05 -sha256 c4c9f587048470c9a6a592914f0609a71fbb959f0a4cad371e8c355ce81f7c6b +KenLM source downloaded from https://github.com/kpu/kenlm on 2020/01/15 +commit b9f35777d112ce2fc10bd3986302517a16dc3883 -This corresponds to https://github.com/kpu/kenlm/commit/cdd794598ea15dc23a7daaf7a8cf89423c97f7e6 +This corresponds to https://github.com/kpu/kenlm/commit/b9f35777d112ce2fc10bd3986302517a16dc3883 The following procedure was run to remove unneeded files: @@ -10,19 +10,3 @@ rm -rf windows include lm/filter lm/builder util/stream util/getopt.* python This was done in order to ensure uniqueness of double_conversion: git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/double_conversion/kenlm_double_conversion/g' - -Please apply this patch to be able to build on Android: -diff --git a/native_client/kenlm/util/file.cc b/native_client/kenlm/util/file.cc -index d53dc0a..b5e36b2 100644 ---- a/native_client/kenlm/util/file.cc -+++ b/native_client/kenlm/util/file.cc -@@ -540,7 +540,7 @@ std::string DefaultTempDirectory() { - const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0}; - for (int i=0; vars[i]; ++i) { - char *val = --#if defined(_GNU_SOURCE) -+#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) - #if __GLIBC_PREREQ(2,17) - secure_getenv - #else // __GLIBC_PREREQ - diff --git a/native_client/kenlm/lm/build_binary_main.cc b/native_client/kenlm/lm/build_binary_main.cc index 35206e60..cd377b03 100644 --- a/native_client/kenlm/lm/build_binary_main.cc +++ b/native_client/kenlm/lm/build_binary_main.cc @@ -10,7 +10,6 @@ #include #include #include -#include #ifdef WIN32 #include "util/getopt.hh" @@ -23,11 +22,12 @@ namespace ngram { namespace { void Usage(const char *name, const char *default_mem) { - std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-w mmap|after] [-p probing_multiplier] [-T trie_temporary] [-S trie_building_mem] [-q bits] [-b bits] [-a bits] [type] input.arpa [output.mmap]\n\n" + std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-v] [-w mmap|after] [-p probing_multiplier] [-T trie_temporary] [-S trie_building_mem] [-q bits] [-b bits] [-a bits] [type] input.arpa [output.mmap]\n\n" "-u sets the log10 probability for if the ARPA file does not have one.\n" " Default is -100. The ARPA file will always take precedence.\n" "-s allows models to be built even if they do not have and .\n" "-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n" +"-v disables inclusion of the vocabulary in the binary file.\n" "-w mmap|after determines how writing is done.\n" " mmap maps the binary file and writes to it. Default for trie.\n" " after allocates anonymous memory, builds, and writes. 
Default for probing.\n" @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) { lm::ngram::Config config; config.building_memory = util::ParseSize(default_mem); int opt; - while ((opt = getopt(argc, argv, "q:b:a:u:p:t:T:m:S:w:sir:h")) != -1) { + while ((opt = getopt(argc, argv, "q:b:a:u:p:t:T:m:S:w:sir:vh")) != -1) { switch(opt) { case 'q': config.prob_bits = ParseBitCount(optarg); @@ -165,6 +165,9 @@ int main(int argc, char *argv[]) { ParseFileList(optarg, config.rest_lower_files); config.rest_function = Config::REST_LOWER; break; + case 'v': + config.include_vocab = false; + break; case 'h': // help default: Usage(argv[0], default_mem); diff --git a/native_client/kenlm/lm/max_order.hh b/native_client/kenlm/lm/max_order.hh index 0ad1379e..4e28031a 100644 --- a/native_client/kenlm/lm/max_order.hh +++ b/native_client/kenlm/lm/max_order.hh @@ -7,7 +7,7 @@ * sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead */ #ifndef KENLM_ORDER_MESSAGE -#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'. Otherwise, edit lm/max_order.hh." +#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. With cmake:\n cmake -DKENLM_MAX_ORDER=10 ..\nWith Moses:\n bjam --max-kenlm-order=10 -a\nOtherwise, edit lm/max_order.hh." #endif #endif // LM_MAX_ORDER_H diff --git a/native_client/kenlm/lm/query_main.cc b/native_client/kenlm/lm/query_main.cc index f3ca6e61..76466030 100644 --- a/native_client/kenlm/lm/query_main.cc +++ b/native_client/kenlm/lm/query_main.cc @@ -19,8 +19,8 @@ void Usage(const char *name) { "Each word in the output is formatted as:\n" " word=vocab_id ngram_length log10(p(word|context))\n" "where ngram_length is the length of n-gram matched. A vocab_id of 0 indicates\n" - "indicates the unknown word. Sentence-level output includes log10 probability of\n" - "the sentence and OOV count.\n"; + "the unknown word. Sentence-level output includes log10 probability of the\n" + "sentence and OOV count.\n"; exit(1); } diff --git a/native_client/kenlm/lm/read_arpa.cc b/native_client/kenlm/lm/read_arpa.cc index dc05a653..6ee9bfb2 100644 --- a/native_client/kenlm/lm/read_arpa.cc +++ b/native_client/kenlm/lm/read_arpa.cc @@ -19,8 +19,8 @@ namespace lm { -// 1 for '\t', '\n', and ' '. This is stricter than isspace. -const bool kARPASpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +// 1 for '\t', '\n', '\r', and ' '. This is stricter than isspace. Apparently ARPA allows vertical tab inside a word. 
+const bool kARPASpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; namespace { @@ -85,6 +85,11 @@ void ReadNGramHeader(util::FilePiece &in, unsigned int length) { if (line != expected.str()) UTIL_THROW(FormatLoadException, "Was expecting n-gram header " << expected.str() << " but got " << line << " instead"); } +void ConsumeNewline(util::FilePiece &in) { + char follow = in.get(); + UTIL_THROW_IF('\n' != follow, FormatLoadException, "Expected newline got '" << follow << "'"); +} + void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) { switch (in.get()) { case '\t': @@ -94,6 +99,9 @@ void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) { UTIL_THROW(FormatLoadException, "Non-zero backoff " << got << " provided for an n-gram that should have no backoff"); } break; + case '\r': + ConsumeNewline(in); + // Intentionally no break. case '\n': break; default: @@ -120,8 +128,18 @@ void ReadBackoff(util::FilePiece &in, float &backoff) { UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff); #endif } - UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff"); + switch (char got = in.get()) { + case '\r': + ConsumeNewline(in); + case '\n': + break; + default: + UTIL_THROW(FormatLoadException, "Expected newline after backoffs, got " << got); + } break; + case '\r': + ConsumeNewline(in); + // Intentionally no break. 
case '\n': backoff = ngram::kNoExtensionBackoff; break; diff --git a/native_client/kenlm/lm/vocab.cc b/native_client/kenlm/lm/vocab.cc index 5df5ca27..7996ec7e 100644 --- a/native_client/kenlm/lm/vocab.cc +++ b/native_client/kenlm/lm/vocab.cc @@ -282,7 +282,7 @@ void ProbingVocabulary::LoadedBinary(bool have_words, int fd, EnumerateVocab *to if (have_words) ReadWords(fd, to, bound_, offset); } -void MissingUnknown(const Config &config) throw(SpecialWordMissingException) { +void MissingUnknown(const Config &config) { switch(config.unknown_missing) { case SILENT: return; @@ -294,7 +294,7 @@ void MissingUnknown(const Config &config) throw(SpecialWordMissingException) { } } -void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) { +void MissingSentenceMarker(const Config &config, const char *str) { switch (config.sentence_marker_missing) { case SILENT: return; diff --git a/native_client/kenlm/lm/vocab.hh b/native_client/kenlm/lm/vocab.hh index 99c0aa83..f36e62ca 100644 --- a/native_client/kenlm/lm/vocab.hh +++ b/native_client/kenlm/lm/vocab.hh @@ -207,10 +207,10 @@ class ProbingVocabulary : public base::Vocabulary { detail::ProbingVocabularyHeader *header_; }; -void MissingUnknown(const Config &config) throw(SpecialWordMissingException); -void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException); +void MissingUnknown(const Config &config); +void MissingSentenceMarker(const Config &config, const char *str); -template void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) { +template void CheckSpecials(const Config &config, const Vocab &vocab) { if (!vocab.SawUnk()) MissingUnknown(config); if (vocab.BeginSentence() == vocab.NotFound()) MissingSentenceMarker(config, ""); if (vocab.EndSentence() == vocab.NotFound()) MissingSentenceMarker(config, ""); diff --git a/native_client/kenlm/setup.py b/native_client/kenlm/setup.py index 9d40c019..9e0f0d15 100644 --- a/native_client/kenlm/setup.py +++ b/native_client/kenlm/setup.py @@ -2,6 +2,8 @@ from setuptools import setup, Extension import glob import platform import os +import sys +import re #Does gcc compile with this header and library? def compile_test(header, library): @@ -9,16 +11,28 @@ def compile_test(header, library): command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\"" return os.system(command) == 0 +max_order = "6" +is_max_order = [s for s in sys.argv if "--max_order" in s] +for element in is_max_order: + max_order = re.split('[= ]',element)[1] + sys.argv.remove(element) -FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob('util/double-conversion/*.cc') +FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob('util/double-conversion/*.cc') + glob.glob('python/*.cc') FILES = [fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc'))] -LIBS = ['stdc++'] -if platform.system() != 'Darwin': - LIBS.append('rt') +if platform.system() == 'Linux': + LIBS = ['stdc++', 'rt'] +elif platform.system() == 'Darwin': + LIBS = ['c++'] +else: + LIBS = [] #We don't need -std=c++11 but python seems to be compiled with it now. 
https://github.com/kpu/kenlm/issues/86 -ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11'] +ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER='+max_order, '-std=c++11'] + +#Attempted fix to https://github.com/kpu/kenlm/issues/186 and https://github.com/kpu/kenlm/issues/197 +if platform.system() == 'Darwin': + ARGS += ["-stdlib=libc++", "-mmacosx-version-min=10.7"] if compile_test('zlib.h', 'z'): ARGS.append('-DHAVE_ZLIB') diff --git a/native_client/kenlm/util/bit_packing.hh b/native_client/kenlm/util/bit_packing.hh index b24fd9c1..77abc0df 100644 --- a/native_client/kenlm/util/bit_packing.hh +++ b/native_client/kenlm/util/bit_packing.hh @@ -108,7 +108,7 @@ typedef union { float f; uint32_t i; } FloatEnc; inline float ReadFloat32(const void *base, uint64_t bit_off) { FloatEnc encoded; - encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 32); + encoded.i = static_cast(ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 32)); return encoded.f; } inline void WriteFloat32(void *base, uint64_t bit_off, float value) { @@ -135,7 +135,7 @@ inline void UnsetSign(float &to) { inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) { FloatEnc encoded; - encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31); + encoded.i = static_cast(ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31)); // Sign bit set means negative. encoded.i |= kSignBit; return encoded.f; diff --git a/native_client/kenlm/util/double-conversion/bignum-dtoa.cc b/native_client/kenlm/util/double-conversion/bignum-dtoa.cc index 4825888d..a687b90a 100644 --- a/native_client/kenlm/util/double-conversion/bignum-dtoa.cc +++ b/native_client/kenlm/util/double-conversion/bignum-dtoa.cc @@ -25,7 +25,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include +#include #include "bignum-dtoa.h" @@ -192,13 +192,13 @@ static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, delta_plus = delta_minus; } *length = 0; - while (true) { + for (;;) { uint16_t digit; digit = numerator->DivideModuloIntBignum(*denominator); ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. // digit = numerator / denominator (integer division). // numerator = numerator % denominator. - buffer[(*length)++] = digit + '0'; + buffer[(*length)++] = static_cast(digit + '0'); // Can we stop already? // If the remainder of the division is less than the distance to the lower @@ -282,7 +282,7 @@ static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, // exponent (decimal_point), when rounding upwards. static void GenerateCountedDigits(int count, int* decimal_point, Bignum* numerator, Bignum* denominator, - Vector(buffer), int* length) { + Vector buffer, int* length) { ASSERT(count >= 0); for (int i = 0; i < count - 1; ++i) { uint16_t digit; @@ -290,7 +290,7 @@ static void GenerateCountedDigits(int count, int* decimal_point, ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. // digit = numerator / denominator (integer division). // numerator = numerator % denominator. - buffer[i] = digit + '0'; + buffer[i] = static_cast(digit + '0'); // Prepare for next iteration. 
numerator->Times10(); } @@ -300,7 +300,8 @@ static void GenerateCountedDigits(int count, int* decimal_point, if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { digit++; } - buffer[count - 1] = digit + '0'; + ASSERT(digit <= 10); + buffer[count - 1] = static_cast(digit + '0'); // Correct bad digits (in case we had a sequence of '9's). Propagate the // carry until we hat a non-'9' or til we reach the first digit. for (int i = count - 1; i > 0; --i) { diff --git a/native_client/kenlm/util/double-conversion/bignum.cc b/native_client/kenlm/util/double-conversion/bignum.cc index 3ff99d36..cbcc4ea2 100644 --- a/native_client/kenlm/util/double-conversion/bignum.cc +++ b/native_client/kenlm/util/double-conversion/bignum.cc @@ -40,6 +40,7 @@ Bignum::Bignum() template static int BitSize(S value) { + (void) value; // Mark variable as used. return 8 * sizeof(value); } @@ -103,7 +104,7 @@ void Bignum::AssignDecimalString(Vector value) { const int kMaxUint64DecimalDigits = 19; Zero(); int length = value.length(); - int pos = 0; + unsigned int pos = 0; // Let's just say that each digit needs 4 bits. while (length >= kMaxUint64DecimalDigits) { uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits); @@ -122,9 +123,8 @@ void Bignum::AssignDecimalString(Vector value) { static int HexCharValue(char c) { if ('0' <= c && c <= '9') return c - '0'; if ('a' <= c && c <= 'f') return 10 + c - 'a'; - if ('A' <= c && c <= 'F') return 10 + c - 'A'; - UNREACHABLE(); - return 0; // To make compiler happy. + ASSERT('A' <= c && c <= 'F'); + return 10 + c - 'A'; } @@ -501,13 +501,14 @@ uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) { // Start by removing multiples of 'other' until both numbers have the same // number of digits. while (BigitLength() > other.BigitLength()) { - // This naive approach is extremely inefficient if the this divided other - // might be big. This function is implemented for doubleToString where + // This naive approach is extremely inefficient if `this` divided by other + // is big. This function is implemented for doubleToString where // the result should be small (less than 10). ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16)); + ASSERT(bigits_[used_digits_ - 1] < 0x10000); // Remove the multiples of the first digit. // Example this = 23 and other equals 9. -> Remove 2 multiples. - result += bigits_[used_digits_ - 1]; + result += static_cast(bigits_[used_digits_ - 1]); SubtractTimes(other, bigits_[used_digits_ - 1]); } @@ -523,13 +524,15 @@ uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) { // Shortcut for easy (and common) case. 
int quotient = this_bigit / other_bigit; bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient; - result += quotient; + ASSERT(quotient < 0x10000); + result += static_cast(quotient); Clamp(); return result; } int division_estimate = this_bigit / (other_bigit + 1); - result += division_estimate; + ASSERT(division_estimate < 0x10000); + result += static_cast(division_estimate); SubtractTimes(other, division_estimate); if (other_bigit * (division_estimate + 1) > this_bigit) { @@ -560,8 +563,8 @@ static int SizeInHexChars(S number) { static char HexCharOfValue(int value) { ASSERT(0 <= value && value <= 16); - if (value < 10) return value + '0'; - return value - 10 + 'A'; + if (value < 10) return static_cast(value + '0'); + return static_cast(value - 10 + 'A'); } @@ -755,7 +758,6 @@ void Bignum::SubtractTimes(const Bignum& other, int factor) { Chunk difference = bigits_[i] - borrow; bigits_[i] = difference & kBigitMask; borrow = difference >> (kChunkSize - 1); - ++i; } Clamp(); } diff --git a/native_client/kenlm/util/double-conversion/bignum.h b/native_client/kenlm/util/double-conversion/bignum.h index 03a20601..553189f7 100644 --- a/native_client/kenlm/util/double-conversion/bignum.h +++ b/native_client/kenlm/util/double-conversion/bignum.h @@ -49,7 +49,6 @@ class Bignum { void AssignPowerUInt16(uint16_t base, int exponent); - void AddUInt16(uint16_t operand); void AddUInt64(uint64_t operand); void AddBignum(const Bignum& other); // Precondition: this >= other. diff --git a/native_client/kenlm/util/double-conversion/cached-powers.cc b/native_client/kenlm/util/double-conversion/cached-powers.cc index e61d7f34..e186bba6 100644 --- a/native_client/kenlm/util/double-conversion/cached-powers.cc +++ b/native_client/kenlm/util/double-conversion/cached-powers.cc @@ -25,9 +25,9 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include -#include -#include +#include +#include +#include #include "utils.h" @@ -131,7 +131,6 @@ static const CachedPower kCachedPowers[] = { {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340}, }; -static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers); static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent. static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10) // Difference between the decimal exponents in the table above. @@ -149,9 +148,10 @@ void PowersOfTenCache::GetCachedPowerForBinaryExponentRange( int foo = kCachedPowersOffset; int index = (foo + static_cast(k) - 1) / kDecimalExponentDistance + 1; - ASSERT(0 <= index && index < kCachedPowersLength); + ASSERT(0 <= index && index < static_cast(ARRAY_SIZE(kCachedPowers))); CachedPower cached_power = kCachedPowers[index]; ASSERT(min_exponent <= cached_power.binary_exponent); + (void) max_exponent; // Mark variable as used. 
ASSERT(cached_power.binary_exponent <= max_exponent); *decimal_exponent = cached_power.decimal_exponent; *power = DiyFp(cached_power.significand, cached_power.binary_exponent); diff --git a/native_client/kenlm/util/double-conversion/diy-fp.h b/native_client/kenlm/util/double-conversion/diy-fp.h index 71552b9b..6495d1d9 100644 --- a/native_client/kenlm/util/double-conversion/diy-fp.h +++ b/native_client/kenlm/util/double-conversion/diy-fp.h @@ -42,7 +42,7 @@ class DiyFp { static const int kSignificandSize = 64; DiyFp() : f_(0), e_(0) {} - DiyFp(uint64_t f, int e) : f_(f), e_(e) {} + DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {} // this = this - other. // The exponents of both numbers must be the same and the significand of this @@ -76,22 +76,22 @@ class DiyFp { void Normalize() { ASSERT(f_ != 0); - uint64_t f = f_; - int e = e_; + uint64_t significand = f_; + int exponent = e_; // This method is mainly called for normalizing boundaries. In general // boundaries need to be shifted by 10 bits. We thus optimize for this case. const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000); - while ((f & k10MSBits) == 0) { - f <<= 10; - e -= 10; + while ((significand & k10MSBits) == 0) { + significand <<= 10; + exponent -= 10; } - while ((f & kUint64MSB) == 0) { - f <<= 1; - e--; + while ((significand & kUint64MSB) == 0) { + significand <<= 1; + exponent--; } - f_ = f; - e_ = e; + f_ = significand; + e_ = exponent; } static DiyFp Normalize(const DiyFp& a) { diff --git a/native_client/kenlm/util/double-conversion/double-conversion.cc b/native_client/kenlm/util/double-conversion/double-conversion.cc index 115fe16f..be5cf75f 100644 --- a/native_client/kenlm/util/double-conversion/double-conversion.cc +++ b/native_client/kenlm/util/double-conversion/double-conversion.cc @@ -25,8 +25,8 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include -#include +#include +#include #include "double-conversion.h" @@ -118,7 +118,7 @@ void DoubleToStringConverter::CreateDecimalRepresentation( StringBuilder* result_builder) const { // Create a representation that is padded with zeros if needed. if (decimal_point <= 0) { - // "0.00000decimal_rep". + // "0.00000decimal_rep" or "0.000decimal_rep00". result_builder->AddCharacter('0'); if (digits_after_point > 0) { result_builder->AddCharacter('.'); @@ -129,7 +129,7 @@ void DoubleToStringConverter::CreateDecimalRepresentation( result_builder->AddPadding('0', remaining_digits); } } else if (decimal_point >= length) { - // "decimal_rep0000.00000" or "decimal_rep.0000" + // "decimal_rep0000.00000" or "decimal_rep.0000". result_builder->AddSubstring(decimal_digits, length); result_builder->AddPadding('0', decimal_point - length); if (digits_after_point > 0) { @@ -137,7 +137,7 @@ void DoubleToStringConverter::CreateDecimalRepresentation( result_builder->AddPadding('0', digits_after_point); } } else { - // "decima.l_rep000" + // "decima.l_rep000". ASSERT(digits_after_point > 0); result_builder->AddSubstring(decimal_digits, decimal_point); result_builder->AddCharacter('.'); @@ -348,7 +348,6 @@ static BignumDtoaMode DtoaToBignumDtoaMode( case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION; default: UNREACHABLE(); - return BIGNUM_DTOA_SHORTEST; // To silence compiler. 
} } @@ -403,8 +402,8 @@ void DoubleToStringConverter::DoubleToAscii(double v, vector, length, point); break; default: - UNREACHABLE(); fast_worked = false; + UNREACHABLE(); } if (fast_worked) return; @@ -417,8 +416,9 @@ void DoubleToStringConverter::DoubleToAscii(double v, // Consumes the given substring from the iterator. // Returns false, if the substring does not match. -static bool ConsumeSubString(const char** current, - const char* end, +template +static bool ConsumeSubString(Iterator* current, + Iterator end, const char* substring) { ASSERT(**current == *substring); for (substring++; *substring != '\0'; substring++) { @@ -440,10 +440,36 @@ static bool ConsumeSubString(const char** current, const int kMaxSignificantDigits = 772; +static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 }; +static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7); + + +static const uc16 kWhitespaceTable16[] = { + 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195, + 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279 +}; +static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16); + + +static bool isWhitespace(int x) { + if (x < 128) { + for (int i = 0; i < kWhitespaceTable7Length; i++) { + if (kWhitespaceTable7[i] == x) return true; + } + } else { + for (int i = 0; i < kWhitespaceTable16Length; i++) { + if (kWhitespaceTable16[i] == x) return true; + } + } + return false; +} + + // Returns true if a nonspace found and false if the end has reached. -static inline bool AdvanceToNonspace(const char** current, const char* end) { +template +static inline bool AdvanceToNonspace(Iterator* current, Iterator end) { while (*current != end) { - if (**current != ' ') return true; + if (!isWhitespace(**current)) return true; ++*current; } return false; @@ -462,26 +488,57 @@ static double SignedZero(bool sign) { } +// Returns true if 'c' is a decimal digit that is valid for the given radix. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the last +// condition was always true. By moving it into a separate function the +// compiler wouldn't warn anymore. +#if _MSC_VER +#pragma optimize("",off) +static bool IsDecimalDigitForRadix(int c, int radix) { + return '0' <= c && c <= '9' && (c - '0') < radix; +} +#pragma optimize("",on) +#else +static bool inline IsDecimalDigitForRadix(int c, int radix) { + return '0' <= c && c <= '9' && (c - '0') < radix; +} +#endif +// Returns true if 'c' is a character digit that is valid for the given radix. +// The 'a_character' should be 'a' or 'A'. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the first +// condition was always false. By moving it into a separate function the +// compiler wouldn't warn anymore. +static bool IsCharacterDigitForRadix(int c, int radix, char a_character) { + return radix > 10 && c >= a_character && c < a_character + radix - 10; +} + + // Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. 
-template -static double RadixStringToIeee(const char* current, - const char* end, +template +static double RadixStringToIeee(Iterator* current, + Iterator end, bool sign, bool allow_trailing_junk, double junk_string_value, bool read_as_double, - const char** trailing_pointer) { - ASSERT(current != end); + bool* result_is_junk) { + ASSERT(*current != end); const int kDoubleSize = Double::kSignificandSize; const int kSingleSize = Single::kSignificandSize; const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize; + *result_is_junk = true; + // Skip leading 0s. - while (*current == '0') { - ++current; - if (current == end) { - *trailing_pointer = end; + while (**current == '0') { + ++(*current); + if (*current == end) { + *result_is_junk = false; return SignedZero(sign); } } @@ -492,14 +549,14 @@ static double RadixStringToIeee(const char* current, do { int digit; - if (*current >= '0' && *current <= '9' && *current < '0' + radix) { - digit = static_cast(*current) - '0'; - } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) { - digit = static_cast(*current) - 'a' + 10; - } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) { - digit = static_cast(*current) - 'A' + 10; + if (IsDecimalDigitForRadix(**current, radix)) { + digit = static_cast(**current) - '0'; + } else if (IsCharacterDigitForRadix(**current, radix, 'a')) { + digit = static_cast(**current) - 'a' + 10; + } else if (IsCharacterDigitForRadix(**current, radix, 'A')) { + digit = static_cast(**current) - 'A' + 10; } else { - if (allow_trailing_junk || !AdvanceToNonspace(¤t, end)) { + if (allow_trailing_junk || !AdvanceToNonspace(current, end)) { break; } else { return junk_string_value; @@ -523,14 +580,14 @@ static double RadixStringToIeee(const char* current, exponent = overflow_bits_count; bool zero_tail = true; - while (true) { - ++current; - if (current == end || !isDigit(*current, radix)) break; - zero_tail = zero_tail && *current == '0'; + for (;;) { + ++(*current); + if (*current == end || !isDigit(**current, radix)) break; + zero_tail = zero_tail && **current == '0'; exponent += radix_log_2; } - if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) { + if (!allow_trailing_junk && AdvanceToNonspace(current, end)) { return junk_string_value; } @@ -552,13 +609,13 @@ static double RadixStringToIeee(const char* current, } break; } - ++current; - } while (current != end); + ++(*current); + } while (*current != end); ASSERT(number < ((int64_t)1 << kSignificandSize)); ASSERT(static_cast(static_cast(number)) == number); - *trailing_pointer = current; + *result_is_junk = false; if (exponent == 0) { if (sign) { @@ -573,13 +630,14 @@ static double RadixStringToIeee(const char* current, } +template double StringToDoubleConverter::StringToIeee( - const char* input, + Iterator input, int length, - int* processed_characters_count, - bool read_as_double) const { - const char* current = input; - const char* end = input + length; + bool read_as_double, + int* processed_characters_count) const { + Iterator current = input; + Iterator end = input + length; *processed_characters_count = 0; @@ -600,7 +658,7 @@ double StringToDoubleConverter::StringToIeee( if (allow_leading_spaces || allow_trailing_spaces) { if (!AdvanceToNonspace(¤t, end)) { - *processed_characters_count = current - input; + *processed_characters_count = static_cast(current - input); return empty_string_value_; } if (!allow_leading_spaces && (input != current)) { @@ -626,7 +684,7 @@ double StringToDoubleConverter::StringToIeee( 
if (*current == '+' || *current == '-') { sign = (*current == '-'); ++current; - const char* next_non_space = current; + Iterator next_non_space = current; // Skip following spaces (if allowed). if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_; if (!allow_spaces_after_sign && (current != next_non_space)) { @@ -649,7 +707,7 @@ double StringToDoubleConverter::StringToIeee( } ASSERT(buffer_pos == 0); - *processed_characters_count = current - input; + *processed_characters_count = static_cast(current - input); return sign ? -Double::Infinity() : Double::Infinity(); } } @@ -668,7 +726,7 @@ double StringToDoubleConverter::StringToIeee( } ASSERT(buffer_pos == 0); - *processed_characters_count = current - input; + *processed_characters_count = static_cast(current - input); return sign ? -Double::NaN() : Double::NaN(); } } @@ -677,7 +735,7 @@ double StringToDoubleConverter::StringToIeee( if (*current == '0') { ++current; if (current == end) { - *processed_characters_count = current - input; + *processed_characters_count = static_cast(current - input); return SignedZero(sign); } @@ -690,17 +748,17 @@ double StringToDoubleConverter::StringToIeee( return junk_string_value_; // "0x". } - const char* tail_pointer = NULL; - double result = RadixStringToIeee<4>(current, + bool result_is_junk; + double result = RadixStringToIeee<4>(¤t, end, sign, allow_trailing_junk, junk_string_value_, read_as_double, - &tail_pointer); - if (tail_pointer != NULL) { - if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end); - *processed_characters_count = tail_pointer - input; + &result_is_junk); + if (!result_is_junk) { + if (allow_trailing_spaces) AdvanceToNonspace(¤t, end); + *processed_characters_count = static_cast(current - input); } return result; } @@ -709,7 +767,7 @@ double StringToDoubleConverter::StringToIeee( while (*current == '0') { ++current; if (current == end) { - *processed_characters_count = current - input; + *processed_characters_count = static_cast(current - input); return SignedZero(sign); } } @@ -757,7 +815,7 @@ double StringToDoubleConverter::StringToIeee( while (*current == '0') { ++current; if (current == end) { - *processed_characters_count = current - input; + *processed_characters_count = static_cast(current - input); return SignedZero(sign); } exponent--; // Move this 0 into the exponent. @@ -801,9 +859,9 @@ double StringToDoubleConverter::StringToIeee( return junk_string_value_; } } - char sign = '+'; + char exponen_sign = '+'; if (*current == '+' || *current == '-') { - sign = static_cast(*current); + exponen_sign = static_cast(*current); ++current; if (current == end) { if (allow_trailing_junk) { @@ -837,7 +895,7 @@ double StringToDoubleConverter::StringToIeee( ++current; } while (current != end && *current >= '0' && *current <= '9'); - exponent += (sign == '-' ? -num : num); + exponent += (exponen_sign == '-' ? 
-num : num); } if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { @@ -855,16 +913,17 @@ double StringToDoubleConverter::StringToIeee( if (octal) { double result; - const char* tail_pointer = NULL; - result = RadixStringToIeee<3>(buffer, + bool result_is_junk; + char* start = buffer; + result = RadixStringToIeee<3>(&start, buffer + buffer_pos, sign, allow_trailing_junk, junk_string_value_, read_as_double, - &tail_pointer); - ASSERT(tail_pointer != NULL); - *processed_characters_count = current - input; + &result_is_junk); + ASSERT(!result_is_junk); + *processed_characters_count = static_cast(current - input); return result; } @@ -882,8 +941,42 @@ double StringToDoubleConverter::StringToIeee( } else { converted = Strtof(Vector(buffer, buffer_pos), exponent); } - *processed_characters_count = current - input; + *processed_characters_count = static_cast(current - input); return sign? -converted: converted; } + +double StringToDoubleConverter::StringToDouble( + const char* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +double StringToDoubleConverter::StringToDouble( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +float StringToDoubleConverter::StringToFloat( + const char* buffer, + int length, + int* processed_characters_count) const { + return static_cast(StringToIeee(buffer, length, false, + processed_characters_count)); +} + + +float StringToDoubleConverter::StringToFloat( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return static_cast(StringToIeee(buffer, length, false, + processed_characters_count)); +} + } // namespace kenlm_double_conversion diff --git a/native_client/kenlm/util/double-conversion/double-conversion.h b/native_client/kenlm/util/double-conversion/double-conversion.h index d3a57c05..62e5bbf0 100644 --- a/native_client/kenlm/util/double-conversion/double-conversion.h +++ b/native_client/kenlm/util/double-conversion/double-conversion.h @@ -415,9 +415,10 @@ class StringToDoubleConverter { // junk, too. // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of // a double literal. - // - ALLOW_LEADING_SPACES: skip over leading spaces. - // - ALLOW_TRAILING_SPACES: ignore trailing spaces. - // - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign. + // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces, + // new-lines, and tabs. + // - ALLOW_TRAILING_SPACES: ignore trailing whitespace. + // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign. // Ex: StringToDouble("- 123.2") -> -123.2. // StringToDouble("+ 123.2") -> 123.2 // @@ -502,19 +503,24 @@ class StringToDoubleConverter { // in the 'processed_characters_count'. Trailing junk is never included. double StringToDouble(const char* buffer, int length, - int* processed_characters_count) const { - return StringToIeee(buffer, length, processed_characters_count, true); - } + int* processed_characters_count) const; + + // Same as StringToDouble above but for 16 bit characters. + double StringToDouble(const uc16* buffer, + int length, + int* processed_characters_count) const; // Same as StringToDouble but reads a float. // Note that this is not equivalent to static_cast(StringToDouble(...)) // due to potential double-rounding. 
float StringToFloat(const char* buffer, int length, - int* processed_characters_count) const { - return static_cast(StringToIeee(buffer, length, - processed_characters_count, false)); - } + int* processed_characters_count) const; + + // Same as StringToFloat above but for 16 bit characters. + float StringToFloat(const uc16* buffer, + int length, + int* processed_characters_count) const; private: const int flags_; @@ -523,10 +529,11 @@ class StringToDoubleConverter { const char* const infinity_symbol_; const char* const nan_symbol_; - double StringToIeee(const char* buffer, + template + double StringToIeee(Iterator start_pointer, int length, - int* processed_characters_count, - bool read_as_double) const; + bool read_as_double, + int* processed_characters_count) const; DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter); }; diff --git a/native_client/kenlm/util/double-conversion/fast-dtoa.cc b/native_client/kenlm/util/double-conversion/fast-dtoa.cc index ff2936d6..1a3d8496 100644 --- a/native_client/kenlm/util/double-conversion/fast-dtoa.cc +++ b/native_client/kenlm/util/double-conversion/fast-dtoa.cc @@ -248,10 +248,7 @@ static void BiggestPowerTen(uint32_t number, // Note: kPowersOf10[i] == 10^(i-1). exponent_plus_one_guess++; // We don't have any guarantees that 2^number_bits <= number. - // TODO(floitsch): can we change the 'while' into an 'if'? We definitely see - // number < (2^number_bits - 1), but I haven't encountered - // number < (2^number_bits - 2) yet. - while (number < kSmallPowersOfTen[exponent_plus_one_guess]) { + if (number < kSmallPowersOfTen[exponent_plus_one_guess]) { exponent_plus_one_guess--; } *power = kSmallPowersOfTen[exponent_plus_one_guess]; @@ -350,7 +347,8 @@ static bool DigitGen(DiyFp low, // that is smaller than integrals. while (*kappa > 0) { int digit = integrals / divisor; - buffer[*length] = '0' + digit; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); (*length)++; integrals %= divisor; (*kappa)--; @@ -379,13 +377,14 @@ static bool DigitGen(DiyFp low, ASSERT(one.e() >= -60); ASSERT(fractionals < one.f()); ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); - while (true) { + for (;;) { fractionals *= 10; unit *= 10; unsafe_interval.set_f(unsafe_interval.f() * 10); // Integer division by one. int digit = static_cast(fractionals >> -one.e()); - buffer[*length] = '0' + digit; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); (*length)++; fractionals &= one.f() - 1; // Modulo by one. (*kappa)--; @@ -459,7 +458,8 @@ static bool DigitGenCounted(DiyFp w, // that is smaller than 'integrals'. while (*kappa > 0) { int digit = integrals / divisor; - buffer[*length] = '0' + digit; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); (*length)++; requested_digits--; integrals %= divisor; @@ -492,7 +492,8 @@ static bool DigitGenCounted(DiyFp w, w_error *= 10; // Integer division by one. int digit = static_cast(fractionals >> -one.e()); - buffer[*length] = '0' + digit; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); (*length)++; requested_digits--; fractionals &= one.f() - 1; // Modulo by one. 
diff --git a/native_client/kenlm/util/double-conversion/fixed-dtoa.cc b/native_client/kenlm/util/double-conversion/fixed-dtoa.cc index a1a16a62..d5eec223 100644 --- a/native_client/kenlm/util/double-conversion/fixed-dtoa.cc +++ b/native_client/kenlm/util/double-conversion/fixed-dtoa.cc @@ -25,7 +25,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include +#include #include "fixed-dtoa.h" #include "ieee.h" @@ -98,7 +98,7 @@ class UInt128 { return high_bits_ == 0 && low_bits_ == 0; } - int BitAt(int position) { + int BitAt(int position) const { if (position >= 64) { return static_cast(high_bits_ >> (position - 64)) & 1; } else { @@ -133,7 +133,7 @@ static void FillDigits32(uint32_t number, Vector buffer, int* length) { while (number != 0) { int digit = number % 10; number /= 10; - buffer[(*length) + number_length] = '0' + digit; + buffer[(*length) + number_length] = static_cast('0' + digit); number_length++; } // Exchange the digits. @@ -150,7 +150,7 @@ static void FillDigits32(uint32_t number, Vector buffer, int* length) { } -static void FillDigits64FixedLength(uint64_t number, int requested_length, +static void FillDigits64FixedLength(uint64_t number, Vector buffer, int* length) { const uint32_t kTen7 = 10000000; // For efficiency cut the number into 3 uint32_t parts, and print those. @@ -253,12 +253,14 @@ static void FillFractionals(uint64_t fractionals, int exponent, fractionals *= 5; point--; int digit = static_cast(fractionals >> point); - buffer[*length] = '0' + digit; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); (*length)++; fractionals -= static_cast(digit) << point; } // If the first bit after the point is set we have to round up. - if (((fractionals >> (point - 1)) & 1) == 1) { + ASSERT(fractionals == 0 || point - 1 >= 0); + if ((fractionals != 0) && ((fractionals >> (point - 1)) & 1) == 1) { RoundUp(buffer, length, decimal_point); } } else { // We need 128 bits. 
@@ -274,7 +276,8 @@ static void FillFractionals(uint64_t fractionals, int exponent, fractionals128.Multiply(5); point--; int digit = fractionals128.DivModPowerOf2(point); - buffer[*length] = '0' + digit; + ASSERT(digit <= 9); + buffer[*length] = static_cast('0' + digit); (*length)++; } if (fractionals128.BitAt(point - 1) == 1) { @@ -358,7 +361,7 @@ bool FastFixedDtoa(double v, remainder = (dividend % divisor) << exponent; } FillDigits32(quotient, buffer, length); - FillDigits64FixedLength(remainder, divisor_power, buffer, length); + FillDigits64FixedLength(remainder, buffer, length); *decimal_point = *length; } else if (exponent >= 0) { // 0 <= exponent <= 11 diff --git a/native_client/kenlm/util/double-conversion/ieee.h b/native_client/kenlm/util/double-conversion/ieee.h index ee11508f..1525d1b2 100644 --- a/native_client/kenlm/util/double-conversion/ieee.h +++ b/native_client/kenlm/util/double-conversion/ieee.h @@ -99,7 +99,7 @@ class Double { } double PreviousDouble() const { - if (d64_ == (kInfinity | kSignMask)) return -Double::Infinity(); + if (d64_ == (kInfinity | kSignMask)) return -Infinity(); if (Sign() < 0) { return Double(d64_ + 1).value(); } else { @@ -256,6 +256,8 @@ class Double { return (significand & kSignificandMask) | (biased_exponent << kPhysicalSignificandSize); } + + DISALLOW_COPY_AND_ASSIGN(Double); }; class Single { @@ -391,6 +393,8 @@ class Single { static const uint32_t kNaN = 0x7FC00000; const uint32_t d32_; + + DISALLOW_COPY_AND_ASSIGN(Single); }; } // namespace kenlm_double_conversion diff --git a/native_client/kenlm/util/double-conversion/strtod.cc b/native_client/kenlm/util/double-conversion/strtod.cc index 2c66e6e5..33e68e1c 100644 --- a/native_client/kenlm/util/double-conversion/strtod.cc +++ b/native_client/kenlm/util/double-conversion/strtod.cc @@ -25,8 +25,8 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include -#include +#include +#include #include "strtod.h" #include "bignum.h" @@ -137,6 +137,7 @@ static void TrimAndCut(Vector buffer, int exponent, Vector right_trimmed = TrimTrailingZeros(left_trimmed); exponent += left_trimmed.length() - right_trimmed.length(); if (right_trimmed.length() > kMaxSignificantDecimalDigits) { + (void) space_size; // Mark variable as used. ASSERT(space_size >= kMaxSignificantDecimalDigits); CutToMaxSignificantDigits(right_trimmed, exponent, buffer_copy_space, updated_exponent); @@ -263,7 +264,6 @@ static DiyFp AdjustmentPowerOfTen(int exponent) { case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40); default: UNREACHABLE(); - return DiyFp(0, 0); } } @@ -286,7 +286,7 @@ static bool DiyFpStrtod(Vector buffer, const int kDenominator = 1 << kDenominatorLog; // Move the remaining decimals into the exponent. exponent += remaining_decimals; - int error = (remaining_decimals == 0 ? 0 : kDenominator / 2); + uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2); int old_e = input.e(); input.Normalize(); @@ -506,9 +506,7 @@ float Strtof(Vector buffer, int exponent) { double double_previous = Double(double_guess).PreviousDouble(); float f1 = static_cast(double_previous); -#ifndef NDEBUG float f2 = float_guess; -#endif float f3 = static_cast(double_next); float f4; if (is_correct) { @@ -517,9 +515,8 @@ float Strtof(Vector buffer, int exponent) { double double_next2 = Double(double_next).NextDouble(); f4 = static_cast(double_next2); } -#ifndef NDEBUG + (void) f2; // Mark variable as used. 
ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4); -#endif // If the guess doesn't lie near a single-precision boundary we can simply // return its float-value. diff --git a/native_client/kenlm/util/double-conversion/utils.h b/native_client/kenlm/util/double-conversion/utils.h index ae40b116..41386253 100644 --- a/native_client/kenlm/util/double-conversion/utils.h +++ b/native_client/kenlm/util/double-conversion/utils.h @@ -33,14 +33,29 @@ #include #ifndef ASSERT -#define ASSERT(condition) (assert(condition)) +#define ASSERT(condition) \ + assert(condition); #endif #ifndef UNIMPLEMENTED #define UNIMPLEMENTED() (abort()) #endif +#ifndef DOUBLE_CONVERSION_NO_RETURN +#ifdef _MSC_VER +#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn) +#else +#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn)) +#endif +#endif #ifndef UNREACHABLE +#ifdef _MSC_VER +void DOUBLE_CONVERSION_NO_RETURN abort_noreturn(); +inline void abort_noreturn() { abort(); } +#define UNREACHABLE() (abort_noreturn()) +#else #define UNREACHABLE() (abort()) #endif +#endif + // Double operations detection based on target architecture. // Linux uses a 80bit wide floating point stack on x86. This induces double @@ -55,11 +70,17 @@ #if defined(_M_X64) || defined(__x86_64__) || \ defined(__ARMEL__) || defined(__avr32__) || \ defined(__hppa__) || defined(__ia64__) || \ - defined(__mips__) || defined(__powerpc__) || \ + defined(__mips__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ defined(__SH4__) || defined(__alpha__) || \ - defined(_MIPS_ARCH_MIPS32R2) || defined(__aarch64__) + defined(_MIPS_ARCH_MIPS32R2) || \ + defined(__AARCH64EL__) || defined(__aarch64__) || \ + defined(__riscv) #define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 +#elif defined(__mc68000__) +#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS #elif defined(_M_IX86) || defined(__i386__) || defined(__i386) #if defined(_WIN32) // Windows uses a 64bit wide floating point stack. @@ -71,6 +92,11 @@ #error Target architecture was not detected as supported by Double-Conversion. #endif +#if defined(__GNUC__) +#define DOUBLE_CONVERSION_UNUSED __attribute__((unused)) +#else +#define DOUBLE_CONVERSION_UNUSED +#endif #if defined(_WIN32) && !defined(__MINGW32__) @@ -90,6 +116,8 @@ typedef unsigned __int64 uint64_t; #endif +typedef uint16_t uc16; + // The following macro works on both 32 and 64-bit platforms. // Usage: instead of writing 0x1234567890123456 // write UINT64_2PART_C(0x12345678,90123456); @@ -155,8 +183,8 @@ template class Vector { public: Vector() : start_(NULL), length_(0) {} - Vector(T* data, int length) : start_(data), length_(length) { - ASSERT(length == 0 || (length > 0 && data != NULL)); + Vector(T* data, int len) : start_(data), length_(len) { + ASSERT(len == 0 || (len > 0 && data != NULL)); } // Returns a vector using the same backing storage as this one, @@ -198,8 +226,8 @@ class Vector { // buffer bounds on all operations in debug mode. class StringBuilder { public: - StringBuilder(char* buffer, int size) - : buffer_(buffer, size), position_(0) { } + StringBuilder(char* buffer, int buffer_size) + : buffer_(buffer, buffer_size), position_(0) { } ~StringBuilder() { if (!is_finalized()) Finalize(); } @@ -218,8 +246,7 @@ class StringBuilder { // 0-characters; use the Finalize() method to terminate the string // instead. 
void AddCharacter(char c) { - // I just extract raw data not a cstr so null is fine. - //ASSERT(c != '\0'); + ASSERT(c != '\0'); ASSERT(!is_finalized() && position_ < buffer_.length()); buffer_[position_++] = c; } @@ -234,8 +261,7 @@ class StringBuilder { // builder. The input string must have enough characters. void AddSubstring(const char* s, int n) { ASSERT(!is_finalized() && position_ + n < buffer_.length()); - // I just extract raw data not a cstr so null is fine. - //ASSERT(static_cast(n) <= strlen(s)); + ASSERT(static_cast(n) <= strlen(s)); memmove(&buffer_[position_], s, n * kCharSize); position_ += n; } @@ -255,8 +281,7 @@ class StringBuilder { buffer_[position_] = '\0'; // Make sure nobody managed to add a 0-character to the // buffer while building the string. - // I just extract raw data not a cstr so null is fine. - //ASSERT(strlen(buffer_.start()) == static_cast(position_)); + ASSERT(strlen(buffer_.start()) == static_cast(position_)); position_ = -1; ASSERT(is_finalized()); return buffer_.start(); @@ -299,11 +324,8 @@ template inline Dest BitCast(const Source& source) { // Compile time assertion: sizeof(Dest) == sizeof(Source) // A compile error here means your Dest and Source have different sizes. - typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1] -#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8 - __attribute__((unused)) -#endif - ; + DOUBLE_CONVERSION_UNUSED + typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]; Dest dest; memmove(&dest, &source, sizeof(dest)); diff --git a/native_client/kenlm/util/exception.hh b/native_client/kenlm/util/exception.hh index 03543a9b..614a88fa 100644 --- a/native_client/kenlm/util/exception.hh +++ b/native_client/kenlm/util/exception.hh @@ -134,7 +134,7 @@ class OverflowException : public Exception { template inline std::size_t CheckOverflowInternal(uint64_t value) { UTIL_THROW_IF(value > static_cast(std::numeric_limits::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code."); - return value; + return static_cast(value); } template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) { diff --git a/native_client/kenlm/util/file.cc b/native_client/kenlm/util/file.cc index b5e36b20..1a70387e 100644 --- a/native_client/kenlm/util/file.cc +++ b/native_client/kenlm/util/file.cc @@ -490,7 +490,7 @@ int mkstemp_and_unlink(char *tmpl) { int ret = mkstemp(tmpl); if (ret != -1) { - UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting delete " << tmpl); + UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting " << tmpl); } return ret; } diff --git a/native_client/kenlm/util/file_piece.hh b/native_client/kenlm/util/file_piece.hh index 67b28848..042a78e9 100644 --- a/native_client/kenlm/util/file_piece.hh +++ b/native_client/kenlm/util/file_piece.hh @@ -103,7 +103,7 @@ class FilePiece { if (position_ == position_end_) { try { Shift(); - } catch (const util::EndOfFileException &e) { return false; } + } catch (const util::EndOfFileException &) { return false; } // And break out at end of file. 
if (position_ == position_end_) return false; } diff --git a/native_client/kenlm/util/mmap.cc b/native_client/kenlm/util/mmap.cc index 4da5a975..39b9cd59 100644 --- a/native_client/kenlm/util/mmap.cc +++ b/native_client/kenlm/util/mmap.cc @@ -142,7 +142,7 @@ void UnmapOrThrow(void *start, size_t length) { #if defined(_WIN32) || defined(_WIN64) UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file"); #else - UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed"); + UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed with " << start << " for length " << length); #endif } diff --git a/native_client/kenlm/util/probing_hash_table.hh b/native_client/kenlm/util/probing_hash_table.hh index 438de92f..1d45b619 100644 --- a/native_client/kenlm/util/probing_hash_table.hh +++ b/native_client/kenlm/util/probing_hash_table.hh @@ -30,7 +30,7 @@ class DivMod { public: explicit DivMod(std::size_t buckets) : buckets_(buckets) {} - static std::size_t RoundBuckets(std::size_t from) { + static uint64_t RoundBuckets(uint64_t from) { return from; } @@ -58,7 +58,7 @@ class Power2Mod { } // Round up to next power of 2. - static std::size_t RoundBuckets(std::size_t from) { + static uint64_t RoundBuckets(uint64_t from) { --from; from |= from >> 1; from |= from >> 2; diff --git a/native_client/kenlm/util/tokenize_piece.hh b/native_client/kenlm/util/tokenize_piece.hh index 14ff9885..f5ce3367 100644 --- a/native_client/kenlm/util/tokenize_piece.hh +++ b/native_client/kenlm/util/tokenize_piece.hh @@ -5,10 +5,9 @@ #include "util/spaces.hh" #include "util/string_piece.hh" -#include - #include #include +#include namespace util { @@ -97,12 +96,12 @@ class AnyCharacterLast { StringPiece chars_; }; -template class TokenIter : public boost::iterator_facade, const StringPiece, boost::forward_traversal_tag> { +template class TokenIter : public std::iterator { public: TokenIter() {} template TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) { - increment(); + ++*this; } bool operator!() const { @@ -116,10 +115,15 @@ template class TokenIter : public boost::it return TokenIter(); } - private: - friend class boost::iterator_core_access; + bool operator==(const TokenIter &other) const { + return current_.data() == other.current_.data(); + } - void increment() { + bool operator!=(const TokenIter &other) const { + return !(*this == other); + } + + TokenIter &operator++() { do { StringPiece found(finder_.Find(after_)); current_ = StringPiece(after_.data(), found.data() - after_.data()); @@ -129,17 +133,25 @@ template class TokenIter : public boost::it after_ = StringPiece(found.data() + found.size(), after_.data() - found.data() + after_.size() - found.size()); } } while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false. 
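The `TokenIter` rewrite above replaces `boost::iterator_facade` with hand-written `operator++`, `operator==`/`operator!=`, `operator*` and `operator->`, so existing call sites keep compiling without Boost. A usage sketch; `util::AnyCharacter` is assumed from the same header (it is not shown in this hunk), and the include paths assume the kenlm source tree:

```cpp
// Sketch of whitespace tokenization with the boost-free TokenIter
// (not part of the patch).
#include "util/tokenize_piece.hh"

#include <iostream>

int main() {
  StringPiece line("the  quick brown\tfox");
  typedef util::TokenIter<util::AnyCharacter, true /*SkipEmpty*/> WordIter;
  // SkipEmpty=true makes the run of spaces act as a single delimiter.
  for (WordIter it(line, util::AnyCharacter(" \t")); it != WordIter::end(); ++it) {
    std::cout << *it << '\n';
  }
  return 0;
}
```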
+ return *this; } - bool equal(const TokenIter &other) const { - return current_.data() == other.current_.data(); + TokenIter &operator++(int) { + TokenIter ret(*this); + ++*this; + return ret; } - const StringPiece &dereference() const { + const StringPiece &operator*() const { UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens"); return current_; } + const StringPiece *operator->() const { + UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens"); + return ¤t_; + } + private: StringPiece current_; StringPiece after_; From 7c0354483ed4eac8cfacfb8e72dbedb56f3082b2 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 15 Jan 2020 23:28:35 +0100 Subject: [PATCH 02/16] Stop including vocabulary data in LM.binary. --- data/lm/README.rst | 2 +- data/lm/generate_lm.py | 1 + data/lm/lm.binary | 4 +- native_client/ctcdecode/scorer.cpp | 79 +++++++++++------------------- 4 files changed, 32 insertions(+), 54 deletions(-) diff --git a/data/lm/README.rst b/data/lm/README.rst index add2b195..bd2c2d3b 100644 --- a/data/lm/README.rst +++ b/data/lm/README.rst @@ -1,5 +1,5 @@ -lm.binary was generated from the LibriSpeech normalized LM training text, available `here `_\ , using the `generate_lm.py` script (will generate lm.binary in the folder it is run from). KenLM's built binaries must be in your PATH (lmplz, build_binary, filter). +lm.binary was generated from the LibriSpeech normalized LM training text, available `here `_\ , using the `generate_lm.py` script (will generate lm.binary in the folder it is run from). `KenLM `_'s built binaries must be in your PATH (lmplz, build_binary, filter). The trie was then generated from the vocabulary of the language model: diff --git a/data/lm/generate_lm.py b/data/lm/generate_lm.py index 82fe6468..6dc320a5 100644 --- a/data/lm/generate_lm.py +++ b/data/lm/generate_lm.py @@ -50,6 +50,7 @@ def main(): subprocess.check_call([ 'build_binary', '-a', '255', '-q', '8', + '-v', 'trie', filtered_path, 'lm.binary' diff --git a/data/lm/lm.binary b/data/lm/lm.binary index 16e7d6d9..22584cb1 100644 --- a/data/lm/lm.binary +++ b/data/lm/lm.binary @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a24953ce3f013bbf5f4a1c9f5a0e5482bc56eaa81638276de522f39e62ff3a56 -size 945699324 +oid sha256:cc8d9e5f49e2fa05c56cc928520c6c79cb78ff95226ec9a07785b3a28d1a680b +size 941235601 diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index 82197c79..79dbef8b 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -77,63 +77,40 @@ void Scorer::setup(const std::string& lm_path, const std::string& trie_path) VALID_CHECK_EQ(access(filename, R_OK), 0, "Invalid language model path"); bool has_trie = trie_path.size() && access(trie_path.c_str(), R_OK) == 0; + VALID_CHECK(has_trie, "Invalid trie path"); lm::ngram::Config config; + config.load_method = util::LoadMethod::LAZY; + language_model_.reset(lm::ngram::LoadVirtual(filename, config)); - if (!has_trie) { // no trie was specified, build it now - RetrieveStrEnumerateVocab enumerate; - config.enumerate_vocab = &enumerate; - language_model_.reset(lm::ngram::LoadVirtual(filename, config)); - auto vocab = enumerate.vocabulary; - for (size_t i = 0; i < vocab.size(); ++i) { - if (vocab[i] != UNK_TOKEN && - vocab[i] != START_TOKEN && - vocab[i] != END_TOKEN && - get_utf8_str_len(vocab[i]) > 1) { - is_utf8_mode_ = false; - break; - } - } + // Read metadata and trie from file + std::ifstream fin(trie_path, std::ios::binary); - if 
(alphabet_.GetSize() != 255) { - is_utf8_mode_ = false; - } - - // Add spaces only in word-based scoring - fill_dictionary(vocab); - } else { - config.load_method = util::LoadMethod::LAZY; - language_model_.reset(lm::ngram::LoadVirtual(filename, config)); - - // Read metadata and trie from file - std::ifstream fin(trie_path, std::ios::binary); - - int magic; - fin.read(reinterpret_cast(&magic), sizeof(magic)); - if (magic != MAGIC) { - std::cerr << "Error: Can't parse trie file, invalid header. Try updating " - "your trie file." << std::endl; - throw 1; - } - - int version; - fin.read(reinterpret_cast(&version), sizeof(version)); - if (version != FILE_VERSION) { - std::cerr << "Error: Trie file version mismatch (" << version - << " instead of expected " << FILE_VERSION - << "). Update your trie file." - << std::endl; - throw 1; - } - - fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); - - fst::FstReadOptions opt; - opt.mode = fst::FstReadOptions::MAP; - opt.source = trie_path; - dictionary.reset(FstType::Read(fin, opt)); + int magic; + fin.read(reinterpret_cast(&magic), sizeof(magic)); + if (magic != MAGIC) { + std::cerr << "Error: Can't parse trie file, invalid header. Try updating " + "your trie file." << std::endl; + throw 1; } + int version; + fin.read(reinterpret_cast(&version), sizeof(version)); + if (version != FILE_VERSION) { + std::cerr << "Error: Trie file version mismatch (" << version + << " instead of expected " << FILE_VERSION + << "). Update your trie file." + << std::endl; + throw 1; + } + + fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); + + fst::FstReadOptions opt; + opt.mode = fst::FstReadOptions::MAP; + opt.source = trie_path; + dictionary_.reset(FstType::Read(fin, opt)); + max_order_ = language_model_->Order(); } From be2229ef29032f3be1c92b5f6b124c93f80e79d7 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jan 2020 11:34:33 +0100 Subject: [PATCH 03/16] Refactor Scorer so model/trie package can be created by an external tool --- native_client/ctcdecode/__init__.py | 11 ++- .../ctcdecode/ctc_beam_search_decoder.cpp | 2 +- native_client/ctcdecode/scorer.cpp | 74 +++++++++++-------- native_client/ctcdecode/scorer.h | 25 ++++--- native_client/ctcdecode/swigwrapper.i | 4 + 5 files changed, 74 insertions(+), 42 deletions(-) diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index e33eeb5c..71432a7c 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function from . 
import swigwrapper # pylint: disable=import-self +from .swigwrapper import Alphabet __version__ = swigwrapper.__version__ @@ -16,7 +17,6 @@ class Scorer(swigwrapper.Scorer): :alphabet: Alphabet :type model_path: basestring """ - def __init__(self, alpha, beta, model_path, trie_path, alphabet): super(Scorer, self).__init__() serialized = alphabet.serialize() @@ -32,6 +32,15 @@ class Scorer(swigwrapper.Scorer): if err != 0: raise ValueError("Scorer initialization failed with error code {}".format(err), err) + def __init__(self): + super(Scorer, self).__init__() + + def load_lm(self, lm_path, trie_path): + super(Scorer, self).load_lm(lm_path.encode('utf-8'), trie_path.encode('utf-8')) + + def save_dictionary(self, save_path): + super(Scorer, self).save_dictionary(save_path.encode('utf-8')) + def ctc_beam_search_decoder(probs_seq, alphabet, diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 7ec00f2f..852ef34c 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -36,7 +36,7 @@ DecoderState::init(const Alphabet& alphabet, prefix_root_.reset(root); prefixes_.push_back(root); - if (ext_scorer != nullptr) { + if (ext_scorer != nullptr && (bool)ext_scorer_->dictionary) { // no need for std::make_shared<>() since Copy() does 'new' behind the doors auto dict_ptr = std::shared_ptr(ext_scorer->dictionary->Copy(true)); root->set_dictionary(dict_ptr); diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index 79dbef8b..3180724f 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -38,7 +38,8 @@ Scorer::init(double alpha, { reset_params(alpha, beta); alphabet_ = alphabet; - setup(lm_path, trie_path); + setup_char_map(); + load_lm(lm_path, trie_path); return 0; } @@ -54,11 +55,19 @@ Scorer::init(double alpha, if (err != 0) { return err; } - setup(lm_path, trie_path); + setup_char_map(); + load_lm(lm_path, trie_path); return 0; } -void Scorer::setup(const std::string& lm_path, const std::string& trie_path) +void +Scorer::set_alphabet(const Alphabet& alphabet) +{ + alphabet_ = alphabet; + setup_char_map(); +} + +void Scorer::setup_char_map() { // (Re-)Initialize character map char_map_.clear(); @@ -71,52 +80,57 @@ void Scorer::setup(const std::string& lm_path, const std::string& trie_path) // state, otherwise wrong decoding results would be given. char_map_[alphabet_.StringFromLabel(i)] = i + 1; } +} +void Scorer::load_lm(const std::string& lm_path, const std::string& trie_path) +{ // load language model const char* filename = lm_path.c_str(); VALID_CHECK_EQ(access(filename, R_OK), 0, "Invalid language model path"); bool has_trie = trie_path.size() && access(trie_path.c_str(), R_OK) == 0; - VALID_CHECK(has_trie, "Invalid trie path"); + // VALID_CHECK(has_trie, "Invalid trie path"); lm::ngram::Config config; config.load_method = util::LoadMethod::LAZY; language_model_.reset(lm::ngram::LoadVirtual(filename, config)); - // Read metadata and trie from file - std::ifstream fin(trie_path, std::ios::binary); + if (has_trie) { + // Read metadata and trie from file + std::ifstream fin(trie_path, std::ios::binary); - int magic; - fin.read(reinterpret_cast(&magic), sizeof(magic)); - if (magic != MAGIC) { - std::cerr << "Error: Can't parse trie file, invalid header. Try updating " - "your trie file." 
<< std::endl; - throw 1; + int magic; + fin.read(reinterpret_cast(&magic), sizeof(magic)); + if (magic != MAGIC) { + std::cerr << "Error: Can't parse trie file, invalid header. Try updating " + "your trie file." << std::endl; + throw 1; + } + + int version; + fin.read(reinterpret_cast(&version), sizeof(version)); + if (version != FILE_VERSION) { + std::cerr << "Error: Trie file version mismatch (" << version + << " instead of expected " << FILE_VERSION + << "). Update your trie file." + << std::endl; + throw 1; + } + + fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); + + fst::FstReadOptions opt; + opt.mode = fst::FstReadOptions::MAP; + opt.source = trie_path; + dictionary.reset(FstType::Read(fin, opt)); } - int version; - fin.read(reinterpret_cast(&version), sizeof(version)); - if (version != FILE_VERSION) { - std::cerr << "Error: Trie file version mismatch (" << version - << " instead of expected " << FILE_VERSION - << "). Update your trie file." - << std::endl; - throw 1; - } - - fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); - - fst::FstReadOptions opt; - opt.mode = fst::FstReadOptions::MAP; - opt.source = trie_path; - dictionary_.reset(FstType::Read(fin, opt)); - max_order_ = language_model_->Order(); } void Scorer::save_dictionary(const std::string& path) { - std::ofstream fout(path, std::ios::binary); + std::fstream fout(path, std::ios::in|std::ios::out|std::ios::binary|std::ios::ate); fout.write(reinterpret_cast(&MAGIC), sizeof(MAGIC)); fout.write(reinterpret_cast(&FILE_VERSION), sizeof(FILE_VERSION)); fout.write(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h index 96c092b0..f6c7d7bb 100644 --- a/native_client/ctcdecode/scorer.h +++ b/native_client/ctcdecode/scorer.h @@ -40,9 +40,9 @@ public: * scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); */ class Scorer { +public: using FstType = PathTrie::FstType; -public: Scorer() = default; ~Scorer() = default; @@ -76,12 +76,15 @@ public: // return the max order size_t get_max_order() const { return max_order_; } - // retrun true if the language model is character based + // return true if the language model is character based bool is_utf8_mode() const { return is_utf8_mode_; } // reset params alpha & beta void reset_params(float alpha, float beta); + // force set UTF-8 mode, ignore value read from file + void set_utf8_mode(bool utf8) { is_utf8_mode_ = utf8; } + // make ngram for a given prefix std::vector make_ngram(PathTrie *prefix); @@ -89,12 +92,20 @@ public: // the vector of characters (character based lm) std::vector split_labels_into_scored_units(const std::vector &labels); + void set_alphabet(const Alphabet& alphabet); + // save dictionary in file void save_dictionary(const std::string &path); // return weather this step represents a boundary where beam scoring should happen bool is_scoring_boundary(PathTrie* prefix, size_t new_label); + // fill dictionary FST from a vocabulary + void fill_dictionary(const std::vector &vocabulary); + + // load language model from given path + void load_lm(const std::string &lm_path, const std::string &trie_path); + // language model weight double alpha = 0.; // word insertion weight @@ -104,14 +115,8 @@ public: std::unique_ptr dictionary; protected: - // necessary setup: load language model, fill FST's dictionary - void setup(const std::string &lm_path, const std::string &trie_path); - - // load language model from given path - void load_lm(const std::string 
&lm_path); - - // fill dictionary for FST - void fill_dictionary(const std::vector &vocabulary); + // necessary setup after setting alphabet + void setup_char_map(); private: std::unique_ptr language_model_; diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index d3e70898..af3a1952 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -16,6 +16,10 @@ import_array(); %} +namespace std { + %template(StringVector) vector; +} + // Convert NumPy arrays to pointer+lengths %apply (double* IN_ARRAY2, int DIM1, int DIM2) {(const double *probs, int time_dim, int class_dim)}; %apply (double* IN_ARRAY3, int DIM1, int DIM2, int DIM3) {(const double *probs, int batch_size, int time_dim, int class_dim)}; From 214b50f4904fe144589047d9e44b7addc1dbc25d Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jan 2020 15:45:57 +0100 Subject: [PATCH 04/16] Add generate_package tool to create combined scorer package --- data/lm/generate_package.py | 120 ++++++++++++++++++++++++++++ native_client/ctcdecode/__init__.py | 33 ++++---- native_client/ctcdecode/scorer.cpp | 10 ++- native_client/ctcdecode/scorer.h | 2 +- 4 files changed, 145 insertions(+), 20 deletions(-) create mode 100644 data/lm/generate_package.py diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py new file mode 100644 index 00000000..ee3c106b --- /dev/null +++ b/data/lm/generate_package.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +from __future__ import absolute_import, division, print_function + +# Make sure we can import stuff from util/ +# This script needs to be run from the root of the DeepSpeech repository +import os +import sys +sys.path.insert(1, os.path.join(sys.path[0], '..', '..')) + +import argparse +import shutil + +from util.text import Alphabet, UTF8Alphabet +from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet + + +def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8): + words = set() + vocab_looks_char_based = True + with open(vocab_path) as fin: + for line in fin: + for word in line.split(): + words.add(word.encode('utf-8')) + if len(word) > 1: + vocab_looks_char_based = False + print("{} unique words read from vocabulary file.".format(len(words))) + print( + "{} like a character based model.".format( + "Looks" if vocab_looks_char_based else "Doesn't look" + ) + ) + + if force_utf8 != None: + use_utf8 = force_utf8.value + else: + use_utf8 = vocab_looks_char_based + + if use_utf8: + serialized_alphabet = UTF8Alphabet().serialize() + else: + serialized_alphabet = Alphabet(alphabet_path).serialize() + + alphabet = NativeAlphabet() + err = alphabet.deserialize(serialized_alphabet, len(serialized_alphabet)) + if err != 0: + print("Error loading alphabet: {}".format(err)) + sys.exit(1) + + scorer = Scorer() + scorer.set_alphabet(alphabet) + scorer.set_utf8_mode(use_utf8) + scorer.load_lm(lm_path, "") + scorer.fill_dictionary(list(words)) + shutil.copy(lm_path, package_path) + scorer.save_dictionary(package_path, True) # append, not overwrite + print('Package created in {}'.format(package_path)) + + +class Tristate(object): + def __init__(self, value=None): + if any(value is v for v in (True, False, None)): + self.value = value + else: + raise ValueError("Tristate value must be True, False, or None") + + def __eq__(self, other): + return (self.value is other.value if isinstance(other, Tristate) + else self.value is other) + + def __ne__(self, other): + return not self == other + + def 
__bool__(self): + raise TypeError("Tristate object may not be used as a Boolean") + + def __str__(self): + return str(self.value) + + def __repr__(self): + return "Tristate(%s)" % self.value + + +def main(): + parser = argparse.ArgumentParser( + description="Generate an external scorer package for DeepSpeech." + ) + parser.add_argument( + "--alphabet", + help="Path of alphabet file to use for vocabulary construction. Words with characters not in the alphabet will not be included in the vocabulary. Optional if using UTF-8 mode.", + ) + parser.add_argument( + "--lm", + required=True, + help="Path of KenLM binary LM file. Must be built without including the vocabulary (use the -v flag). See generate_lm.py for how to create a binary LM.", + ) + parser.add_argument( + "--vocab", + required=True, + help="Path of vocabulary file. Must contain words separated by whitespace.", + ) + parser.add_argument("--package", required=True, help="Path to save scorer package.") + parser.add_argument( + "--force_utf8", + default="", + help="Boolean flag, force set or unset UTF-8 mode in the scorer package. If not set, infers from the vocabulary.", + ) + args = parser.parse_args() + + if args.force_utf8 in ("True", "1", "true", "yes", "y"): + force_utf8 = Tristate(True) + elif args.force_utf8 in ("False", "0", "false", "no", "n"): + force_utf8 = Tristate(False) + else: + force_utf8 = Tristate(None) + + create_bundle(args.alphabet, args.lm, args.vocab, args.package, force_utf8) + + +if __name__ == "__main__": + main() diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 71432a7c..3fab4eb7 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -17,29 +17,28 @@ class Scorer(swigwrapper.Scorer): :alphabet: Alphabet :type model_path: basestring """ - def __init__(self, alpha, beta, model_path, trie_path, alphabet): + def __init__(self, alpha=None, beta=None, model_path=None, trie_path=None, alphabet=None): super(Scorer, self).__init__() - serialized = alphabet.serialize() - native_alphabet = swigwrapper.Alphabet() - err = native_alphabet.deserialize(serialized, len(serialized)) - if err != 0: - raise ValueError("Error when deserializing alphabet.") + # Allow bare initialization + if alphabet: + serialized = alphabet.serialize() + native_alphabet = swigwrapper.Alphabet() + err = native_alphabet.deserialize(serialized, len(serialized)) + if err != 0: + raise ValueError("Error when deserializing alphabet.") - err = self.init(alpha, beta, - model_path.encode('utf-8'), - trie_path.encode('utf-8'), - native_alphabet) - if err != 0: - raise ValueError("Scorer initialization failed with error code {}".format(err), err) - - def __init__(self): - super(Scorer, self).__init__() + err = self.init(alpha, beta, + model_path.encode('utf-8'), + trie_path.encode('utf-8'), + native_alphabet) + if err != 0: + raise ValueError("Scorer initialization failed with error code {}".format(err), err) def load_lm(self, lm_path, trie_path): super(Scorer, self).load_lm(lm_path.encode('utf-8'), trie_path.encode('utf-8')) - def save_dictionary(self, save_path): - super(Scorer, self).save_dictionary(save_path.encode('utf-8')) + def save_dictionary(self, save_path, *args, **kwargs): + super(Scorer, self).save_dictionary(save_path.encode('utf-8'), *args, **kwargs) def ctc_beam_search_decoder(probs_seq, diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index 3180724f..dfe2824a 100644 --- a/native_client/ctcdecode/scorer.cpp +++ 
b/native_client/ctcdecode/scorer.cpp @@ -128,9 +128,15 @@ void Scorer::load_lm(const std::string& lm_path, const std::string& trie_path) max_order_ = language_model_->Order(); } -void Scorer::save_dictionary(const std::string& path) +void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite) { - std::fstream fout(path, std::ios::in|std::ios::out|std::ios::binary|std::ios::ate); + std::ios::openmode om; + if (append_instead_of_overwrite) { + om = std::ios::in|std::ios::out|std::ios::binary|std::ios::ate; + } else { + om = std::ios::out|std::ios::binary; + } + std::fstream fout(path, om); fout.write(reinterpret_cast(&MAGIC), sizeof(MAGIC)); fout.write(reinterpret_cast(&FILE_VERSION), sizeof(FILE_VERSION)); fout.write(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h index f6c7d7bb..17bd1028 100644 --- a/native_client/ctcdecode/scorer.h +++ b/native_client/ctcdecode/scorer.h @@ -95,7 +95,7 @@ public: void set_alphabet(const Alphabet& alphabet); // save dictionary in file - void save_dictionary(const std::string &path); + void save_dictionary(const std::string &path, bool append_instead_of_overwrite=false); // return weather this step represents a boundary where beam scoring should happen bool is_scoring_boundary(PathTrie* prefix, size_t new_label); From b33d90b7bd00448aeb03dc6baaf9b1c2f54f8c33 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jan 2020 16:16:06 +0100 Subject: [PATCH 05/16] Load combined format from Scorer --- native_client/ctcdecode/scorer.cpp | 68 ++++++++++++--------- native_client/ctcdecode/scorer.h | 2 + native_client/kenlm/lm/model.cc | 4 ++ native_client/kenlm/lm/model.hh | 2 + native_client/kenlm/lm/virtual_interface.hh | 2 + 5 files changed, 49 insertions(+), 29 deletions(-) diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index dfe2824a..5bd4da8e 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -88,46 +88,56 @@ void Scorer::load_lm(const std::string& lm_path, const std::string& trie_path) const char* filename = lm_path.c_str(); VALID_CHECK_EQ(access(filename, R_OK), 0, "Invalid language model path"); - bool has_trie = trie_path.size() && access(trie_path.c_str(), R_OK) == 0; - // VALID_CHECK(has_trie, "Invalid trie path"); - lm::ngram::Config config; config.load_method = util::LoadMethod::LAZY; language_model_.reset(lm::ngram::LoadVirtual(filename, config)); + uint64_t package_size; + { + util::scoped_fd fd(util::OpenReadOrThrow(filename)); + package_size = util::SizeFile(fd.get()); + } + uint64_t trie_offset = language_model_->GetEndOfSearchOffset(); + bool has_trie = package_size > trie_offset; + if (has_trie) { // Read metadata and trie from file - std::ifstream fin(trie_path, std::ios::binary); - - int magic; - fin.read(reinterpret_cast(&magic), sizeof(magic)); - if (magic != MAGIC) { - std::cerr << "Error: Can't parse trie file, invalid header. Try updating " - "your trie file." << std::endl; - throw 1; - } - - int version; - fin.read(reinterpret_cast(&version), sizeof(version)); - if (version != FILE_VERSION) { - std::cerr << "Error: Trie file version mismatch (" << version - << " instead of expected " << FILE_VERSION - << "). Update your trie file." 
- << std::endl; - throw 1; - } - - fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); - - fst::FstReadOptions opt; - opt.mode = fst::FstReadOptions::MAP; - opt.source = trie_path; - dictionary.reset(FstType::Read(fin, opt)); + std::ifstream fin(lm_path, std::ios::binary); + fin.seekg(trie_offset); + load_trie(fin, lm_path); } max_order_ = language_model_->Order(); } +void Scorer::load_trie(std::ifstream& fin, const std::string& file_path) +{ + int magic; + fin.read(reinterpret_cast(&magic), sizeof(magic)); + if (magic != MAGIC) { + std::cerr << "Error: Can't parse trie file, invalid header. Try updating " + "your trie file." << std::endl; + throw 1; + } + + int version; + fin.read(reinterpret_cast(&version), sizeof(version)); + if (version != FILE_VERSION) { + std::cerr << "Error: Trie file version mismatch (" << version + << " instead of expected " << FILE_VERSION + << "). Update your trie file." + << std::endl; + throw 1; + } + + fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); + + fst::FstReadOptions opt; + opt.mode = fst::FstReadOptions::MAP; + opt.source = file_path; + dictionary.reset(FstType::Read(fin, opt)); +} + void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite) { std::ios::openmode om; diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h index 17bd1028..e4b86c9a 100644 --- a/native_client/ctcdecode/scorer.h +++ b/native_client/ctcdecode/scorer.h @@ -118,6 +118,8 @@ protected: // necessary setup after setting alphabet void setup_char_map(); + void load_trie(std::ifstream& fin, const std::string& file_path); + private: std::unique_ptr language_model_; bool is_utf8_mode_ = true; diff --git a/native_client/kenlm/lm/model.cc b/native_client/kenlm/lm/model.cc index a5a16bf8..fc4e374c 100644 --- a/native_client/kenlm/lm/model.cc +++ b/native_client/kenlm/lm/model.cc @@ -226,6 +226,10 @@ template FullScoreReturn GenericModel uint64_t GenericModel::GetEndOfSearchOffset() const { + return backing_.VocabStringReadingOffset(); +} + namespace { // Do a paraonoid copy of history, assuming new_word has already been copied // (hence the -1). out_state.length could be zero so I avoided using diff --git a/native_client/kenlm/lm/model.hh b/native_client/kenlm/lm/model.hh index b2bbe399..9b7206e8 100644 --- a/native_client/kenlm/lm/model.hh +++ b/native_client/kenlm/lm/model.hh @@ -102,6 +102,8 @@ template class GenericModel : public base::Mod return Search::kDifferentRest ? 
InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0; } + uint64_t GetEndOfSearchOffset() const; + private: FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const; diff --git a/native_client/kenlm/lm/virtual_interface.hh b/native_client/kenlm/lm/virtual_interface.hh index ea491fbf..91abe90e 100644 --- a/native_client/kenlm/lm/virtual_interface.hh +++ b/native_client/kenlm/lm/virtual_interface.hh @@ -137,6 +137,8 @@ class Model { const Vocabulary &BaseVocabulary() const { return *base_vocab_; } + virtual uint64_t GetEndOfSearchOffset() const = 0; + private: template friend class ModelFacade; explicit Model(size_t state_size) : state_size_(state_size) {} From 16d5632d6f85aaec7b31d6e6b7b978bd55690576 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jan 2020 16:27:54 +0100 Subject: [PATCH 06/16] Write default values for alpha and beta into trie header --- data/lm/generate_package.py | 7 +++++-- native_client/ctcdecode/scorer.cpp | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py index ee3c106b..4d064fdd 100644 --- a/data/lm/generate_package.py +++ b/data/lm/generate_package.py @@ -14,7 +14,7 @@ from util.text import Alphabet, UTF8Alphabet from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet -def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8): +def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8, default_alpha, default_beta): words = set() vocab_looks_char_based = True with open(vocab_path) as fin: @@ -49,6 +49,7 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8): scorer = Scorer() scorer.set_alphabet(alphabet) scorer.set_utf8_mode(use_utf8) + scorer.reset_params(default_alpha, default_beta) scorer.load_lm(lm_path, "") scorer.fill_dictionary(list(words)) shutil.copy(lm_path, package_path) @@ -99,6 +100,8 @@ def main(): help="Path of vocabulary file. Must contain words separated by whitespace.", ) parser.add_argument("--package", required=True, help="Path to save scorer package.") + parser.add_argument("--default_alpha", type=float, required=True, help="Default value of alpha hyperparameter.") + parser.add_argument("--default_beta", type=float, required=True, help="Default value of beta hyperparameter.") parser.add_argument( "--force_utf8", default="", @@ -113,7 +116,7 @@ def main(): else: force_utf8 = Tristate(None) - create_bundle(args.alphabet, args.lm, args.vocab, args.package, force_utf8) + create_bundle(args.alphabet, args.lm, args.vocab, args.package, force_utf8, args.default_alpha, args.default_beta) if __name__ == "__main__": diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index 5bd4da8e..c2bdc4c2 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -27,7 +27,7 @@ using namespace lm::ngram; static const int32_t MAGIC = 'TRIE'; -static const int32_t FILE_VERSION = 5; +static const int32_t FILE_VERSION = 6; int Scorer::init(double alpha, @@ -125,13 +125,24 @@ void Scorer::load_trie(std::ifstream& fin, const std::string& file_path) if (version != FILE_VERSION) { std::cerr << "Error: Trie file version mismatch (" << version << " instead of expected " << FILE_VERSION - << "). Update your trie file." - << std::endl; + << "). 
"; + if (version < FILE_VERSION) { + std::cerr << "Update your trie file."; + } else { + std::cerr << "Downgrade your trie file or update your version of DeepSpeech."; + } + std::cerr << std::endl; throw 1; } fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); + // Read hyperparameters from header + double alpha, beta; + fin.read(reinterpret_cast(&alpha), sizeof(alpha)); + fin.read(reinterpret_cast(&beta), sizeof(beta)); + reset_params(alpha, beta); + fst::FstReadOptions opt; opt.mode = fst::FstReadOptions::MAP; opt.source = file_path; @@ -150,6 +161,8 @@ void Scorer::save_dictionary(const std::string& path, bool append_instead_of_ove fout.write(reinterpret_cast(&MAGIC), sizeof(MAGIC)); fout.write(reinterpret_cast(&FILE_VERSION), sizeof(FILE_VERSION)); fout.write(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); + fout.write(reinterpret_cast(&alpha), sizeof(alpha)); + fout.write(reinterpret_cast(&beta), sizeof(beta)); fst::FstWriteOptions opt; opt.align = true; opt.source = path; From ab08f5ee5a29f3fa6ca2e121d668d5dc6472f832 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jan 2020 08:28:24 +0100 Subject: [PATCH 07/16] Change decoder API --- native_client/BUILD | 15 ----------- native_client/ctcdecode/scorer.cpp | 20 ++++---------- native_client/ctcdecode/scorer.h | 12 +++------ native_client/deepspeech.cc | 33 ++++++++++++++++------- native_client/deepspeech.h | 42 +++++++++++++++++++++--------- native_client/generate_trie.cpp | 32 ----------------------- native_client/trie_load.cc | 7 +++-- 7 files changed, 64 insertions(+), 97 deletions(-) delete mode 100644 native_client/generate_trie.cpp diff --git a/native_client/BUILD b/native_client/BUILD index 6d1c9d1d..250bc450 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -181,21 +181,6 @@ genrule( cmd = "dsymutil $(location :libdeepspeech.so) -o $@" ) -cc_binary( - name = "generate_trie", - srcs = [ - "alphabet.h", - "generate_trie.cpp", - ], - copts = ["-std=c++11"], - linkopts = [ - "-lm", - "-ldl", - "-pthread", - ], - deps = [":decoder"], -) - cc_binary( name = "trie_load", srcs = [ diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index c2bdc4c2..d53fe917 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -24,39 +24,29 @@ #include "decoder_utils.h" -using namespace lm::ngram; - static const int32_t MAGIC = 'TRIE'; static const int32_t FILE_VERSION = 6; int -Scorer::init(double alpha, - double beta, - const std::string& lm_path, - const std::string& trie_path, +Scorer::init(const std::string& lm_path, const Alphabet& alphabet) { - reset_params(alpha, beta); alphabet_ = alphabet; setup_char_map(); - load_lm(lm_path, trie_path); + load_lm(lm_path); return 0; } int -Scorer::init(double alpha, - double beta, - const std::string& lm_path, - const std::string& trie_path, +Scorer::init(const std::string& lm_path, const std::string& alphabet_config_path) { - reset_params(alpha, beta); int err = alphabet_.init(alphabet_config_path.c_str()); if (err != 0) { return err; } setup_char_map(); - load_lm(lm_path, trie_path); + load_lm(lm_path); return 0; } @@ -82,7 +72,7 @@ void Scorer::setup_char_map() } } -void Scorer::load_lm(const std::string& lm_path, const std::string& trie_path) +void Scorer::load_lm(const std::string& lm_path) { // load language model const char* filename = lm_path.c_str(); diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h index e4b86c9a..db58d581 100644 --- 
a/native_client/ctcdecode/scorer.h +++ b/native_client/ctcdecode/scorer.h @@ -50,16 +50,10 @@ public: Scorer(const Scorer&) = delete; Scorer& operator=(const Scorer&) = delete; - int init(double alpha, - double beta, - const std::string &lm_path, - const std::string &trie_path, + int init(const std::string &lm_path, const Alphabet &alphabet); - int init(double alpha, - double beta, - const std::string &lm_path, - const std::string &trie_path, + int init(const std::string &lm_path, const std::string &alphabet_config_path); double get_log_cond_prob(const std::vector &words, @@ -104,7 +98,7 @@ public: void fill_dictionary(const std::vector &vocabulary); // load language model from given path - void load_lm(const std::string &lm_path, const std::string &trie_path); + void load_lm(const std::string &lm_path); // language model weight double alpha = 0.; diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 716f2267..e8b3dc02 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -304,23 +304,38 @@ DS_FreeModel(ModelState* ctx) } int -DS_EnableDecoderWithLM(ModelState* aCtx, - const char* aLMPath, - const char* aTriePath, - float aLMAlpha, - float aLMBeta) +DS_EnableExternalScorer(ModelState* aCtx, + const char* aScorerPath) { aCtx->scorer_.reset(new Scorer()); - int err = aCtx->scorer_->init(aLMAlpha, aLMBeta, - aLMPath ? aLMPath : "", - aTriePath ? aTriePath : "", - aCtx->alphabet_); + int err = aCtx->scorer_->init(aScorerPath, aCtx->alphabet_); if (err != 0) { return DS_ERR_INVALID_LM; } return DS_ERR_OK; } +int +DS_DisableExternalScorer(ModelState* aCtx) +{ + if (aCtx->scorer_) { + aCtx->scorer_.reset(nullptr); + return DS_ERR_OK; + } + return DS_ERR_SCORER_NOT_ENABLED; +} + +int DS_SetScorerAlphaBeta(ModelState* aCtx, + float aAlpha, + float aBeta) +{ + if (aCtx->scorer_) { + aCtx->scorer_->reset_params(aAlpha, aBeta); + return DS_ERR_OK; + } + return DS_ERR_SCORER_NOT_ENABLED; +} + int DS_CreateStream(ModelState* aCtx, StreamingState** retval) diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index dae75289..94f6664e 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -61,6 +61,7 @@ enum DeepSpeech_Error_Codes DS_ERR_INVALID_SHAPE = 0x2001, DS_ERR_INVALID_LM = 0x2002, DS_ERR_MODEL_INCOMPATIBLE = 0x2003, + DS_ERR_SCORER_NOT_ENABLED = 0x2004, // Runtime failures DS_ERR_FAIL_INIT_MMAP = 0x3000, @@ -106,25 +107,40 @@ DEEPSPEECH_EXPORT void DS_FreeModel(ModelState* ctx); /** - * @brief Enable decoding using beam scoring with a KenLM language model. + * @brief Enable decoding using an external scorer. * * @param aCtx The ModelState pointer for the model being changed. - * @param aLMPath The path to the language model binary file. - * @param aTriePath The path to the trie file build from the same vocabu- - * lary as the language model binary. - * @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model - weight. - * @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion - weight. + * @param aScorerPath The path to the external scorer file. * * @return Zero on success, non-zero on failure (invalid arguments). */ DEEPSPEECH_EXPORT -int DS_EnableDecoderWithLM(ModelState* aCtx, - const char* aLMPath, - const char* aTriePath, - float aLMAlpha, - float aLMBeta); +int DS_EnableExternalScorer(ModelState* aCtx, + const char* aScorerPath); + +/** + * @brief Disable decoding using an external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. 
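Taken together, the deepspeech.h/deepspeech.cc hunks in this patch replace the old `DS_EnableDecoderWithLM` with three smaller entry points. A client-side sketch of the new call sequence; the model is assumed to have been created beforehand with the existing `DS_CreateModel`, and the scorer path and alpha/beta values are placeholders:

```cpp
// Client-side sketch of the new external-scorer API (not part of the patch).
#include <cstdio>

#include "deepspeech.h"

int UseExternalScorer(ModelState* model) {
  // One call now loads the combined LM+trie package instead of the old
  // separate lm.binary / trie pair.
  if (DS_EnableExternalScorer(model, "kenlm.scorer") != DS_ERR_OK) {
    std::fprintf(stderr, "failed to load scorer\n");
    return 1;
  }

  // Default alpha/beta now ship inside the package header (PATCH 06); calling
  // this is only needed to override them, e.g. during hyperparameter search.
  DS_SetScorerAlphaBeta(model, 0.75f, 1.85f);

  // ... run speech-to-text / streaming inference here ...

  // Removing the scorer drops back to plain CTC beam search without an LM.
  return DS_DisableExternalScorer(model);
}
```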
+ * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_DisableExternalScorer(ModelState* aCtx); + +/** + * @brief Set hyperparameters alpha and beta of a KenLM external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param aAlpha The alpha hyperparameter of the decoder. Language model weight. + * @param aLMBeta The beta hyperparameter of the decoder. Word insertion weight. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_SetScorerAlphaBeta(ModelState* aCtx, + float aAlpha, + float aBeta); /** * @brief Use the DeepSpeech model to perform Speech-To-Text. diff --git a/native_client/generate_trie.cpp b/native_client/generate_trie.cpp deleted file mode 100644 index f593f65d..00000000 --- a/native_client/generate_trie.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include -#include - -#include "ctcdecode/scorer.h" -#include "alphabet.h" - -using namespace std; - -int generate_trie(const char* alphabet_path, const char* kenlm_path, const char* trie_path) { - Alphabet alphabet; - int err = alphabet.init(alphabet_path); - if (err != 0) { - return err; - } - Scorer scorer; - err = scorer.init(0.0, 0.0, kenlm_path, "", alphabet); - if (err != 0) { - return err; - } - scorer.save_dictionary(trie_path); - return 0; -} - -int main(int argc, char** argv) { - if (argc != 4) { - std::cerr << "Usage: " << argv[0] << " " << std::endl; - return -1; - } - - return generate_trie(argv[1], argv[2], argv[3]); -} diff --git a/native_client/trie_load.cc b/native_client/trie_load.cc index cd625c23..df270f09 100644 --- a/native_client/trie_load.cc +++ b/native_client/trie_load.cc @@ -27,9 +27,9 @@ int main(int argc, char** argv) return err; } Scorer scorer; - + err = scorer.init(kenlm_path, alphabet); #ifndef DEBUG - return scorer.init(0.0, 0.0, kenlm_path, trie_path, alphabet); + return err; #else // Print some info about the FST using FstType = fst::ConstFst; @@ -60,7 +60,6 @@ int main(int argc, char** argv) // for (int i = 1; i < 10; ++i) { // print_states_from(i); // } -#endif // DEBUG - return 0; +#endif // DEBUG } From b34723588daa8f07c6393dfd5fdc73d6197c5d7e Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jan 2020 15:55:21 +0100 Subject: [PATCH 08/16] Switch to new scorer format --- .gitattributes | 1 + data/lm/kenlm.scorer | 3 +++ data/lm/lm.binary | 3 --- data/lm/trie | 3 --- 4 files changed, 4 insertions(+), 6 deletions(-) create mode 100644 data/lm/kenlm.scorer delete mode 100644 data/lm/lm.binary delete mode 100644 data/lm/trie diff --git a/.gitattributes b/.gitattributes index 0bdd5738..b2aaede4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.binary filter=lfs diff=lfs merge=lfs -crlf data/lm/trie filter=lfs diff=lfs merge=lfs -crlf data/lm/vocab.txt filter=lfs diff=lfs merge=lfs -text +data/lm/kenlm.scorer filter=lfs diff=lfs merge=lfs -text diff --git a/data/lm/kenlm.scorer b/data/lm/kenlm.scorer new file mode 100644 index 00000000..02d5de6c --- /dev/null +++ b/data/lm/kenlm.scorer @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba04978fca285c34c99bf115ee61549937e422ac91def80122a767e114c035e +size 953436352 diff --git a/data/lm/lm.binary b/data/lm/lm.binary deleted file mode 100644 index 22584cb1..00000000 --- a/data/lm/lm.binary +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc8d9e5f49e2fa05c56cc928520c6c79cb78ff95226ec9a07785b3a28d1a680b -size 941235601 diff --git a/data/lm/trie b/data/lm/trie deleted file 
mode 100644 index 8edb4157..00000000 --- a/data/lm/trie +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0281e5e784ffccb4aeae5e7d64099058a0c22e42dbb7aa2d3ef2fbbff53db3ab -size 12200736 From a156d2850429d238efb16347f3e8cac5f23fb7cc Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jan 2020 17:46:50 +0100 Subject: [PATCH 09/16] Switch smoke test scorer to new format --- .../{vocab.pruned.lm => pruned_lm.scorer} | Bin 293790 -> 439040 bytes data/smoke_test/vocab.pruned.txt | 3540 +++++++++++++++++ data/smoke_test/vocab.trie | Bin 172912 -> 0 bytes 3 files changed, 3540 insertions(+) rename data/smoke_test/{vocab.pruned.lm => pruned_lm.scorer} (60%) create mode 100644 data/smoke_test/vocab.pruned.txt delete mode 100644 data/smoke_test/vocab.trie diff --git a/data/smoke_test/vocab.pruned.lm b/data/smoke_test/pruned_lm.scorer similarity index 60% rename from data/smoke_test/vocab.pruned.lm rename to data/smoke_test/pruned_lm.scorer index e51c090c4261f10e762805b31c15057dca1c41fb..ba14b09ed4d7490adb8315be6b92af3199dc9dd3 100644 GIT binary patch delta 174316 zcmaf+1-u(o_q78>ij+cu;%)_sySr;~cXx*(oxz>r?(XjH?(XgmEpFelCz)hr-}QgL z{F3&r=j?N~oO5O-a&yoBZJhG+iBH^WgvP;JAHMYnhi^UN;am5*ZR?RH>02zb!WwIh z(rDklT#O$tJnO7~7JL1ln}>{y*mBoh_S~yCKkT{J=DTdQ`5p|?XpF>TgvMzbTzwfo zv~CCVY5uU@L^WytUl#FyH!O?L<2M>rThp#l7PInGxa!=>VqU~(9l%dz5kY3QJZzj~ zw?2B*#5U_HvN!i-Y+$Cvr3_2XkWh z&yG)r9i4~Nzbsw`{+3`rQJWZpHfd5>=-heR} zv~w$qabXfQO{`#8l(2Ivi)mmvm>FPs>Sl*k)6}VdS|@f%TV5b2x_C@zJL9P*|?a z$*>&Y8L-xosg+_`T-qM5UH`JU+H}65ZCID5EN+A4?erKdubCHNxias^@qS5 z^=B~dax06^4f_I?*TPq@Jl^-F?+>sdm+)s}ILxr&u)YDQERqr!5vdczn`*MC`z*Mwb}ObN^TcmOPK*|}hO%Pt5bM!A*6!mv(qmH)C> z5}3E$%CKDi)nPf9^g4Ows9PPbJSZI!>tY52F6`(Y-2#qct^u_hDCBx z|FYN}$X#w_u@@lE=U`aQZhu&=)(~SC2}^+CpUY)qcnFNU+{)r`K%S!?%zvUnEQS3-%(yaVGdx3YL2 zkk`y-u)Hw8gLPJV3{e)tV0x*C|FRf?TIB(I!_+YB`or>olfv>=oB@_AGXR#$Kg&q0 zKLV0lS%`m<*tIhajgd2wwE%cb5GmL2b7 z?1sR))>d5q9Sh8voCM3koNCw^uw3dh4Li%QvtdPEa907d<7;8uY zm&I%ftHr>P(6BiSo71qlU^&2fja@OHf%6--fME;5@>*Ef*ezn%qD`jt=XE_Z;1Z_u zl7=m1`Yr>@Ram~OZ{@!%R_KCJ|FT%o47jqfTg9+dVcGE@W4A_O;F>V*ax07VjNST% zZD81jusls0x9p_;Ww8k`N4>e}yrr?*238+bnvs{q4zPT5+_AfF_%DlHjNz_^?FP$E z_JZZ?_A!0;HEh3l{+%NnY@8fqoVckgjol%}@u4;A)W0ka1LpD{SsS)jLs=YU*wL_D zg=3A~amfz%V2peTxIO8 zfn^`pw)?j0&lQd7e6#6%3oKXWc4K#kVRz=fas79XF}&BX`wY7umMin1VGqM{(2v22 z-1!M$F4vQ$^E0sA_c>$tqOp4!*78x;zbsxconP-5w$^`HykXdzupG?WhP~T^asBsx z56DZhuw1TBV7W4%!tx^g5|($w_plt`k1<}Ie|^aODPViA#}8%kv*|qC*!=>_!Te$R z{spT8Z29LoAS6fF3x=)Sc=;8UD>DWx2R)`4Z!BY1^fhp7)489q8yA+Ja7+TrRhR;n z6PXs)tMmLXlH~dim@^q**o=nFWZ2Ax4TQA=Y}TJ^Jzy^XoTl?!uw4FmP2c%p+~rml zivn_T#bO374$DrKg5?q}56jPv2bsR{26OJaK8#>={g=fCP1qInhOl5N{a6dEP{-*N~V^^B- z4u)};TUi_lh~%XHybuBGlqT%U;&{V`!g3W(Yz^4)QWmGe@;W-ByKnd}i!-}n#m+M9 zY*=1M7Z|$>VcGE|#_rNl@Siie48vT7D-65J40yF+*BEwfJzl5&WpO<)M}3p&d^0S+ zDspcPPq+w5+@tzsQT>m|544;GLt@siw z2lFZ{Pu=T=y$Nehd%ON+@m3qwH7tv_4SUD1cMW?FmKW>Cu&$uv`tLJfj`|BT;FpGd zl?PBO$(zbvLV1I}bR&tll@rtjQ_%@4}~77G}|MU3I1u)Iqahvn>+ zGIq-twj3-zeSnw_%9{SmVhx3r|FT%q47j$jTgTX~ z2g?VajbS0HFht-D!-lj^O+4`UP!MR_Ilm99S*OG81|-NZzYC* zu5y66tnVemuzMet1N_j~ePqV_)b#y)RMuY}@JkGHS-&$*zBlX#bgVS7t_7kI(k8)3CK*xzy_zwqDh@UVl}}WwE{)a0AnMBV)I* zVVfAXsj{a3ve-;vE11n;Ig>4o-ByNeZP+$uyka|JxV>RJn9e&HyPaWq-gh%>&t_k( zKOZ(T1MY1)?+443+27b5WX2oP)wl9r79}vR%)`12V@}HA2*Zvt>=;}vj zrt@X6JnvT+c9mh*7ha}iY)d=Qq;8&AMA82p#TGr)Yw^`c>~!E!Kfqyb~RcVM}K?;7@AZ*%?k0fxB> zAH%ZaPYwGVrrAX>U&C@R-@uwa+VwAs@6CWen$ADLvXfs7`@@X)7p$w&as3xhWpjih z7&f9|y$l=4u#sW8s=Z-7UjG{d*cBlGjt$G@?+43O=nut7a=c3^7_ zm&Ig;O>Wo}upIQXupIRCJs8)21I&Ok8a9(*GaEJ#mL1P(>}CsA=if}3_W_OJ9EQzl 
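For reference when debugging a package produced by `generate_package.py` / `save_dictionary()`: after the raw KenLM binary, the file continues with the header written by the code above, i.e. an int32 magic (`'TRIE'`), an int32 file version (now 6), a bool UTF-8 flag, the default alpha and beta as doubles, and finally the mmap-able FST. A standalone dump sketch, assuming the caller already knows where the LM ends (in the real code that offset comes from `GetEndOfSearchOffset()`) and that the package was written on a machine with the same endianness and `sizeof(bool)`:

```cpp
// Debug sketch (not part of the patch): dump the trie header appended to a
// combined scorer package. Usage: dump_header <package> <lm_end_offset>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>

int main(int argc, char** argv) {
  if (argc != 3) {
    std::cerr << "usage: dump_header <package> <lm_end_offset>\n";
    return 1;
  }
  std::ifstream fin(argv[1], std::ios::binary);
  fin.seekg(static_cast<std::streamoff>(std::strtoull(argv[2], nullptr, 10)));

  int32_t magic = 0, version = 0;
  bool is_utf8 = false;
  double alpha = 0.0, beta = 0.0;
  fin.read(reinterpret_cast<char*>(&magic), sizeof(magic));      // 'TRIE'
  fin.read(reinterpret_cast<char*>(&version), sizeof(version));  // FILE_VERSION == 6
  fin.read(reinterpret_cast<char*>(&is_utf8), sizeof(is_utf8));
  fin.read(reinterpret_cast<char*>(&alpha), sizeof(alpha));
  fin.read(reinterpret_cast<char*>(&beta), sizeof(beta));

  std::cout << "magic ok: " << (magic == 'TRIE') << "\n"
            << "version:  " << version << "\n"
            << "utf-8:    " << is_utf8 << "\n"
            << "alpha:    " << alpha << ", beta: " << beta << "\n";
  // The FST written via fst::FstWriteOptions follows immediately after beta.
  return 0;
}
```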
z*j$Fq4a>pID|RjaTu=(EgPGs31q@pdmYpnY>=uFL^-(Nt;8L(WpUay8SAb>5D;c|$ z4O<1)&jo~1})_`TlYZ<$>4O>V1HtWylyMVbg8=B4=Y3FL5_}G@Q+tiG=h3Q*t zW#Beuz-`s8?bIDM{PHc@Q^)HK4TClnQn^id7 zbUp*dU2bJ@p|QKju#3A{_~(VkZdkEP4ZF;+%MH5%mZ#(@!>$<(|GBK!n*nct%Uyqk1))?c=CgB@}prt8TPYb!(h$z(XKzg2hfIf{mbGv)A@H; zKG6JQ?EZ!2E2I&5-@a>M#`WJwz--tXmPzWo9*f=YVww$o1bmz#QR%uau$KR_SW93X%-V*n zW7xWet!LQ!uw0p9Lu0s+VH=yyn;N^#jNKNn?s+eZZK}Rq_2{mbHMV2=73!=7z*?#xM9Ja6n? zfaOG9?zRj6eD=E=R_s;N`3=L~HhteQ>|Mj&8y){S&ETS^(CS zTwMPx2+Wx*YzAB`cc$;+hAm;(l7=k>>v8?JG%#npoawy0VJn!vE5mYft9JU<^)HLn zfVo_2n$Bx?I=2>QS*&B&x~A{?uz4seKJrRjUHVTTx29BSZUuzVIf z29~GfcvziD9pF&IPBP=20qgPkzcWqevkf~JmV>##*j)t6^M0|~HS5m>h#Bw-HLTJr zi>qLHv0i8FZh&RSHyXR*W@C5@EN6GSvAe^tJ7IZB?lyM!G?}jdvbfh6-UrLUJZS75 zGVEc)9_bpd^3N+E#_$Qlo`mJ{KWprsGwgX-PUNKmn9KEw>HMm(d)=@%P2abS-8(Hi zsef6#YdXJYI)4DmnS5;QK859Kec8>zKc83!=GFQgEIawp*!^VK&#+w8;l}QlG4P)w z`~$sb$c+b|JLgP`5c&G z8^ChL8yUNe4co-9O@r0>H&f zGmPDthMm>LD*t71b{CBLm&G}Toom>6upIOS#_mE`UeuSw0?DV|OU;0n8@nqEyV9_$ z47(Z@K}-G1;#y;PonhA-c7tIz8g>&*suljr;+Ae$vD*y0!?3$xxq^2ac8_8Aj*0)= z`95QKzhMs;_8?5Ms}gzG*gaCSPW{W`QPcVH+K{1|H@+*>uqO?B%CM(lx(M-K7S925 z)GwIMFBt%OI{#$Y&$%;we}(1B{BHXGVc4Ir9@l?=nF0SXo&Sa9 zDvZci8|Gj}g5^u{(K>c@{dp%FnAgIX9mDqOAIsSFHEe9d#_7Sh{_EER@_p}yjc3^S zhV?gW0$8rhM8WF(>%BdRF`Ue>DPXx;QyMmvVN(lh`RDC-fpsv`!gAEp8#ci7ozbwF zV0kSRvjO!PX&gZ2f#t=u08EcuVYjehi@|cxOTcoqmaJG;{mWu0V2*HkSPo`oSPo`2 zSav)JmL0F5tm&U;Itr`$^Oig;J6YG*tq04FH!yY^8dhv%;Ks1*cr#ARz0J9YJ~{PV%XE*SOamyu29-3{BruszLqdmFZIVc=lH z;*YE3g>)b+Pm>$(VAJ=ImJg{vPeP30;f5Un%Z`sWcE=cYEKH^|{PQI<-LPUqP3IF0 zJISz?)WZ;PB5E&zJ#kFziOdZZhm?4 zVSmDMfW_a&@E^neHS9k#;0SzpIG-s;G^|&}y6VqIae?_LH7YDeJ(?MCbknzwu^UTS z(?4H01k6sxQNwCZc(P*HxQ2~q*!ZxngAd1^Vj^QWv0;)6(&;E45sC8KkG;9{bW`*U-%x>76rte$@FfX`yjp2M| zzy%Cj$goAsc#F1})Ss{4HinCv0hcs(OTls?%No1ox>@+=YnQrV#a1wEMOZHN%EoRL z!&Zgm00)hQ{|whMo!2&<*E4KG!!|K&b69nd>eQbPBGoX~Uv(>sEluZbjNP`dT$$~S z-41Cy{PUL%f%(9&yXm}#VS5_3mtlLua;5hh%Uu5rHl6o3>;S_KH0&T)c3igY+V$s; z(6nKv{D&BJs9}e}a`}%ic1LCw*MCO=bA-ocL+p-&WhW;XyP?MJBx83ntjG1=slXh} z8L+%Q&Vl7%&V%Js@A<~=!j4^Cf1dpq!%Gaitkb!@OD;F;3d64K!MOgr3Yasw#<1&p zbmp0pVK>0?6PR0#-EFWQ-~V@~F}w?w%YUD-yFUi3*KX_h@enL0_lU4o{rU4N!0hBH zGvKqZJjc%&_Pk*)7*@Op%#L4$OMG#_s)!)$6ZSt`CgihlYJ**vGJ3 z>Q9Z`XKL5<&yx^i_@!ZA8TPed-@tM(-x<5&dtfg0Pp0!Q(|NdIzZmwbVZXr$Q0vd1 z-7$uL8unMSv#fPK#BS{Vh2`W%;4dC^7I5XCuiygaa*YJbok#8J9RB%6bi+n7Y;?oM zh`+y-9rrP;ujxFtVdI#-;~F-8i%I>pc!x$SRQXSV>f%vI`uD$IcgZ|s=Dz9Mh%r<{)@H!%U|pFI;10%aXEWX|u+|k)U4MQ9(hRt_>Aa6&`x>^NVS{1aL390g zfH6GKu!9U6VwgLmA8hOn30CJ{@ApHE;bDdy4(rZ@@ANlzM;Ugsu$KR_I7VO{%(1Zi z*6c~f?qpbAgr^(3GYl)v1m;Z6Htbx}`8>nUH|zq#F05Er{dp2%3@(sDyKDgen8w|VAu$y2x=;9V*c&jnI-LN}N-@A<6J;v_dCe!-! zK9A}Afa(08v3uCqJ!0%0>tdCEK2l{mKMBk8`HZo9*0ARcd)|!qk~>(uVmiMH%X9a- zv3tX?Hw}BM#iagx53n(O2bLYbXYAfL>;uC->>e-t^U*6~_=#bk8upoCpBwgtVPCrI zzpstqH->#{*ms6~56iCy46E%r^)HLzz+CEIYs2O-xvJK0hW&2XABO#z82-!RuLOhr zZP-7C{cG5NuzWt~#ou4=Jnf2Xc(-M2uK#kzqZq?c4eJfdeMdKTV;DAOo3-m-7Jb^V zt~I}s1IwB8GpxVqJAq*nW)|0f6J-dS*sw_qo7Aw$44WL5t2HI8$MxS-rt{Q>O#{o7 znZei%Fl@#StLtADGXZmi13QMTIVp=-44c)k*$kV#2jlv0jvkPwU53qN*xZKA1Iv|J zz}PJa>+$)&g^l4NhAkSM>vdZF&+md8wxsF1l(1I)%VKGP&2jlA8^e}`<+I`n#%@K! zRx+$u*}zo{TNRca4}#??tZCTVhOG@%D!8XvW*guwrM^c~@h&n_;^fwufPR8n#!H)%!nEE{na5;XbgO z@qWf`uwnZfc0d=a{FlXnU9cIgu=3d?KZ$mpEmv8MBJh8=JEo@DG! 
zHtduZllt@ZJI3%d!%jEs48zVe?5u7U{`vKrZdkE%V0qrpGj``2c7b6R8g|jx_|I1* z7h{;GR?Vmg0n z>^?VkUl{gfr*B<f*7?`_{U6i0F-}j%Bf|2+>}A+Urtc_X*YaN$qYA8p>1{fXZrGTnZy&?PGOXxp z1{}w*aSa>a4A>u*pKecL>?W;PSN(Y-3Yh133e$N?!=^H9YBS!nu=d(&`sWW18^amR zfHN63vta|xc(cOt%elqe#&BLU;CzP7Z~881>=uHxGj5&O7H#&``tuCN47j8ja4Ex< zHf$NgmhEDd|FT#PnCE>3!&d6*9I2GW%CLOiSRIx#Uek=Xc3}ow$FOw`Th9!*fngiL zaxj~=?4vAHqa3YN3muGP7DoM={I2g7!P<%87D-7Ng`>o=zJZrz5J-R_3%VfyZ6 z?DjTnpKaar*xh2-t*|_Gx5Ik8{&$Dze5YY| z8Fsf}_ZW6>ht>7xy&qtX@Bzaf>U3`HiL!XuutyAgv?y;Z zhUMg*4OZvhOoiVCGM!&A>_x*~GVEo;UV+uYwEUOFYsT<(!`?9LO~c+Y>}|t}cMN>j zu=fmm->?s0b#`@CKQeY7SFEf4Jkv3TpBna=VV}dY<1dZfS8CVvUl!jO!*31y&am$d z`@yguVL8C!XJa_bu;GUNV%V>+?BsW2_eaxC>(3ie#_(@gKBoU?>>B;jp_L=%2(TPX zuP#>kFN=|Z*-39$KHQE8>pFhpq>o`^8P*q;ee^4g;kbs42g{lCH+B;kHlgV|am!BX zUlx;?&XXE8nPHO~HicnRcC+x$pWEz)6`LBCGoIGiO=sBjrtbh_H)B8iXD2h8&a;@# zvl=#=VY9>Xyw3%z{C4Wkw;t56dDyC|FrOK4e!~_pY(c{oN(}#ejY5LK7BOs5!xn?( z3NB&nmNaase&+SRrOklLn$F7^wmdACdL?7Ga+|g5&*%Tzu&#eutZLY5u$=KAW4DH3 zYi1VLe`{q3TN{?0tZUf%rtb!ZZD`m=upZZc8ymw-4BOPO%?#Vzuq|M%`FjO)K0fVtE=!SX`d)v!H!49Eu=4ciNree4sg&cC_j%3?p$d9Y#o zn*k3rb_W?YL|Dr|zX&R@4yH8hU|7!NP*|?k5vK2v#;!QV7#?fbaj@+81YRsQ+n=`I-c=LxA{ z_nQG9G!Sc3y3zn<(j$!W^_5mzkx_=ngf7!{W#_%)4J~!+OSPt+jWA}BMP#EFUpPf#qO&o4%uE7T15Ho6ciqL+tt(HkM(1 z4IA6AabP{J|M~%QcH^7Q{SBJ{ma8z4v75NV>iYBKvjf{JeNw|FGi-9hrZ8+uSa*P2 z|4nTSr|Dr>7SkCvgXufKuo+>wssqEW&cEK5 z!q{zT*j7!Z>!0ucGltv1vXkwM-S&p;TjEK*J7d^_BYb9MBk+h8=9!A%-1l*kRo){PRucz?|KY-G-Ij zQHC9D*fEA3YuIt);y)iokH;_vGt_iG(Xf*YJK2nPYHio4e_5PX!&qz8tt?JA>(56m+ptxAs&KtwH^6d$HyOK|VL9mAvRz#N-EIcF)3Cc?`S^W*9+2@KFzi9Y z9)k6_{(IOMK4RFThCOE3A@|>}Iz6ay_?*(9< zr^veYpA~xg9i zQyMmvVN#i2pfBPE4{R|sy*#12Rs4LbzZogH84Z|n{i*7DDXiUroe9BJ55h8=C#F@_y$SaFk(6EaPyV$Ve5(6(a z>@veHH|z?-t~BhbCe!uLx8NAVYYe*=naaHSD#Tb?RRhuh+0$>#}&m zus02R%doc%dnYmc^LL+txzz6`!?63nun!IU$gqzM`y{Ua^5yrZ#_%)4J~!+O!@e}^ zt2S%bpI<<2!@B;wC2!cbhJ9z)_lEtDSzQ192+WsZ!?GcE!wvhzuwM=P4VI_)4_J@a z|NbtuA{u{jqRpscwtxP-&}HKF}~^C-*lc3mb070^qthO$z;5i|FW3E7*1)}RIu!L zS{blTWO~zg2E&R02F_^MOt9>D7Q<#YedjQ2PS}X~`fqMyI1em4p5NFlVAz6&Eu^ej ze?Ee(u&N;+!8UAB!xl4aaai6tOB%akDFc@_Y#GCrHEcP?;3du}&9^`t!A{hOKAV`i5;_*oKB}Q~-0vn;64Q4ciQsKP$2| zEI)nU-t^s}#iagav7<5E$*`Rb+r_Y54ciSS35Wl(*uxm^Y1m$d?cF_KHQv6)Za>2Y zkB|R+0kXd_JixF6VUkJZc!;r!DbB}_Lu$KD{mbIe8ZuP##`AZ>4ma!w!;UoUD3}I~ ze}4bh7#?fbafTgl*a?OWHSEOk&Gp|&#_(jrPJ!i>eul9-)3CGJtX+S;gs2TWv8>eH84BA&UC)sup11! z5tf6#8P@Vq*PqXRo6ffzc1Ndkdzaj8?Cvq_-X4tWzx#T?vbf){2Ml}Au!jtL*sw=n zBh~rW`~3-H_@rS^8TPbc&%ko|pA);5e?I$d3|}P)u`8upc8UmNxf zEYJIQ#;*9@z#mNKpN!qlh7B`pxM9CEnbx1bW^N3BGwgT6{($9-|1x%ecd^Ppe>|iM zM*Yj;U&H=0tkJ)x;}K!`x}p~>pRh(PjA3uXMl)=5!^SXdOvC!XV%JIi%VKO`E@3~@ zd0b;RzOn0X*aY1y{FlXq-LPU488$I22RNy*o6NAu4V$7r{_{LdiD5n~CGo6E4dVYxE% z_BYpm^BKbh%zz6Twvb^9oADNH+qLV@Uq5WaPWcx%Yzf1bgyrm(Hg?No7T15vW(Zr( z47h@^ThXwU3|rZ-RbV}?|5gR&)jtT9vs=@!wPATTtz+1_9ah)BEY|D5_Ud2Xuni2` z(6EgR+qehg`frmSuq-x(<%~Bsc3T*>rD0ncwso*N|9Zb~V+^-7Y&*lYhvlGmGdse>>|T1c8lv0V|Xbn zAC|8$c2^p9m0?%6nAD#yA_M06yv}sK-mn`?-W2KVL)!%$Yn{8^)TfZhR`}A7VG3-@XUR$rb>%TWl=Qj;|%dod$xq|N*yZ76yU4P!)Zo^Kg zKQ!zk!#+0b6Ijmpv&`c9?~4p!Uzq{FHtZY2zBS{0Z|r`6^|=1~(RBXVu;GUN3d@J! 
From 708b21a63ed448ad7a15e62bc53d16d1f192e0f1 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Mon, 20 Jan 2020 17:25:20 +0100
Subject: [PATCH 10/16] Add tool to extract vocabulary from the old LM binary
 format

---
 native_client/BUILD                          | 49 ++++++++++++-------
 native_client/enumerate_kenlm_vocabulary.cpp | 50 ++++++++++++++++++++
 2 files changed, 82 insertions(+), 17 deletions(-)
 create mode 100644 native_client/enumerate_kenlm_vocabulary.cpp

diff --git a/native_client/BUILD b/native_client/BUILD
index 250bc450..1e4a66eb 100644
--- a/native_client/BUILD
+++ b/native_client/BUILD
@@ -27,20 +27,6 @@ genrule(
     tools = [":gen_workspace_status.sh"],
 )
 
-KENLM_SOURCES = glob(
-    [
-        "kenlm/lm/*.cc",
-        "kenlm/util/*.cc",
-        "kenlm/util/double-conversion/*.cc",
-        "kenlm/lm/*.hh",
-        "kenlm/util/*.hh",
-        "kenlm/util/double-conversion/*.h",
-    ],
-    exclude = [
-        "kenlm/*/*test.cc",
-        "kenlm/*/*main.cc",
-    ],
-)
 
 OPENFST_SOURCES_PLATFORM = select({
     "//tensorflow:windows": glob(["ctcdecode/third_party/openfst-1.6.9-win/src/lib/*.cc"]),
@@ -60,6 +46,27 @@ LINUX_LINKOPTS = [
     "-Wl,-export-dynamic",
 ]
 
+cc_library(
+    name = "kenlm",
+    srcs = glob([
+        "kenlm/lm/*.cc",
+        "kenlm/util/*.cc",
+        "kenlm/util/double-conversion/*.cc",
+        "kenlm/util/double-conversion/*.h",
+    ],
+    exclude = [
+        "kenlm/*/*test.cc",
+        "kenlm/*/*main.cc",
+    ],),
+    hdrs = glob([
+        "kenlm/lm/*.hh",
+        "kenlm/util/*.hh",
+    ]),
+    copts = ["-std=c++11"],
+    defines = ["KENLM_MAX_ORDER=6"],
+    includes = ["kenlm"],
+)
+
 cc_library(
     name = "decoder",
     srcs = [
@@ -69,17 +76,16 @@ cc_library(
         "ctcdecode/ctc_beam_search_decoder.cpp",
         "ctcdecode/ctc_beam_search_decoder.h",
         "ctcdecode/scorer.cpp",
         "ctcdecode/path_trie.cpp",
         "ctcdecode/path_trie.h",
-    ] + KENLM_SOURCES + OPENFST_SOURCES_PLATFORM,
+    ] + OPENFST_SOURCES_PLATFORM,
     hdrs = [
         "ctcdecode/ctc_beam_search_decoder.h",
         "ctcdecode/scorer.h",
     ],
-    defines = ["KENLM_MAX_ORDER=6"],
includes = [ ".", "ctcdecode/third_party/ThreadPool", - "kenlm", ] + OPENFST_INCLUDES_PLATFORM, + deps = [":kenlm"] ) tf_cc_shared_object( @@ -181,6 +187,15 @@ genrule( cmd = "dsymutil $(location :libdeepspeech.so) -o $@" ) +cc_binary( + name = "enumerate_kenlm_vocabulary", + srcs = [ + "enumerate_kenlm_vocabulary.cpp", + ], + deps = [":kenlm"], + copts = ["-std=c++11"], +) + cc_binary( name = "trie_load", srcs = [ diff --git a/native_client/enumerate_kenlm_vocabulary.cpp b/native_client/enumerate_kenlm_vocabulary.cpp new file mode 100644 index 00000000..79a8cab6 --- /dev/null +++ b/native_client/enumerate_kenlm_vocabulary.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include + +#include "lm/enumerate_vocab.hh" +#include "lm/virtual_interface.hh" +#include "lm/word_index.hh" +#include "lm/model.hh" + +const std::string START_TOKEN = ""; +const std::string UNK_TOKEN = ""; +const std::string END_TOKEN = ""; + +// Implement a callback to retrieve the dictionary of language model. +class RetrieveStrEnumerateVocab : public lm::EnumerateVocab +{ +public: + RetrieveStrEnumerateVocab() {} + + void Add(lm::WordIndex index, const StringPiece &str) { + vocabulary.push_back(std::string(str.data(), str.length())); + } + + std::vector vocabulary; +}; + +int main(int argc, char** argv) +{ + if (argc != 3) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + return -1; + } + + const char* kenlm_model = argv[1]; + const char* output_path = argv[2]; + + std::unique_ptr language_model_; + lm::ngram::Config config; + RetrieveStrEnumerateVocab enumerate; + config.enumerate_vocab = &enumerate; + language_model_.reset(lm::ngram::LoadVirtual(kenlm_model, config)); + + std::ofstream fout(output_path); + for (const std::string& word : enumerate.vocabulary) { + fout << word << "\n"; + } + + return 0; +} From 1e2eb962485555d8fea364d7ffa6cb1dbc14f317 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 21 Jan 2020 11:54:01 +0100 Subject: [PATCH 11/16] Update all API consumers --- DeepSpeech.py | 8 +- Dockerfile | 5 +- bin/run-tc-ldc93s1_checkpoint.sh | 3 +- bin/run-tc-ldc93s1_new.sh | 3 +- bin/run-tc-ldc93s1_singleshotinference.sh | 6 +- bin/run-tc-ldc93s1_tflite.sh | 6 +- data/lm/generate_package.py | 2 +- doc/C-API.rst | 8 +- evaluate.py | 5 +- evaluate_tflite.py | 15 +- native_client/args.h | 73 +++---- native_client/client.cc | 17 +- native_client/ctcdecode/__init__.py | 15 +- native_client/ctcdecode/scorer.h | 13 -- native_client/deepspeech.cc | 2 +- native_client/deepspeech.h | 4 +- native_client/deepspeech_compat.h | 141 -------------- .../dotnet/DeepSpeechClient/DeepSpeech.cs | 69 +++---- .../DeepSpeechClient/Enums/ErrorCodes.cs | 3 +- .../Interfaces/IDeepSpeech.cs | 30 +-- .../dotnet/DeepSpeechClient/NativeImp.cs | 15 +- .../dotnet/DeepSpeechConsole/Program.cs | 18 +- .../deepspeech/DeepSpeechActivity.java | 2 - .../libdeepspeech/test/BasicTest.java | 10 +- .../libdeepspeech/DeepSpeechModel.java | 32 +++- native_client/javascript/client.js | 23 ++- native_client/javascript/index.js | 89 +++++---- native_client/python/__init__.py | 180 ++++++++++-------- native_client/python/client.py | 27 +-- native_client/test/concurrent_streams.py | 24 +-- taskcluster/arm64-build.sh | 1 - taskcluster/cuda-build.sh | 1 - taskcluster/examples-base.tyml | 4 +- taskcluster/host-build.sh | 1 - taskcluster/rpi3-build.sh | 1 - taskcluster/tc-evaluate_tflite.sh | 2 +- taskcluster/tc-tests-utils.sh | 40 ++-- taskcluster/win-build.sh | 1 - taskcluster/win-opt-base.tyml | 2 +- transcribe.py | 2 +- util/flags.py | 6 
+- 41 files changed, 393 insertions(+), 516 deletions(-) delete mode 100644 native_client/deepspeech_compat.h diff --git a/DeepSpeech.py b/DeepSpeech.py index 67971b48..e6d3a929 100755 --- a/DeepSpeech.py +++ b/DeepSpeech.py @@ -882,8 +882,7 @@ def package_zip(): } }, f) - shutil.copy(FLAGS.lm_binary_path, export_dir) - shutil.copy(FLAGS.lm_trie_path, export_dir) + shutil.copy(FLAGS.scorer_path, export_dir) archive = shutil.make_archive(zip_filename, 'zip', export_dir) log_info('Exported packaged model {}'.format(archive)) @@ -926,10 +925,9 @@ def do_single_file_inference(input_file_path): logits = np.squeeze(logits) - if FLAGS.lm_binary_path: + if FLAGS.scorer_path: scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, - FLAGS.lm_binary_path, FLAGS.lm_trie_path, - Config.alphabet) + FLAGS.scorer_path, Config.alphabet) else: scorer = None decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width, diff --git a/Dockerfile b/Dockerfile index 58b27891..56afdbfc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -172,7 +172,7 @@ RUN ./configure # Build DeepSpeech -RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} +RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} ### ### Using TensorFlow upstream should work @@ -187,8 +187,7 @@ RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_s # RUN pip3 install /tmp/tensorflow_pkg/*.whl # Copy built libs to /DeepSpeech/native_client -RUN cp /tensorflow/bazel-bin/native_client/generate_trie /DeepSpeech/native_client/ \ - && cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ +RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ # Install TensorFlow WORKDIR /DeepSpeech/ diff --git a/bin/run-tc-ldc93s1_checkpoint.sh b/bin/run-tc-ldc93s1_checkpoint.sh index ae0836a1..0602dada 100755 --- a/bin/run-tc-ldc93s1_checkpoint.sh +++ b/bin/run-tc-ldc93s1_checkpoint.sh @@ -21,8 +21,7 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --n_hidden 100 --epochs 1 \ --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' \ --learning_rate 0.001 --dropout_rate 0.05 \ - --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \ - --lm_trie_path 'data/smoke_test/vocab.trie' | tee /tmp/resume.log + --scorer_path 'data/smoke_test/pruned_lm.scorer' | tee /tmp/resume.log if ! 
grep "Restored variables from most recent checkpoint" /tmp/resume.log; then echo "Did not resume training from checkpoint" diff --git a/bin/run-tc-ldc93s1_new.sh b/bin/run-tc-ldc93s1_new.sh index ff8751ed..8e9cf4d4 100755 --- a/bin/run-tc-ldc93s1_new.sh +++ b/bin/run-tc-ldc93s1_new.sh @@ -25,6 +25,5 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --n_hidden 100 --epochs $epoch_count \ --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' \ --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train' \ - --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \ - --lm_trie_path 'data/smoke_test/vocab.trie' \ + --scorer_path 'data/smoke_test/pruned_lm.scorer' \ --audio_sample_rate ${audio_sample_rate} diff --git a/bin/run-tc-ldc93s1_singleshotinference.sh b/bin/run-tc-ldc93s1_singleshotinference.sh index fc30c48f..997bf08f 100755 --- a/bin/run-tc-ldc93s1_singleshotinference.sh +++ b/bin/run-tc-ldc93s1_singleshotinference.sh @@ -21,12 +21,10 @@ python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --n_hidden 100 --epochs 1 \ --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' --checkpoint_secs 0 \ --learning_rate 0.001 --dropout_rate 0.05 \ - --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \ - --lm_trie_path 'data/smoke_test/vocab.trie' + --scorer_path 'data/smoke_test/pruned_lm.scorer' python -u DeepSpeech.py \ --n_hidden 100 \ --checkpoint_dir '/tmp/ckpt' \ - --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \ - --lm_trie_path 'data/smoke_test/vocab.trie' \ + --scorer_path 'data/smoke_test/pruned_lm.scorer' \ --one_shot_infer 'data/smoke_test/LDC93S1.wav' diff --git a/bin/run-tc-ldc93s1_tflite.sh b/bin/run-tc-ldc93s1_tflite.sh index b5a7772c..f7daca21 100755 --- a/bin/run-tc-ldc93s1_tflite.sh +++ b/bin/run-tc-ldc93s1_tflite.sh @@ -20,8 +20,7 @@ python -u DeepSpeech.py --noshow_progressbar \ --n_hidden 100 \ --checkpoint_dir '/tmp/ckpt' \ --export_dir '/tmp/train_tflite' \ - --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \ - --lm_trie_path 'data/smoke_test/vocab.trie' \ + --scorer_path 'data/smoke_test/pruned_lm.scorer' \ --audio_sample_rate ${audio_sample_rate} \ --export_tflite @@ -31,8 +30,7 @@ python -u DeepSpeech.py --noshow_progressbar \ --n_hidden 100 \ --checkpoint_dir '/tmp/ckpt' \ --export_dir '/tmp/train_tflite/en-us' \ - --lm_binary_path 'data/smoke_test/vocab.pruned.lm' \ - --lm_trie_path 'data/smoke_test/vocab.trie' \ + --scorer_path 'data/smoke_test/pruned_lm.scorer' \ --audio_sample_rate ${audio_sample_rate} \ --export_language 'Fake English (fk-FK)' \ --export_zip diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py index 4d064fdd..d8f39c4e 100644 --- a/data/lm/generate_package.py +++ b/data/lm/generate_package.py @@ -50,7 +50,7 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8, scorer.set_alphabet(alphabet) scorer.set_utf8_mode(use_utf8) scorer.reset_params(default_alpha, default_beta) - scorer.load_lm(lm_path, "") + scorer.load_lm(lm_path) scorer.fill_dictionary(list(words)) shutil.copy(lm_path, package_path) scorer.save_dictionary(package_path, True) # append, not overwrite diff --git a/doc/C-API.rst b/doc/C-API.rst index 0541247e..6556d4bb 100644 --- a/doc/C-API.rst +++ b/doc/C-API.rst @@ -7,7 +7,13 @@ C .. doxygenfunction:: DS_FreeModel :project: deepspeech-c -.. doxygenfunction:: DS_EnableDecoderWithLM +.. doxygenfunction:: DS_EnableExternalScorer + :project: deepspeech-c + +.. doxygenfunction:: DS_DisableExternalScorer + :project: deepspeech-c + +.. 
doxygenfunction:: DS_SetScorerAlphaBeta :project: deepspeech-c .. doxygenfunction:: DS_GetModelSampleRate diff --git a/evaluate.py b/evaluate.py index 8df73966..cdb13e31 100755 --- a/evaluate.py +++ b/evaluate.py @@ -42,10 +42,9 @@ def sparse_tuple_to_texts(sp_tuple, alphabet): def evaluate(test_csvs, create_model, try_loading): - if FLAGS.lm_binary_path: + if FLAGS.scorer_path: scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, - FLAGS.lm_binary_path, FLAGS.lm_trie_path, - Config.alphabet) + FLAGS.scorer_path, Config.alphabet) else: scorer = None diff --git a/evaluate_tflite.py b/evaluate_tflite.py index 8b2ba453..bdc5f231 100644 --- a/evaluate_tflite.py +++ b/evaluate_tflite.py @@ -27,17 +27,18 @@ This module should be self-contained: - pip install native_client/python/dist/deepspeech*.whl - pip install -r requirements_eval_tflite.txt -Then run with a TF Lite model, LM/trie and a CSV test file +Then run with a TF Lite model, LM and a CSV test file ''' BEAM_WIDTH = 500 LM_ALPHA = 0.75 LM_BETA = 1.85 -def tflite_worker(model, lm, trie, queue_in, queue_out, gpu_mask): +def tflite_worker(model, scorer, queue_in, queue_out, gpu_mask): os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_mask) ds = Model(model, BEAM_WIDTH) - ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA) + ds.enableExternalScorer(scorer) + ds.setScorerAlphaBeta(LM_ALPHA, LM_BETA) while True: try: @@ -64,7 +65,7 @@ def main(args, _): processes = [] for i in range(args.proc): - worker_process = Process(target=tflite_worker, args=(args.model, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i)) + worker_process = Process(target=tflite_worker, args=(args.model, args.scorer, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i)) worker_process.start() # Launch reader() as a separate python process processes.append(worker_process) @@ -113,10 +114,8 @@ def parse_args(): parser = argparse.ArgumentParser(description='Computing TFLite accuracy') parser.add_argument('--model', required=True, help='Path to the model (protocol buffer binary file)') - parser.add_argument('--lm', required=True, - help='Path to the language model binary file') - parser.add_argument('--trie', required=True, - help='Path to the language model trie file created with native_client/generate_trie') + parser.add_argument('--scorer', required=True, + help='Path to the external scorer file') parser.add_argument('--csv', required=True, help='Path to the CSV source file') parser.add_argument('--proc', required=False, default=cpu_count(), type=int, diff --git a/native_client/args.h b/native_client/args.h index 6342763f..a158fb18 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -12,19 +12,17 @@ char* model = NULL; -char* lm = NULL; - -char* trie = NULL; +char* scorer = NULL; char* audio = NULL; int beam_width = 500; -float lm_alpha = 0.75f; +bool set_alphabeta = false; -float lm_beta = 1.85f; +float lm_alpha = 0.f; -bool load_without_trie = false; +float lm_beta = 0.f; bool show_times = false; @@ -39,39 +37,36 @@ int stream_size = 0; void PrintHelp(const char* bin) { std::cout << - "Usage: " << bin << " --model MODEL [--lm LM --trie TRIE] --audio AUDIO [-t] [-e]\n" + "Usage: " << bin << " --model MODEL [--scorer SCORER] --audio AUDIO [-t] [-e]\n" "\n" "Running DeepSpeech inference.\n" "\n" - " --model MODEL Path to the model (protocol buffer binary file)\n" - " --lm LM Path to the language model binary file\n" - " --trie TRIE Path to the language model trie file created with 
native_client/generate_trie\n" - " --audio AUDIO Path to the audio file to run (WAV format)\n" - " --beam_width BEAM_WIDTH Value for decoder beam width (int)\n" - " --lm_alpha LM_ALPHA Value for language model alpha param (float)\n" - " --lm_beta LM_BETA Value for language model beta param (float)\n" - " -t Run in benchmark mode, output mfcc & inference time\n" - " --extended Output string from extended metadata\n" - " --json Extended output, shows word timings as JSON\n" - " --stream size Run in stream mode, output intermediate results\n" - " --help Show help\n" - " --version Print version and exits\n"; + "\t--model MODEL\t\tPath to the model (protocol buffer binary file)\n" + "\t--scorer SCORER\t\tPath to the external scorer file\n" + "\t--audio AUDIO\t\tPath to the audio file to run (WAV format)\n" + "\t--beam_width BEAM_WIDTH\tValue for decoder beam width (int)\n" + "\t--lm_alpha LM_ALPHA\tValue for language model alpha param (float)\n" + "\t--lm_beta LM_BETA\tValue for language model beta param (float)\n" + "\t-t\t\t\tRun in benchmark mode, output mfcc & inference time\n" + "\t--extended\t\tOutput string from extended metadata\n" + "\t--json\t\t\tExtended output, shows word timings as JSON\n" + "\t--stream size\t\tRun in stream mode, output intermediate results\n" + "\t--help\t\t\tShow help\n" + "\t--version\t\tPrint version and exits\n"; DS_PrintVersions(); exit(1); } bool ProcessArgs(int argc, char** argv) { - const char* const short_opts = "m:a:l:r:w:c:d:b:tehv"; + const char* const short_opts = "m:a:s:r:w:c:d:b:tehv"; const option long_opts[] = { {"model", required_argument, nullptr, 'm'}, - {"lm", required_argument, nullptr, 'l'}, - {"trie", required_argument, nullptr, 'r'}, + {"scorer", required_argument, nullptr, 'l'}, {"audio", required_argument, nullptr, 'w'}, {"beam_width", required_argument, nullptr, 'b'}, {"lm_alpha", required_argument, nullptr, 'c'}, {"lm_beta", required_argument, nullptr, 'd'}, - {"run_very_slowly_without_trie_I_really_know_what_Im_doing", no_argument, nullptr, 999}, {"t", no_argument, nullptr, 't'}, {"extended", no_argument, nullptr, 'e'}, {"json", no_argument, nullptr, 'j'}, @@ -95,31 +90,25 @@ bool ProcessArgs(int argc, char** argv) break; case 'l': - lm = optarg; - break; - - case 'r': - trie = optarg; + scorer = optarg; break; case 'w': audio = optarg; break; - case 'b': - beam_width = atoi(optarg); - break; - - case 'c': - lm_alpha = atof(optarg); - break; - - case 'd': - lm_beta = atof(optarg); - break; + case 'b': + beam_width = atoi(optarg); + break; + + case 'c': + set_alphabeta = true; + lm_alpha = atof(optarg); + break; - case 999: - load_without_trie = true; + case 'd': + set_alphabeta = true; + lm_beta = atof(optarg); break; case 't': diff --git a/native_client/client.cc b/native_client/client.cc index 99af904e..718fba75 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -374,16 +374,19 @@ main(int argc, char **argv) return 1; } - if (lm && (trie || load_without_trie)) { - int status = DS_EnableDecoderWithLM(ctx, - lm, - trie, - lm_alpha, - lm_beta); + if (scorer) { + int status = DS_EnableExternalScorer(ctx, scorer); if (status != 0) { - fprintf(stderr, "Could not enable CTC decoder with LM.\n"); + fprintf(stderr, "Could not enable external scorer.\n"); return 1; } + if (set_alphabeta) { + status = DS_SetScorerAlphaBeta(ctx, lm_alpha, lm_beta); + if (status != 0) { + fprintf(stderr, "Error setting scorer alpha and beta.\n"); + return 1; + } + } } #ifndef NO_SOX diff --git a/native_client/ctcdecode/__init__.py 
b/native_client/ctcdecode/__init__.py index 3fab4eb7..8ba2e9b2 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -12,12 +12,11 @@ class Scorer(swigwrapper.Scorer): :type alpha: float :param beta: Word insertion bonus. :type beta: float - :model_path: Path to load language model. - :trie_path: Path to trie file. + :model_path: Path to load scorer. :alphabet: Alphabet :type model_path: basestring """ - def __init__(self, alpha=None, beta=None, model_path=None, trie_path=None, alphabet=None): + def __init__(self, alpha=None, beta=None, model_path=None, alphabet=None): super(Scorer, self).__init__() # Allow bare initialization if alphabet: @@ -27,15 +26,15 @@ class Scorer(swigwrapper.Scorer): if err != 0: raise ValueError("Error when deserializing alphabet.") - err = self.init(alpha, beta, - model_path.encode('utf-8'), - trie_path.encode('utf-8'), + err = self.init(model_path.encode('utf-8'), native_alphabet) if err != 0: raise ValueError("Scorer initialization failed with error code {}".format(err), err) - def load_lm(self, lm_path, trie_path): - super(Scorer, self).load_lm(lm_path.encode('utf-8'), trie_path.encode('utf-8')) + self.reset_params(alpha, beta) + + def load_lm(self, lm_path): + super(Scorer, self).load_lm(lm_path.encode('utf-8')) def save_dictionary(self, save_path, *args, **kwargs): super(Scorer, self).save_dictionary(save_path.encode('utf-8'), *args, **kwargs) diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h index db58d581..b2e5c817 100644 --- a/native_client/ctcdecode/scorer.h +++ b/native_client/ctcdecode/scorer.h @@ -6,7 +6,6 @@ #include #include -#include "lm/enumerate_vocab.hh" #include "lm/virtual_interface.hh" #include "lm/word_index.hh" #include "util/string_piece.hh" @@ -19,18 +18,6 @@ const std::string START_TOKEN = ""; const std::string UNK_TOKEN = ""; const std::string END_TOKEN = ""; -// Implement a callback to retrieve the dictionary of language model. -class RetrieveStrEnumerateVocab : public lm::EnumerateVocab { -public: - RetrieveStrEnumerateVocab() {} - - void Add(lm::WordIndex index, const StringPiece &str) { - vocabulary.push_back(std::string(str.data(), str.length())); - } - - std::vector vocabulary; -}; - /* External scorer to query score for n-gram or sentence, including language * model scoring and word insertion. * diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index e8b3dc02..0a61f3de 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -310,7 +310,7 @@ DS_EnableExternalScorer(ModelState* aCtx, aCtx->scorer_.reset(new Scorer()); int err = aCtx->scorer_->init(aScorerPath, aCtx->alphabet_); if (err != 0) { - return DS_ERR_INVALID_LM; + return DS_ERR_INVALID_SCORER; } return DS_ERR_OK; } diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index 94f6664e..4e017653 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -59,7 +59,7 @@ enum DeepSpeech_Error_Codes // Invalid parameters DS_ERR_INVALID_ALPHABET = 0x2000, DS_ERR_INVALID_SHAPE = 0x2001, - DS_ERR_INVALID_LM = 0x2002, + DS_ERR_INVALID_SCORER = 0x2002, DS_ERR_MODEL_INCOMPATIBLE = 0x2003, DS_ERR_SCORER_NOT_ENABLED = 0x2004, @@ -129,7 +129,7 @@ DEEPSPEECH_EXPORT int DS_DisableExternalScorer(ModelState* aCtx); /** - * @brief Set hyperparameters alpha and beta of a KenLM external scorer. + * @brief Set hyperparameters alpha and beta of the external scorer. * * @param aCtx The ModelState pointer for the model being changed. 
* @param aAlpha The alpha hyperparameter of the decoder. Language model weight. diff --git a/native_client/deepspeech_compat.h b/native_client/deepspeech_compat.h deleted file mode 100644 index c83bcbc8..00000000 --- a/native_client/deepspeech_compat.h +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef DEEPSPEECH_COMPAT_H -#define DEEPSPEECH_COMPAT_H - -#include "deepspeech.h" - -#warning This header is a convenience wrapper for compatibility with \ - the previous API, it has deprecated function names and arguments. \ - If possible, update your code instead of using this header. - -/** - * @brief An object providing an interface to a trained DeepSpeech model. - * - * @param aModelPath The path to the frozen model graph. - * @param aNCep UNUSED, DEPRECATED. - * @param aNContext UNUSED, DEPRECATED. - * @param aAlphabetConfigPath UNUSED, DEPRECATED. - * @param aBeamWidth The beam width used by the decoder. A larger beam - * width generates better results at the cost of decoding - * time. - * @param[out] retval a ModelState pointer - * - * @return Zero on success, non-zero on failure. - */ -int DS_CreateModel(const char* aModelPath, - unsigned int /*aNCep*/, - unsigned int /*aNContext*/, - const char* /*aAlphabetConfigPath*/, - unsigned int aBeamWidth, - ModelState** retval) -{ - return DS_CreateModel(aModelPath, aBeamWidth, retval); -} - -/** - * @brief Frees associated resources and destroys model object. - */ -void DS_DestroyModel(ModelState* ctx) -{ - return DS_FreeModel(ctx); -} - -/** - * @brief Enable decoding using beam scoring with a KenLM language model. - * - * @param aCtx The ModelState pointer for the model being changed. - * @param aAlphabetConfigPath UNUSED, DEPRECATED. - * @param aLMPath The path to the language model binary file. - * @param aTriePath The path to the trie file build from the same vocabu- - * lary as the language model binary. - * @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model - weight. - * @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion - weight. - * - * @return Zero on success, non-zero on failure (invalid arguments). - */ -int DS_EnableDecoderWithLM(ModelState* aCtx, - const char* /*aAlphabetConfigPath*/, - const char* aLMPath, - const char* aTriePath, - float aLMAlpha, - float aLMBeta) -{ - return DS_EnableDecoderWithLM(aCtx, aLMPath, aTriePath, aLMAlpha, aLMBeta); -} - -/** - * @brief Create a new streaming inference state. The streaming state returned - * by this function can then be passed to {@link DS_FeedAudioContent()} - * and {@link DS_FinishStream()}. - * - * @param aCtx The ModelState pointer for the model to use. - * @param aSampleRate UNUSED, DEPRECATED. - * @param[out] retval an opaque pointer that represents the streaming state. Can - * be NULL if an error occurs. - * - * @return Zero for success, non-zero on failure. - */ -int DS_SetupStream(ModelState* aCtx, - unsigned int /*aSampleRate*/, - StreamingState** retval) -{ - return DS_CreateStream(aCtx, retval); -} - -/** - * @brief Destroy a streaming state without decoding the computed logits. This - * can be used if you no longer need the result of an ongoing streaming - * inference and don't want to perform a costly decode operation. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * - * @note This method will free the state pointer (@p aSctx). - */ -void DS_DiscardStream(StreamingState* aSctx) -{ - return DS_FreeStream(aSctx); -} - -/** - * @brief Use the DeepSpeech model to perform Speech-To-Text. 
- * - * @param aCtx The ModelState pointer for the model to use. - * @param aBuffer A 16-bit, mono raw audio signal at the appropriate - * sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in the audio signal. - * @param aSampleRate UNUSED, DEPRECATED. - * - * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. Returns NULL on error. - */ -char* DS_SpeechToText(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize, - unsigned int /*aSampleRate*/) -{ - return DS_SpeechToText(aCtx, aBuffer, aBufferSize); -} - -/** - * @brief Use the DeepSpeech model to perform Speech-To-Text and output metadata - * about the results. - * - * @param aCtx The ModelState pointer for the model to use. - * @param aBuffer A 16-bit, mono raw audio signal at the appropriate - * sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in the audio signal. - * @param aSampleRate UNUSED, DEPRECATED. - * - * @return Outputs a struct of individual letters along with their timing information. - * The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error. - */ -Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize, - unsigned int /*aSampleRate*/) -{ - return DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize); -} - -#endif /* DEEPSPEECH_COMPAT_H */ diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs index 754be8ae..e5e33370 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs @@ -82,8 +82,8 @@ namespace DeepSpeechClient throw new ArgumentException("Invalid alphabet embedded in model. (Data corruption?)"); case ErrorCodes.DS_ERR_INVALID_SHAPE: throw new ArgumentException("Invalid model shape."); - case ErrorCodes.DS_ERR_INVALID_LM: - throw new ArgumentException("Invalid language model file."); + case ErrorCodes.DS_ERR_INVALID_SCORER: + throw new ArgumentException("Invalid scorer file."); case ErrorCodes.DS_ERR_FAIL_INIT_MMAP: throw new ArgumentException("Failed to initialize memory mapped model."); case ErrorCodes.DS_ERR_FAIL_INIT_SESS: @@ -100,6 +100,8 @@ namespace DeepSpeechClient throw new ArgumentException("Error failed to create session."); case ErrorCodes.DS_ERR_MODEL_INCOMPATIBLE: throw new ArgumentException("Error incompatible model."); + case ErrorCodes.DS_ERR_SCORER_NOT_ENABLED: + throw new ArgumentException("External scorer is not enabled."); default: throw new ArgumentException("Unknown error, please make sure you are using the correct native binary."); } @@ -114,45 +116,48 @@ namespace DeepSpeechClient } ///

- /// Enable decoding using beam scoring with a KenLM language model. + /// Enable decoding using an external scorer. /// - /// The path to the language model binary file. - /// The path to the trie file build from the same vocabulary as the language model binary. - /// The alpha hyperparameter of the CTC decoder. Language Model weight. - /// The beta hyperparameter of the CTC decoder. Word insertion weight. - /// Thrown when the native binary failed to enable decoding with a language model. - /// Thrown when cannot find the language model or trie file. - public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath, - float aLMAlpha, float aLMBeta) + /// The path to the external scorer file. + /// Thrown when the native binary failed to enable decoding with an external scorer. + /// Thrown when cannot find the scorer file. + public unsafe void EnableExternalScorer(string aScorerPath) { string exceptionMessage = null; - if (string.IsNullOrWhiteSpace(aLMPath)) + if (string.IsNullOrWhiteSpace(aScorerPath)) { - exceptionMessage = "Path to the language model file cannot be empty."; + throw new FileNotFoundException("Path to the scorer file cannot be empty."); } - if (!File.Exists(aLMPath)) + if (!File.Exists(aScorerPath)) { - exceptionMessage = $"Cannot find the language model file: {aLMPath}"; - } - if (string.IsNullOrWhiteSpace(aTriePath)) - { - exceptionMessage = "Path to the trie file cannot be empty."; - } - if (!File.Exists(aTriePath)) - { - exceptionMessage = $"Cannot find the trie file: {aTriePath}"; + throw new FileNotFoundException($"Cannot find the scorer file: {aScorerPath}"); } - if (exceptionMessage != null) - { - throw new FileNotFoundException(exceptionMessage); - } + var resultCode = NativeImp.DS_EnableExternalScorer(_modelStatePP, aScorerPath); + EvaluateResultCode(resultCode); + } - var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP, - aLMPath, - aTriePath, - aLMAlpha, - aLMBeta); + /// + /// Disable decoding using an external scorer. + /// + /// Thrown when an external scorer is not enabled. + public unsafe void DisableExternalScorer() + { + var resultCode = NativeImp.DS_DisableExternalScorer(_modelStatePP); + EvaluateResultCode(resultCode); + } + + /// + /// Set hyperparameters alpha and beta of the external scorer. + /// + /// The alpha hyperparameter of the decoder. Language model weight. + /// The beta hyperparameter of the decoder. Word insertion weight. + /// Thrown when an external scorer is not enabled. 
+ public unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta) + { + var resultCode = NativeImp.DS_SetScorerAlphaBeta(_modelStatePP, + aAlpha, + aBeta); EvaluateResultCode(resultCode); } diff --git a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs index 019564c2..30660add 100644 --- a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs +++ b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs @@ -14,8 +14,9 @@ // Invalid parameters DS_ERR_INVALID_ALPHABET = 0x2000, DS_ERR_INVALID_SHAPE = 0x2001, - DS_ERR_INVALID_LM = 0x2002, + DS_ERR_INVALID_SCORER = 0x2002, DS_ERR_MODEL_INCOMPATIBLE = 0x2003, + DS_ERR_SCORER_NOT_ENABLED = 0x2004, // Runtime failures DS_ERR_FAIL_INIT_MMAP = 0x3000, diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs index 734f4240..ecbfb7e9 100644 --- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs +++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs @@ -21,18 +21,26 @@ namespace DeepSpeechClient.Interfaces unsafe int GetModelSampleRate(); /// - /// Enable decoding using beam scoring with a KenLM language model. + /// Enable decoding using an external scorer. /// - /// The path to the language model binary file. - /// The path to the trie file build from the same vocabulary as the language model binary. - /// The alpha hyperparameter of the CTC decoder. Language Model weight. - /// The beta hyperparameter of the CTC decoder. Word insertion weight. - /// Thrown when the native binary failed to enable decoding with a language model. - /// Thrown when cannot find the language model or trie file. - unsafe void EnableDecoderWithLM(string aLMPath, - string aTriePath, - float aLMAlpha, - float aLMBeta); + /// The path to the external scorer file. + /// Thrown when the native binary failed to enable decoding with an external scorer. + /// Thrown when cannot find the scorer file. + unsafe void EnableExternalScorer(string aScorerPath); + + /// + /// Disable decoding using an external scorer. + /// + /// Thrown when an external scorer is not enabled. + unsafe void DisableExternalScorer(); + + /// + /// Set hyperparameters alpha and beta of the external scorer. + /// + /// The alpha hyperparameter of the decoder. Language model weight. + /// The beta hyperparameter of the decoder. Word insertion weight. + /// Thrown when an external scorer is not enabled. + unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta); /// /// Use the DeepSpeech model to perform Speech-To-Text. 
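The three calls introduced above (EnableExternalScorer, DisableExternalScorer, SetScorerAlphaBeta) replace the single EnableDecoderWithLM entry point across every binding touched by this patch. As a rough sketch of the new call sequence, here it is through the updated Python package; the model and scorer file names are placeholders rather than files shipped with this change, and alpha/beta only need to be set when overriding the values packaged in the scorer:

from deepspeech import Model

# Placeholders: substitute the real exported model and scorer paths.
ds = Model('output_graph.pbmm', 500)       # 500 = decoder beam width
ds.enableExternalScorer('kenlm.scorer')    # load the packaged external scorer
ds.setScorerAlphaBeta(0.75, 1.85)          # optional: override the packaged defaults
# Scoring can be switched off again without reloading the acoustic model.
ds.disableExternalScorer()
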
diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/DeepSpeechClient/NativeImp.cs index 3b79282b..1c49feec 100644 --- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs +++ b/native_client/dotnet/DeepSpeechClient/NativeImp.cs @@ -23,11 +23,16 @@ namespace DeepSpeechClient internal unsafe static extern int DS_GetModelSampleRate(IntPtr** aCtx); [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(IntPtr** aCtx, - string aLMPath, - string aTriePath, - float aLMAlpha, - float aLMBeta); + internal static unsafe extern ErrorCodes DS_EnableExternalScorer(IntPtr** aCtx, + string aScorerPath); + + [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes DS_DisableExternalScorer(IntPtr** aCtx); + + [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] + internal static unsafe extern ErrorCodes DS_SetScorerAlphaBeta(IntPtr** aCtx, + float aAlpha, + float aBeta); [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] diff --git a/native_client/dotnet/DeepSpeechConsole/Program.cs b/native_client/dotnet/DeepSpeechConsole/Program.cs index 8c75a481..1f6e299b 100644 --- a/native_client/dotnet/DeepSpeechConsole/Program.cs +++ b/native_client/dotnet/DeepSpeechConsole/Program.cs @@ -35,22 +35,18 @@ namespace CSharpExamples static void Main(string[] args) { string model = null; - string lm = null; - string trie = null; + string scorer = null; string audio = null; bool extended = false; if (args.Length > 0) { model = GetArgument(args, "--model"); - lm = GetArgument(args, "--lm"); - trie = GetArgument(args, "--trie"); + scorer = GetArgument(args, "--scorer"); audio = GetArgument(args, "--audio"); extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended")); } const uint BEAM_WIDTH = 500; - const float LM_ALPHA = 0.75f; - const float LM_BETA = 1.85f; Stopwatch stopwatch = new Stopwatch(); try @@ -64,14 +60,10 @@ namespace CSharpExamples Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms"); stopwatch.Reset(); - if (lm != null) + if (scorer != null) { - Console.WriteLine("Loadin LM..."); - sttClient.EnableDecoderWithLM( - lm ?? "lm.binary", - trie ?? "trie", - LM_ALPHA, LM_BETA); - + Console.WriteLine("Loading scorer..."); + sttClient.EnableExternalScorer(scorer ?? "kenlm.scorer"); } string audioFile = audio ?? 
"arctic_a0024.wav"; diff --git a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java b/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java index a1065d4e..12e758df 100644 --- a/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java +++ b/native_client/java/app/src/main/java/org/mozilla/deepspeech/DeepSpeechActivity.java @@ -31,8 +31,6 @@ public class DeepSpeechActivity extends AppCompatActivity { Button _startInference; final int BEAM_WIDTH = 50; - final float LM_ALPHA = 0.75f; - final float LM_BETA = 1.85f; private char readLEChar(RandomAccessFile f) throws IOException { byte b1 = f.readByte(); diff --git a/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java b/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java index 50ad71f2..bb6bbe42 100644 --- a/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java +++ b/native_client/java/libdeepspeech/src/androidTest/java/org/mozilla/deepspeech/libdeepspeech/test/BasicTest.java @@ -30,15 +30,11 @@ import java.nio.ByteBuffer; public class BasicTest { public static final String modelFile = "/data/local/tmp/test/output_graph.tflite"; - public static final String lmFile = "/data/local/tmp/test/lm.binary"; - public static final String trieFile = "/data/local/tmp/test/trie"; + public static final String scorerFile = "/data/local/tmp/test/kenlm.scorer"; public static final String wavFile = "/data/local/tmp/test/LDC93S1.wav"; public static final int BEAM_WIDTH = 50; - public static final float LM_ALPHA = 0.75f; - public static final float LM_BETA = 1.85f; - private char readLEChar(RandomAccessFile f) throws IOException { byte b1 = f.readByte(); byte b2 = f.readByte(); @@ -130,7 +126,7 @@ public class BasicTest { @Test public void loadDeepSpeech_stt_withLM() { DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH); - m.enableDecoderWithLM(lmFile, trieFile, LM_ALPHA, LM_BETA); + m.enableExternalScorer(scorerFile); String decoded = doSTT(m, false); assertEquals("she had your dark suit in greasy wash water all year", decoded); @@ -149,7 +145,7 @@ public class BasicTest { @Test public void loadDeepSpeech_sttWithMetadata_withLM() { DeepSpeechModel m = new DeepSpeechModel(modelFile, BEAM_WIDTH); - m.enableDecoderWithLM(lmFile, trieFile, LM_ALPHA, LM_BETA); + m.enableExternalScorer(scorerFile); String decoded = doSTT(m, true); assertEquals("she had your dark suit in greasy wash water all year", decoded); diff --git a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java index e063f86b..0438ac10 100644 --- a/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java +++ b/native_client/java/libdeepspeech/src/main/java/org/mozilla/deepspeech/libdeepspeech/DeepSpeechModel.java @@ -47,17 +47,35 @@ public class DeepSpeechModel { } /** - * @brief Enable decoding using beam scoring with a KenLM language model. + * @brief Enable decoding using an external scorer. * - * @param lm The path to the language model binary file. - * @param trie The path to the trie file build from the same vocabulary as the language model binary. - * @param lm_alpha The alpha hyperparameter of the CTC decoder. 
Language Model weight. - * @param lm_beta The beta hyperparameter of the CTC decoder. Word insertion weight. + * @param scorer The path to the external scorer file. * * @return Zero on success, non-zero on failure (invalid arguments). */ - public void enableDecoderWithLM(String lm, String trie, float lm_alpha, float lm_beta) { - impl.EnableDecoderWithLM(this._msp, lm, trie, lm_alpha, lm_beta); + public void enableExternalScorer(String scorer) { + impl.EnableExternalScorer(this._msp, scorer); + } + + /** + * @brief Disable decoding using an external scorer. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ + public void disableExternalScorer() { + impl.DisableExternalScorer(this._msp); + } + + /** + * @brief Enable decoding using beam scoring with a KenLM language model. + * + * @param alpha The alpha hyperparameter of the decoder. Language model weight. + * @param beta The beta hyperparameter of the decoder. Word insertion weight. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ + public void setScorerAlphaBeta(float alpha, float beta) { + impl.SetScorerAlphaBeta(this._msp, alpha, beta); } /* diff --git a/native_client/javascript/client.js b/native_client/javascript/client.js index b504650f..79561a97 100644 --- a/native_client/javascript/client.js +++ b/native_client/javascript/client.js @@ -29,12 +29,11 @@ VersionAction.prototype.call = function(parser) { var parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'}); parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'}); -parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'}); -parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'}); +parser.addArgument(['--scorer'], {help: 'Path to the external scorer file'}); parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'}); parser.addArgument(['--beam_width'], {help: 'Beam width for the CTC decoder', defaultValue: 500, type: 'int'}); -parser.addArgument(['--lm_alpha'], {help: 'Language model weight (lm_alpha)', defaultValue: 0.75, type: 'float'}); -parser.addArgument(['--lm_beta'], {help: 'Word insertion bonus (lm_beta)', defaultValue: 1.85, type: 'float'}); +parser.addArgument(['--lm_alpha'], {help: 'Language model weight (lm_alpha). If not set, use default value from scorer.', type: 'float'}); +parser.addArgument(['--lm_beta'], {help: 'Word insertion bonus (lm_beta). 
If not set, use default value from scorer.', type: 'float'}); parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'}); parser.addArgument(['--extended'], {action: 'storeTrue', help: 'Output string from extended metadata'}); var args = parser.parseArgs(); @@ -60,12 +59,16 @@ console.error('Loaded model in %ds.', totalTime(model_load_end)); var desired_sample_rate = model.sampleRate(); -if (args['lm'] && args['trie']) { - console.error('Loading language model from files %s %s', args['lm'], args['trie']); - const lm_load_start = process.hrtime(); - model.enableDecoderWithLM(args['lm'], args['trie'], args['lm_alpha'], args['lm_beta']); - const lm_load_end = process.hrtime(lm_load_start); - console.error('Loaded language model in %ds.', totalTime(lm_load_end)); +if (args['scorer']) { + console.error('Loading scorer from file %s', args['scorer']); + const scorer_load_start = process.hrtime(); + model.enableExternalScorer(args['scorer']); + const scorer_load_end = process.hrtime(scorer_load_start); + console.error('Loaded scorer in %ds.', totalTime(scorer_load_end)); + + if (args['lm_alpha'] && args['lm_beta']) { + model.setScorerAlphaBeta(args['lm_alpha'], args['lm_beta']); + } } const buffer = Fs.readFileSync(args['audio']); diff --git a/native_client/javascript/index.js b/native_client/javascript/index.js index 1d4137c7..2ce039bf 100644 --- a/native_client/javascript/index.js +++ b/native_client/javascript/index.js @@ -52,31 +52,46 @@ Model.prototype.sampleRate = function() { } /** - * Enable decoding using beam scoring with a KenLM language model. + * Enable decoding using an external scorer. + * + * @param {string} aScorerPath The path to the external scorer file. + * + * @return {number} Zero on success, non-zero on failure (invalid arguments). + */ +Model.prototype.enableExternalScorer = function(aScorerPath) { + return binding.EnableExternalScorer(this._impl, aScorerPath); +} + +/** + * Disable decoding using an external scorer. + * + * @return {number} Zero on success, non-zero on failure (invalid arguments). + */ +Model.prototype.disableExternalScorer = function() { + return binding.EnableExternalScorer(this._impl); +} + +/** + * Set hyperparameters alpha and beta of the external scorer. * - * @param {string} aLMPath The path to the language model binary file. - * @param {string} aTriePath The path to the trie file build from the same vocabulary as the language model binary. * @param {float} aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight. * @param {float} aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight. * * @return {number} Zero on success, non-zero on failure (invalid arguments). */ -Model.prototype.enableDecoderWithLM = function() { - const args = [this._impl].concat(Array.prototype.slice.call(arguments)); - return binding.EnableDecoderWithLM.apply(null, args); +Model.prototype.setScorerAlphaBeta = function(aLMAlpha, aLMBeta) { + return binding.SetScorerAlphaBeta(this._impl, aLMAlpha, aLMBeta); } /** * Use the DeepSpeech model to perform Speech-To-Text. * * @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). - * @param {number} aBufferSize The number of samples in the audio signal. * * @return {string} The STT result. Returns undefined on error. 
*/ -Model.prototype.stt = function() { - const args = [this._impl].concat(Array.prototype.slice.call(arguments)); - return binding.SpeechToText.apply(null, args); +Model.prototype.stt = function(aBuffer) { + return binding.SpeechToText(this._impl, aBuffer); } /** @@ -84,25 +99,22 @@ Model.prototype.stt = function() { * about the results. * * @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). - * @param {number} aBufferSize The number of samples in the audio signal. * * @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. */ -Model.prototype.sttWithMetadata = function() { - const args = [this._impl].concat(Array.prototype.slice.call(arguments)); - return binding.SpeechToTextWithMetadata.apply(null, args); +Model.prototype.sttWithMetadata = function(aBuffer) { + return binding.SpeechToTextWithMetadata(this._impl, aBuffer); } /** - * Create a new streaming inference state. The streaming state returned by this function can then be passed to :js:func:`Model.feedAudioContent` and :js:func:`Model.finishStream`. + * Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object. * - * @return {object} an opaque object that represents the streaming state. + * @return {object} a :js:func:`Stream` object that represents the streaming state. * * @throws on error */ Model.prototype.createStream = function() { - const args = [this._impl].concat(Array.prototype.slice.call(arguments)); - const rets = binding.CreateStream.apply(null, args); + const rets = binding.CreateStream(this._impl); const status = rets[0]; const ctx = rets[1]; if (status !== 0) { @@ -111,55 +123,56 @@ Model.prototype.createStream = function() { return ctx; } +function Stream(nativeStream) { + this._impl = nativeStream; +} + /** * Feed audio samples to an ongoing streaming inference. * - * @param {object} aSctx A streaming state returned by :js:func:`Model.setupStream`. * @param {buffer} aBuffer An array of 16-bit, mono raw audio samples at the * appropriate sample rate (matching what the model was trained on). - * @param {number} aBufferSize The number of samples in @param aBuffer. */ -Model.prototype.feedAudioContent = function() { - binding.FeedAudioContent.apply(null, arguments); +Stream.prototype.feedAudioContent = function(aBuffer) { + binding.FeedAudioContent(this._impl, aBuffer); } /** * Compute the intermediate decoding of an ongoing streaming inference. * - * @param {object} aSctx A streaming state returned by :js:func:`Model.setupStream`. - * * @return {string} The STT intermediate result. */ -Model.prototype.intermediateDecode = function() { - return binding.IntermediateDecode.apply(null, arguments); +Stream.prototype.intermediateDecode = function() { + return binding.IntermediateDecode(this._impl); } /** * Signal the end of an audio signal to an ongoing streaming inference, returns the STT result over the whole audio signal. * - * @param {object} aSctx A streaming state returned by :js:func:`Model.setupStream`. - * * @return {string} The STT result. * - * This method will free the state (@param aSctx). + * This method will free the stream, it must not be used after this method is called. 
*/ -Model.prototype.finishStream = function() { - return binding.FinishStream.apply(null, arguments); +Stream.prototype.finishStream = function() { + result = binding.FinishStream(this._impl); + this._impl = null; + return result; } /** * Signal the end of an audio signal to an ongoing streaming inference, returns per-letter metadata. * - * @param {object} aSctx A streaming state pointer returned by :js:func:`Model.setupStream`. - * * @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. * - * This method will free the state pointer (@param aSctx). + * This method will free the stream, it must not be used after this method is called. */ -Model.prototype.finishStreamWithMetadata = function() { - return binding.FinishStreamWithMetadata.apply(null, arguments); +Stream.prototype.finishStreamWithMetadata = function() { + result = binding.FinishStreamWithMetadata(this._impl); + this._impl = null; + return result; } + /** * Frees associated resources and destroys model object. * @@ -184,10 +197,10 @@ function FreeMetadata(metadata) { * can be used if you no longer need the result of an ongoing streaming * inference and don't want to perform a costly decode operation. * - * @param {Object} stream A streaming state pointer returned by :js:func:`Model.createStream`. + * @param {Object} stream A stream object returned by :js:func:`Model.createStream`. */ function FreeStream(stream) { - return binding.FreeStream(stream); + return binding.FreeStream(stream._impl); } /** diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py index 0cd220e8..ee38287f 100644 --- a/native_client/python/__init__.py +++ b/native_client/python/__init__.py @@ -21,7 +21,6 @@ import deepspeech # rename for backwards compatibility from deepspeech.impl import PrintVersions as printVersions -from deepspeech.impl import FreeStream as freeStream class Model(object): """ @@ -56,127 +55,159 @@ class Model(object): """ return deepspeech.impl.GetModelSampleRate(self._impl) - def enableDecoderWithLM(self, *args, **kwargs): + def enableExternalScorer(self, scorer_path): """ - Enable decoding using beam scoring with a KenLM language model. + Enable decoding using an external scorer. - :param aLMPath: The path to the language model binary file. - :type aLMPath: str + :param scorer_path: The path to the external scorer file. + :type scorer_path: str - :param aTriePath: The path to the trie file build from the same vocabulary as the language model binary. - :type aTriePath: str - - :param aLMAlpha: The alpha hyperparameter of the CTC decoder. Language Model weight. - :type aLMAlpha: float - - :param aLMBeta: The beta hyperparameter of the CTC decoder. Word insertion weight. - :type aLMBeta: float - - :return: Zero on success, non-zero on failure (invalid arguments). + :return: Zero on success, non-zero on failure. :type: int """ - return deepspeech.impl.EnableDecoderWithLM(self._impl, *args, **kwargs) + return deepspeech.impl.EnableExternalScorer(self._impl, scorer_path) - def stt(self, *args, **kwargs): + def disableExternalScorer(self): + """ + Disable decoding using an external scorer. + + :return: Zero on success, non-zero on failure. + """ + return deepspeech.impl.DisableExternalScorer(self._impl) + + def setScorerAlphaBeta(self, alpha, beta): + """ + Set hyperparameters alpha and beta of the external scorer. + + :param alpha: The alpha hyperparameter of the decoder. 
Language model weight. + :type alpha: float + + :param beta: The beta hyperparameter of the decoder. Word insertion weight. + :type beta: float + + :return: Zero on success, non-zero on failure. + :type: int + """ + return deepspeech.impl.SetScorerAlphaBeta(self._impl, alpha, beta) + + def stt(self, audio_buffer): """ Use the DeepSpeech model to perform Speech-To-Text. - :param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). - :type aBuffer: int array - - :param aBufferSize: The number of samples in the audio signal. - :type aBufferSize: int + :param audio_buffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). + :type audio_buffer: numpy.int16 array :return: The STT result. :type: str """ - return deepspeech.impl.SpeechToText(self._impl, *args, **kwargs) + return deepspeech.impl.SpeechToText(self._impl, audio_buffer) - def sttWithMetadata(self, *args, **kwargs): + def sttWithMetadata(self, audio_buffer): """ Use the DeepSpeech model to perform Speech-To-Text and output metadata about the results. - :param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). - :type aBuffer: int array - - :param aBufferSize: The number of samples in the audio signal. - :type aBufferSize: int + :param audio_buffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). + :type audio_buffer: numpy.int16 array :return: Outputs a struct of individual letters along with their timing information. :type: :func:`Metadata` """ - return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs) + return deepspeech.impl.SpeechToTextWithMetadata(self._impl, audio_buffer) def createStream(self): """ - Create a new streaming inference state. The streaming state returned - by this function can then be passed to :func:`feedAudioContent()` and :func:`finishStream()`. + Create a new streaming inference state. The streaming state returned by + this function can then be passed to :func:`feedAudioContent()` and :func:`finishStream()`. - :return: Object holding the stream + :return: Stream object representing the newly created stream + :type: :func:`Stream` :throws: RuntimeError on error """ status, ctx = deepspeech.impl.CreateStream(self._impl) if status != 0: raise RuntimeError("CreateStream failed with error code {}".format(status)) - return ctx + return Stream(ctx) - # pylint: disable=no-self-use - def feedAudioContent(self, *args, **kwargs): + +class Stream(object): + def __init__(self, native_stream): + self._impl = native_stream + + def __del__(self): + if self._impl: + self.freeStream() + + def feedAudioContent(self, audio_buffer): """ Feed audio samples to an ongoing streaming inference. - :param aSctx: A streaming state pointer returned by :func:`createStream()`. - :type aSctx: object + :param audio_buffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). + :type audio_buffer: numpy.int16 array - :param aBuffer: An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). - :type aBuffer: int array - - :param aBufferSize: The number of samples in @p aBuffer. - :type aBufferSize: int + :throws: RuntimeError if the stream object is not valid """ - deepspeech.impl.FeedAudioContent(*args, **kwargs) + if not self._impl: + raise RuntimeError("Stream object is not valid. 
Trying to feed an already finished stream?") + deepspeech.impl.FeedAudioContent(self._impl, audio_buffer) - # pylint: disable=no-self-use - def intermediateDecode(self, *args, **kwargs): + def intermediateDecode(self): """ Compute the intermediate decoding of an ongoing streaming inference. - :param aSctx: A streaming state pointer returned by :func:`createStream()`. - :type aSctx: object - :return: The STT intermediate result. :type: str - """ - return deepspeech.impl.IntermediateDecode(*args, **kwargs) - # pylint: disable=no-self-use - def finishStream(self, *args, **kwargs): + :throws: RuntimeError if the stream object is not valid """ - Signal the end of an audio signal to an ongoing streaming - inference, returns the STT result over the whole audio signal. + if not self._impl: + raise RuntimeError("Stream object is not valid. Trying to decode an already finished stream?") + return deepspeech.impl.IntermediateDecode(self._impl) - :param aSctx: A streaming state pointer returned by :func:`createStream()`. - :type aSctx: object + def finishStream(self): + """ + Signal the end of an audio signal to an ongoing streaming inference, + returns the STT result over the whole audio signal. :return: The STT result. :type: str - """ - return deepspeech.impl.FinishStream(*args, **kwargs) - # pylint: disable=no-self-use - def finishStreamWithMetadata(self, *args, **kwargs): + :throws: RuntimeError if the stream object is not valid """ - Signal the end of an audio signal to an ongoing streaming - inference, returns per-letter metadata. + if not self._impl: + raise RuntimeError("Stream object is not valid. Trying to finish an already finished stream?") + result = deepspeech.impl.FinishStream(self._impl) + self._impl = None + return result - :param aSctx: A streaming state pointer returned by :func:`createStream()`. - :type aSctx: object + def finishStreamWithMetadata(self): + """ + Signal the end of an audio signal to an ongoing streaming inference, + returns per-letter metadata. :return: Outputs a struct of individual letters along with their timing information. :type: :func:`Metadata` + + :throws: RuntimeError if the stream object is not valid """ - return deepspeech.impl.FinishStreamWithMetadata(*args, **kwargs) + if not self._impl: + raise RuntimeError("Stream object is not valid. Trying to finish an already finished stream?") + result = deepspeech.impl.FinishStreamWithMetadata(self._impl) + self._impl = None + return result + + def freeStream(self): + """ + Destroy a streaming state without decoding the computed logits. This can + be used if you no longer need the result of an ongoing streaming inference. + + :throws: RuntimeError if the stream object is not valid + """ + if not self._impl: + raise RuntimeError("Stream object is not valid. 
Trying to free an already finished stream?") + deepspeech.impl.FreeStream(self._impl) + self._impl = None + # This is only for documentation purpose # Metadata and MetadataItem should be in sync with native_client/deepspeech.h @@ -189,22 +220,18 @@ class MetadataItem(object): """ The character generated for transcription """ - # pylint: disable=unnecessary-pass - pass + def timestep(self): """ Position of the character in units of 20ms """ - # pylint: disable=unnecessary-pass - pass + def start_time(self): """ Position of the character in seconds """ - # pylint: disable=unnecessary-pass - pass class Metadata(object): @@ -218,8 +245,7 @@ class Metadata(object): :return: A list of :func:`MetadataItem` elements :type: list """ - # pylint: disable=unnecessary-pass - pass + def num_items(self): """ @@ -228,8 +254,7 @@ class Metadata(object): :return: Size of the list of items :type: int """ - # pylint: disable=unnecessary-pass - pass + def confidence(self): """ @@ -237,5 +262,4 @@ class Metadata(object): sum of the acoustic model logit values for each timestep/character that contributed to the creation of this transcription. """ - # pylint: disable=unnecessary-pass - pass + diff --git a/native_client/python/client.py b/native_client/python/client.py index 91a63491..ba5d70b2 100644 --- a/native_client/python/client.py +++ b/native_client/python/client.py @@ -72,7 +72,7 @@ def metadata_json_output(metadata): json_result["words"] = words_from_metadata(metadata) json_result["confidence"] = metadata.confidence return json.dumps(json_result) - + class VersionAction(argparse.Action): @@ -88,17 +88,15 @@ def main(): parser = argparse.ArgumentParser(description='Running DeepSpeech inference.') parser.add_argument('--model', required=True, help='Path to the model (protocol buffer binary file)') - parser.add_argument('--lm', nargs='?', - help='Path to the language model binary file') - parser.add_argument('--trie', nargs='?', - help='Path to the language model trie file created with native_client/generate_trie') + parser.add_argument('--scorer', required=False, + help='Path to the external scorer file') parser.add_argument('--audio', required=True, help='Path to the audio file to run (WAV format)') parser.add_argument('--beam_width', type=int, default=500, help='Beam width for the CTC decoder') - parser.add_argument('--lm_alpha', type=float, default=0.75, + parser.add_argument('--lm_alpha', type=float, help='Language model weight (lm_alpha)') - parser.add_argument('--lm_beta', type=float, default=1.85, + parser.add_argument('--lm_beta', type=float, help='Word insertion bonus (lm_beta)') parser.add_argument('--version', action=VersionAction, help='Print version and exits') @@ -116,12 +114,15 @@ def main(): desired_sample_rate = ds.sampleRate() - if args.lm and args.trie: - print('Loading language model from files {} {}'.format(args.lm, args.trie), file=sys.stderr) - lm_load_start = timer() - ds.enableDecoderWithLM(args.lm, args.trie, args.lm_alpha, args.lm_beta) - lm_load_end = timer() - lm_load_start - print('Loaded language model in {:.3}s.'.format(lm_load_end), file=sys.stderr) + if args.scorer: + print('Loading scorer from files {}'.format(args.scorer), file=sys.stderr) + scorer_load_start = timer() + ds.enableExternalScorer(args.scorer) + scorer_load_end = timer() - scorer_load_start + print('Loaded scorer in {:.3}s.'.format(scorer_load_end), file=sys.stderr) + + if args.lm_alpha and args.lm_beta: + ds.setScorerAlphaBeta(args.lm_alpha, args.lm_beta) fin = wave.open(args.audio, 'rb') fs = 
fin.getframerate() diff --git a/native_client/test/concurrent_streams.py b/native_client/test/concurrent_streams.py index 2b2b4ed0..d799de36 100644 --- a/native_client/test/concurrent_streams.py +++ b/native_client/test/concurrent_streams.py @@ -14,21 +14,13 @@ from deepspeech import Model # Beam width used in the CTC decoder when building candidate transcriptions BEAM_WIDTH = 500 -# The alpha hyperparameter of the CTC decoder. Language Model weight -LM_ALPHA = 0.75 - -# The beta hyperparameter of the CTC decoder. Word insertion bonus. -LM_BETA = 1.85 - def main(): parser = argparse.ArgumentParser(description='Running DeepSpeech inference.') parser.add_argument('--model', required=True, help='Path to the model (protocol buffer binary file)') - parser.add_argument('--lm', nargs='?', - help='Path to the language model binary file') - parser.add_argument('--trie', nargs='?', - help='Path to the language model trie file created with native_client/generate_trie') + parser.add_argument('--scorer', nargs='?', + help='Path to the external scorer file') parser.add_argument('--audio1', required=True, help='First audio file to use in interleaved streams') parser.add_argument('--audio2', required=True, @@ -37,8 +29,8 @@ def main(): ds = Model(args.model, BEAM_WIDTH) - if args.lm and args.trie: - ds.enableDecoderWithLM(args.lm, args.trie, LM_ALPHA, LM_BETA) + if args.scorer: + ds.enableExternalScorer(args.scorer) fin = wave.open(args.audio1, 'rb') fs1 = fin.getframerate() @@ -57,11 +49,11 @@ def main(): splits2 = np.array_split(audio2, 10) for part1, part2 in zip(splits1, splits2): - ds.feedAudioContent(stream1, part1) - ds.feedAudioContent(stream2, part2) + stream1.feedAudioContent(part1) + stream2.feedAudioContent(part2) - print(ds.finishStream(stream1)) - print(ds.finishStream(stream2)) + print(stream1.finishStream()) + print(stream2.finishStream()) if __name__ == '__main__': main() diff --git a/taskcluster/arm64-build.sh b/taskcluster/arm64-build.sh index 178b9b35..26518d2d 100644 --- a/taskcluster/arm64-build.sh +++ b/taskcluster/arm64-build.sh @@ -8,7 +8,6 @@ source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so -//native_client:generate_trie " BAZEL_BUILD_FLAGS="${BAZEL_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}" diff --git a/taskcluster/cuda-build.sh b/taskcluster/cuda-build.sh index cfc77824..df3e049f 100755 --- a/taskcluster/cuda-build.sh +++ b/taskcluster/cuda-build.sh @@ -8,7 +8,6 @@ source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so -//native_client:generate_trie " BAZEL_ENV_FLAGS="TF_NEED_CUDA=1 ${TF_CUDA_FLAGS}" diff --git a/taskcluster/examples-base.tyml b/taskcluster/examples-base.tyml index 5f3a1bdb..9739f36a 100644 --- a/taskcluster/examples-base.tyml +++ b/taskcluster/examples-base.tyml @@ -30,11 +30,11 @@ then: image: ${build.docker_image} env: - DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.15/models.tar.gz" + DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.6.1/models.tar.gz" DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz" PIP_DEFAULT_TIMEOUT: "60" EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "master" + EXAMPLES_CHECKOUT_TARGET: "f3dee7910d1642e14b1e3877568f8342c1c22e05" command: - "/bin/bash" diff --git a/taskcluster/host-build.sh b/taskcluster/host-build.sh index ac01f2f5..1575832c 100755 --- a/taskcluster/host-build.sh +++ 
b/taskcluster/host-build.sh @@ -10,7 +10,6 @@ source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so -//native_client:generate_trie " if [ "${runtime}" = "tflite" ]; then diff --git a/taskcluster/rpi3-build.sh b/taskcluster/rpi3-build.sh index 2fbaf8b1..3b17d7ef 100755 --- a/taskcluster/rpi3-build.sh +++ b/taskcluster/rpi3-build.sh @@ -8,7 +8,6 @@ source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so -//native_client:generate_trie " BAZEL_BUILD_FLAGS="${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS}" diff --git a/taskcluster/tc-evaluate_tflite.sh b/taskcluster/tc-evaluate_tflite.sh index 6b4f6d32..dce4b63f 100755 --- a/taskcluster/tc-evaluate_tflite.sh +++ b/taskcluster/tc-evaluate_tflite.sh @@ -49,7 +49,7 @@ deepspeech --version pushd ${HOME}/DeepSpeech/ds/ python bin/import_ldc93s1.py data/smoke_test - python evaluate_tflite.py --model "${TASKCLUSTER_TMP_DIR}/${model_name_mmap}" --lm data/smoke_test/vocab.pruned.lm --trie data/smoke_test/vocab.trie --csv data/smoke_test/ldc93s1.csv + python evaluate_tflite.py --model "${TASKCLUSTER_TMP_DIR}/${model_name_mmap}" --scorer data/smoke_test/pruned_lm.scorer --csv data/smoke_test/ldc93s1.csv popd virtualenv_deactivate "${pyalias}" "${PYENV_NAME}" diff --git a/taskcluster/tc-tests-utils.sh b/taskcluster/tc-tests-utils.sh index 4841afaf..877aa4ad 100755 --- a/taskcluster/tc-tests-utils.sh +++ b/taskcluster/tc-tests-utils.sh @@ -378,7 +378,7 @@ run_netframework_inference_tests() assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?" set +e - phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?" } @@ -401,7 +401,7 @@ run_electronjs_inference_tests() assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$?" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) set -e assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$?" } @@ -427,7 +427,7 @@ run_basic_inference_tests() assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$status" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" @@ -444,7 +444,7 @@ run_all_inference_tests() assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" "$status" set +e - phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_stereo_44k}" "$status" @@ -457,7 +457,7 @@ run_all_inference_tests() assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}" set +e - phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) + phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; @@ -470,8 +470,7 @@ run_prod_concurrent_stream_tests() set +e output=$(python ${TASKCLUSTER_TMP_DIR}/test_sources/concurrent_streams.py \ --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \ - --lm ${TASKCLUSTER_TMP_DIR}/lm.binary \ - --trie ${TASKCLUSTER_TMP_DIR}/trie \ + --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \ --audio1 ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_16000.wav \ --audio2 ${TASKCLUSTER_TMP_DIR}/new-home-in-the-stars-16k.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? @@ -489,19 +488,19 @@ run_prod_inference_tests() local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" "${_bitrate}" @@ -509,7 +508,7 @@ run_prod_inference_tests() # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e - phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) + phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; @@ -520,19 +519,19 @@ run_prodtflite_inference_tests() local _bitrate=$1 set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e - phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) + phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_prodtflitemodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" "${_bitrate}" @@ -540,7 +539,7 @@ run_prodtflite_inference_tests() # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e - phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) + phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; @@ -555,7 +554,7 @@ run_multi_inference_tests() assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_nolm}" "$status" set +e -o pipefail - multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%') + multi_phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/ 2>${TASKCLUSTER_TMP_DIR}/stderr | tr '\n' '%') status=$? set -e +o pipefail assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status" @@ -564,7 +563,7 @@ run_multi_inference_tests() run_cpp_only_inference_tests() { set +e - phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) + phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 1280 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1) status=$? 
set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status" @@ -669,8 +668,7 @@ download_data() ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source}" ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}" cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/ - cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.pruned.lm ${TASKCLUSTER_TMP_DIR}/lm.binary - cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.trie ${TASKCLUSTER_TMP_DIR}/trie + cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources } @@ -1562,7 +1560,6 @@ package_native_client() fi; ${TAR} -cf - \ - -C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie${PLATFORM_EXE_SUFFIX} \ -C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \ -C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so.if.lib \ -C ${deepspeech_dir}/ LICENSE \ @@ -1767,8 +1764,7 @@ android_setup_apk_data() adb push \ ${TASKCLUSTER_TMP_DIR}/${model_name} \ ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \ - ${TASKCLUSTER_TMP_DIR}/lm.binary \ - ${TASKCLUSTER_TMP_DIR}/trie \ + ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \ ${ANDROID_TMP_DIR}/test/ } diff --git a/taskcluster/win-build.sh b/taskcluster/win-build.sh index e3a4133d..39c3f261 100755 --- a/taskcluster/win-build.sh +++ b/taskcluster/win-build.sh @@ -10,7 +10,6 @@ source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so -//native_client:generate_trie " if [ "${package_option}" = "--cuda" ]; then diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index f1a3c680..e0c12162 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -44,7 +44,7 @@ payload: MSYS: 'winsymlinks:nativestrict' TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow} EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "master" + EXAMPLES_CHECKOUT_TARGET: "f3dee7910d1642e14b1e3877568f8342c1c22e05" command: - >- diff --git a/transcribe.py b/transcribe.py index 8c761a9a..c66bbe61 100755 --- a/transcribe.py +++ b/transcribe.py @@ -29,7 +29,7 @@ def fail(message, code=1): def transcribe_file(audio_path, tlog_path): from DeepSpeech import create_model, try_loading # pylint: disable=cyclic-import,import-outside-toplevel initialize_globals() - scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.lm_binary_path, FLAGS.lm_trie_path, Config.alphabet) + scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path, Config.alphabet) try: num_processes = cpu_count() except NotImplementedError: diff --git a/util/flags.py b/util/flags.py index 49d54fd0..c3ed2af8 100644 --- a/util/flags.py +++ b/util/flags.py @@ -143,10 +143,8 @@ def create_flags(): f.DEFINE_boolean('utf8', False, 'enable UTF-8 mode. When this is used the model outputs UTF-8 sequences directly rather than using an alphabet mapping.') f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. 
See the comment in data/alphabet.txt for a description of the format.') - f.DEFINE_string('lm_binary_path', 'data/lm/lm.binary', 'path to the language model binary file created with KenLM') - f.DEFINE_alias('lm', 'lm_binary_path') - f.DEFINE_string('lm_trie_path', 'data/lm/trie', 'path to the language model trie file created with native_client/generate_trie') - f.DEFINE_alias('trie', 'lm_trie_path') + f.DEFINE_string('scorer_path', 'data/lm/kenlm.scorer', 'path to the external scorer file created with data/lm/generate_package.py') + f.DEFINE_alias('scorer', 'scorer_path') f.DEFINE_integer('beam_width', 1024, 'beam width used in the CTC decoder when building candidate transcriptions') f.DEFINE_float('lm_alpha', 0.75, 'the alpha hyperparameter of the CTC decoder. Language Model weight.') f.DEFINE_float('lm_beta', 1.85, 'the beta hyperparameter of the CTC decoder. Word insertion weight.') From 3b54f545240a62a595390d63c52658b0a4754055 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 21 Jan 2020 12:49:51 +0100 Subject: [PATCH 12/16] Fix linter errors X-DeepSpeech: NOBUILD --- .pylintrc | 2 +- data/lm/generate_package.py | 64 +++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/.pylintrc b/.pylintrc index 98a88b4f..7ba190cf 100644 --- a/.pylintrc +++ b/.pylintrc @@ -7,7 +7,7 @@ extension-pkg-whitelist= # Add files or directories to the blacklist. They should be base names, not # paths. -ignore=examples +ignore=native_client/kenlm # Add files or directories matching the regex patterns to the blacklist. The # regex matches against base names, not paths. diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py index d8f39c4e..2b9acf33 100644 --- a/data/lm/generate_package.py +++ b/data/lm/generate_package.py @@ -5,7 +5,8 @@ from __future__ import absolute_import, division, print_function # This script needs to be run from the root of the DeepSpeech repository import os import sys -sys.path.insert(1, os.path.join(sys.path[0], '..', '..')) + +sys.path.insert(1, os.path.join(sys.path[0], "..", "..")) import argparse import shutil @@ -14,13 +15,21 @@ from util.text import Alphabet, UTF8Alphabet from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet -def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8, default_alpha, default_beta): +def create_bundle( + alphabet_path, + lm_path, + vocab_path, + package_path, + force_utf8, + default_alpha, + default_beta, +): words = set() vocab_looks_char_based = True with open(vocab_path) as fin: for line in fin: for word in line.split(): - words.add(word.encode('utf-8')) + words.add(word.encode("utf-8")) if len(word) > 1: vocab_looks_char_based = False print("{} unique words read from vocabulary file.".format(len(words))) @@ -30,7 +39,7 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8, ) ) - if force_utf8 != None: + if force_utf8 != None: # pylint: disable=singleton-comparison use_utf8 = force_utf8.value else: use_utf8 = vocab_looks_char_based @@ -53,26 +62,29 @@ def create_bundle(alphabet_path, lm_path, vocab_path, package_path, force_utf8, scorer.load_lm(lm_path) scorer.fill_dictionary(list(words)) shutil.copy(lm_path, package_path) - scorer.save_dictionary(package_path, True) # append, not overwrite - print('Package created in {}'.format(package_path)) + scorer.save_dictionary(package_path, True) # append, not overwrite + print("Package created in {}".format(package_path)) class Tristate(object): def __init__(self, value=None): - 
if any(value is v for v in (True, False, None)): - self.value = value - else: - raise ValueError("Tristate value must be True, False, or None") + if any(value is v for v in (True, False, None)): + self.value = value + else: + raise ValueError("Tristate value must be True, False, or None") def __eq__(self, other): - return (self.value is other.value if isinstance(other, Tristate) - else self.value is other) + return ( + self.value is other.value + if isinstance(other, Tristate) + else self.value is other + ) def __ne__(self, other): - return not self == other + return not self == other def __bool__(self): - raise TypeError("Tristate object may not be used as a Boolean") + raise TypeError("Tristate object may not be used as a Boolean") def __str__(self): return str(self.value) @@ -100,8 +112,18 @@ def main(): help="Path of vocabulary file. Must contain words separated by whitespace.", ) parser.add_argument("--package", required=True, help="Path to save scorer package.") - parser.add_argument("--default_alpha", type=float, required=True, help="Default value of alpha hyperparameter.") - parser.add_argument("--default_beta", type=float, required=True, help="Default value of beta hyperparameter.") + parser.add_argument( + "--default_alpha", + type=float, + required=True, + help="Default value of alpha hyperparameter.", + ) + parser.add_argument( + "--default_beta", + type=float, + required=True, + help="Default value of beta hyperparameter.", + ) parser.add_argument( "--force_utf8", default="", @@ -116,7 +138,15 @@ def main(): else: force_utf8 = Tristate(None) - create_bundle(args.alphabet, args.lm, args.vocab, args.package, force_utf8, args.default_alpha, args.default_beta) + create_bundle( + args.alphabet, + args.lm, + args.vocab, + args.package, + force_utf8, + args.default_alpha, + args.default_beta, + ) if __name__ == "__main__": From efbed73d5cd780af76e3bd9a312aec541b374ead Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 21 Jan 2020 22:27:06 +0100 Subject: [PATCH 13/16] Improve error handling around Scorer loading --- native_client/ctcdecode/scorer.cpp | 36 +++++++++++++----------------- native_client/ctcdecode/scorer.h | 5 +++-- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index d53fe917..c5ae54a2 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -31,10 +31,8 @@ int Scorer::init(const std::string& lm_path, const Alphabet& alphabet) { - alphabet_ = alphabet; - setup_char_map(); - load_lm(lm_path); - return 0; + set_alphabet(alphabet); + return load_lm(lm_path); } int @@ -46,8 +44,7 @@ Scorer::init(const std::string& lm_path, return err; } setup_char_map(); - load_lm(lm_path); - return 0; + return load_lm(lm_path); } void @@ -72,15 +69,14 @@ void Scorer::setup_char_map() } } -void Scorer::load_lm(const std::string& lm_path) +int Scorer::load_lm(const std::string& lm_path) { // load language model const char* filename = lm_path.c_str(); - VALID_CHECK_EQ(access(filename, R_OK), 0, "Invalid language model path"); - lm::ngram::Config config; config.load_method = util::LoadMethod::LAZY; language_model_.reset(lm::ngram::LoadVirtual(filename, config)); + max_order_ = language_model_->Order(); uint64_t package_size; { @@ -88,26 +84,25 @@ void Scorer::load_lm(const std::string& lm_path) package_size = util::SizeFile(fd.get()); } uint64_t trie_offset = language_model_->GetEndOfSearchOffset(); - bool has_trie = package_size > trie_offset; - - if (has_trie) { - // 
Read metadata and trie from file - std::ifstream fin(lm_path, std::ios::binary); - fin.seekg(trie_offset); - load_trie(fin, lm_path); + if (package_size <= trie_offset) { + // File ends without a trie structure + return 1; } - max_order_ = language_model_->Order(); + // Read metadata and trie from file + std::ifstream fin(lm_path, std::ios::binary); + fin.seekg(trie_offset); + return load_trie(fin, lm_path); } -void Scorer::load_trie(std::ifstream& fin, const std::string& file_path) +int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) { int magic; fin.read(reinterpret_cast(&magic), sizeof(magic)); if (magic != MAGIC) { std::cerr << "Error: Can't parse trie file, invalid header. Try updating " "your trie file." << std::endl; - throw 1; + return 1; } int version; @@ -122,7 +117,7 @@ void Scorer::load_trie(std::ifstream& fin, const std::string& file_path) std::cerr << "Downgrade your trie file or update your version of DeepSpeech."; } std::cerr << std::endl; - throw 1; + return 1; } fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); @@ -137,6 +132,7 @@ void Scorer::load_trie(std::ifstream& fin, const std::string& file_path) opt.mode = fst::FstReadOptions::MAP; opt.source = file_path; dictionary.reset(FstType::Read(fin, opt)); + return 0; } void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite) diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h index b2e5c817..55f337ed 100644 --- a/native_client/ctcdecode/scorer.h +++ b/native_client/ctcdecode/scorer.h @@ -12,6 +12,7 @@ #include "path_trie.h" #include "alphabet.h" +#include "deepspeech.h" const double OOV_SCORE = -1000.0; const std::string START_TOKEN = ""; @@ -85,7 +86,7 @@ public: void fill_dictionary(const std::vector &vocabulary); // load language model from given path - void load_lm(const std::string &lm_path); + int load_lm(const std::string &lm_path); // language model weight double alpha = 0.; @@ -99,7 +100,7 @@ protected: // necessary setup after setting alphabet void setup_char_map(); - void load_trie(std::ifstream& fin, const std::string& file_path); + int load_trie(std::ifstream& fin, const std::string& file_path); private: std::unique_ptr language_model_; From 1d3b3a31a18e188743fc4c9620306a4cc45c6c10 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 22 Jan 2020 15:18:17 +0100 Subject: [PATCH 14/16] Address review comments and update docs --- .gitattributes | 3 --- README.rst | 4 ++-- data/README.rst | 4 +--- data/lm/README.rst | 6 +++--- data/lm/generate_lm.py | 7 +++++-- data/lm/generate_package.py | 1 + doc/C-Examples.rst | 2 +- doc/NodeJS-API.rst | 6 ++++++ doc/NodeJS-Examples.rst | 4 ++-- doc/Python-API.rst | 6 ++++++ doc/Python-Examples.rst | 4 ++-- doc/USING.rst | 6 +++--- native_client/args.h | 18 +++++++++--------- native_client/ctcdecode/__init__.py | 8 ++++---- .../ctcdecode/ctc_beam_search_decoder.cpp | 2 +- native_client/java/README.rst | 2 +- native_client/javascript/index.js | 5 +++++ native_client/python/__init__.py | 4 ++++ taskcluster/examples-base.tyml | 2 +- taskcluster/win-opt-base.tyml | 2 +- 20 files changed, 58 insertions(+), 38 deletions(-) diff --git a/.gitattributes b/.gitattributes index b2aaede4..4e2fd505 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1 @@ -*.binary filter=lfs diff=lfs merge=lfs -crlf -data/lm/trie filter=lfs diff=lfs merge=lfs -crlf -data/lm/vocab.txt filter=lfs diff=lfs merge=lfs -text data/lm/kenlm.scorer filter=lfs diff=lfs merge=lfs -text diff --git a/README.rst 
b/README.rst index d2ec566b..e0ed5ad8 100644 --- a/README.rst +++ b/README.rst @@ -36,7 +36,7 @@ To install and use deepspeech all you have to do is: tar xvf audio-0.6.1.tar.gz # Transcribe an audio file - deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --lm deepspeech-0.6.1-models/lm.binary --trie deepspeech-0.6.1-models/trie --audio audio/2830-3980-0043.wav + deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --scorer deepspeech-0.6.1-models/kenlm.scorer --audio audio/2830-3980-0043.wav A pre-trained English model is available for use and can be downloaded using `the instructions below `_. A package with some example audio files is available for download in our `release notes `_. @@ -52,7 +52,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th pip3 install deepspeech-gpu # Transcribe an audio file. - deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --lm deepspeech-0.6.1-models/lm.binary --trie deepspeech-0.6.1-models/trie --audio audio/2830-3980-0043.wav + deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --scorer deepspeech-0.6.1-models/kenlm.scorer --audio audio/2830-3980-0043.wav Please ensure you have the required `CUDA dependencies `_. diff --git a/data/README.rst b/data/README.rst index 54230080..9db78c6b 100644 --- a/data/README.rst +++ b/data/README.rst @@ -5,9 +5,7 @@ This directory contains language-specific data files. Most importantly, you will 1. A list of unique characters for the target language (e.g. English) in `data/alphabet.txt` -2. A binary n-gram language model compiled by `kenlm` in `data/lm/lm.binary` - -3. A trie model compiled by `generate_trie `_ in `data/lm/trie` +2. A scorer package (`data/lm/kenlm.scorer`) generated with `data/lm/generate_package.py`, which includes a binary n-gram language model generated with `data/lm/generate_lm.py`. For more information on how to build these resources from scratch, see `data/lm/README.md` diff --git a/data/lm/README.rst b/data/lm/README.rst index bd2c2d3b..c1666700 100644 --- a/data/lm/README.rst +++ b/data/lm/README.rst @@ -1,8 +1,8 @@ -lm.binary was generated from the LibriSpeech normalized LM training text, available `here `_\ , using the `generate_lm.py` script (will generate lm.binary in the folder it is run from). `KenLM `_'s built binaries must be in your PATH (lmplz, build_binary, filter). +The LM binary was generated from the LibriSpeech normalized LM training text, available `here `_\ , using the `generate_lm.py` script (will generate lm.binary in the folder it is run from). `KenLM `_'s built binaries must be in your PATH (lmplz, build_binary, filter). -The trie was then generated from the vocabulary of the language model: +The scorer package was then built using the `generate_package.py` script: .. 
code-block:: bash - ./generate_trie ../data/alphabet.txt lm.binary trie + python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab librispeech-vocab-500k.txt --default_alpha 0.75 --default_beta 1.85 --package kenlm.scorer diff --git a/data/lm/generate_lm.py b/data/lm/generate_lm.py index 6dc320a5..00049996 100644 --- a/data/lm/generate_lm.py +++ b/data/lm/generate_lm.py @@ -39,10 +39,13 @@ def main(): '--prune', '0', '0', '1' ]) - # Filter LM using vocabulary of top 500k words - filtered_path = os.path.join(tmp, 'lm_filtered.arpa') vocab_str = '\n'.join(word for word, count in counter.most_common(500000)) + with open('librispeech-vocab-500k.txt', 'w') as fout: + fout.write(vocab_str) + + # Filter LM using vocabulary of top 500k words print('Filtering ARPA file...') + filtered_path = os.path.join(tmp, 'lm_filtered.arpa') subprocess.run(['filter', 'single', 'model:{}'.format(lm_path), filtered_path], input=vocab_str.encode('utf-8'), check=True) # Quantize and produce trie binary. diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py index 2b9acf33..3b9aa372 100644 --- a/data/lm/generate_package.py +++ b/data/lm/generate_package.py @@ -41,6 +41,7 @@ def create_bundle( if force_utf8 != None: # pylint: disable=singleton-comparison use_utf8 = force_utf8.value + print("Forcing UTF-8 mode = {}".format(use_utf8)) else: use_utf8 = vocab_looks_char_based diff --git a/doc/C-Examples.rst b/doc/C-Examples.rst index 44ab46ac..5072ba30 100644 --- a/doc/C-Examples.rst +++ b/doc/C-Examples.rst @@ -7,7 +7,7 @@ Creating a model instance and loading model .. literalinclude:: ../native_client/client.cc :language: c :linenos: - :lines: 370-388 + :lines: 370-390 Performing inference -------------------- diff --git a/doc/NodeJS-API.rst b/doc/NodeJS-API.rst index aa92e361..acdc3ab7 100644 --- a/doc/NodeJS-API.rst +++ b/doc/NodeJS-API.rst @@ -7,6 +7,12 @@ Model .. js:autoclass:: Model :members: +Stream +------ + +.. js:autoclass:: Stream + :members: + Module exported methods ----------------------- diff --git a/doc/NodeJS-Examples.rst b/doc/NodeJS-Examples.rst index 4e8a73b3..a9549525 100644 --- a/doc/NodeJS-Examples.rst +++ b/doc/NodeJS-Examples.rst @@ -7,7 +7,7 @@ Creating a model instance and loading model .. literalinclude:: ../native_client/javascript/client.js :language: javascript :linenos: - :lines: 57-66 + :lines: 54-72 Performing inference -------------------- @@ -15,7 +15,7 @@ Performing inference .. literalinclude:: ../native_client/javascript/client.js :language: javascript :linenos: - :lines: 115-117 + :lines: 117-121 Full source code ---------------- diff --git a/doc/Python-API.rst b/doc/Python-API.rst index 08851da6..b2b3567f 100644 --- a/doc/Python-API.rst +++ b/doc/Python-API.rst @@ -9,6 +9,12 @@ Model .. autoclass:: Model :members: +Stream +------ + +.. autoclass:: Stream + :members: + Metadata -------- diff --git a/doc/Python-Examples.rst b/doc/Python-Examples.rst index 2cca86a0..26aee69c 100644 --- a/doc/Python-Examples.rst +++ b/doc/Python-Examples.rst @@ -7,7 +7,7 @@ Creating a model instance and loading model .. literalinclude:: ../native_client/python/client.py :language: python :linenos: - :lines: 69, 78 + :lines: 111, 120 Performing inference -------------------- @@ -15,7 +15,7 @@ Performing inference .. 
literalinclude:: ../native_client/python/client.py :language: python :linenos: - :lines: 95-98 + :lines: 140-145 Full source code ---------------- diff --git a/doc/USING.rst b/doc/USING.rst index 9769d386..465d4319 100644 --- a/doc/USING.rst +++ b/doc/USING.rst @@ -106,9 +106,9 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett .. code-block:: bash - deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio my_audio_file.wav + deepspeech --model models/output_graph.pbmm --scorer models/kenlm.scorer --audio my_audio_file.wav -The arguments ``--lm`` and ``--trie`` are optional, and represent a language model. +The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio. See :github:`client.py ` for an example of how to use the package programatically. @@ -162,7 +162,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett .. code-block:: bash - ./deepspeech --model models/output_graph.pbmm --lm models/lm.binary --trie models/trie --audio audio_input.wav + ./deepspeech --model models/output_graph.pbmm --scorer models/kenlm.scorer --audio audio_input.wav See the help output with ``./deepspeech -h`` and the :github:`native client README ` for more details. diff --git a/native_client/args.h b/native_client/args.h index a158fb18..d5a0f869 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -59,11 +59,11 @@ void PrintHelp(const char* bin) bool ProcessArgs(int argc, char** argv) { - const char* const short_opts = "m:a:s:r:w:c:d:b:tehv"; + const char* const short_opts = "m:l:a:b:c:d:tejs:vh"; const option long_opts[] = { {"model", required_argument, nullptr, 'm'}, {"scorer", required_argument, nullptr, 'l'}, - {"audio", required_argument, nullptr, 'w'}, + {"audio", required_argument, nullptr, 'a'}, {"beam_width", required_argument, nullptr, 'b'}, {"lm_alpha", required_argument, nullptr, 'c'}, {"lm_beta", required_argument, nullptr, 'd'}, @@ -71,8 +71,8 @@ bool ProcessArgs(int argc, char** argv) {"extended", no_argument, nullptr, 'e'}, {"json", no_argument, nullptr, 'j'}, {"stream", required_argument, nullptr, 's'}, - {"help", no_argument, nullptr, 'h'}, {"version", no_argument, nullptr, 'v'}, + {"help", no_argument, nullptr, 'h'}, {nullptr, no_argument, nullptr, 0} }; @@ -93,14 +93,14 @@ bool ProcessArgs(int argc, char** argv) scorer = optarg; break; - case 'w': + case 'a': audio = optarg; break; case 'b': beam_width = atoi(optarg); break; - + case 'c': set_alphabeta = true; lm_alpha = atof(optarg); @@ -115,10 +115,6 @@ bool ProcessArgs(int argc, char** argv) show_times = true; break; - case 'v': - has_versions = true; - break; - case 'e': extended_metadata = true; break; @@ -131,6 +127,10 @@ bool ProcessArgs(int argc, char** argv) stream_size = atoi(optarg); break; + case 'v': + has_versions = true; + break; + case 'h': // -h or --help case '?': // Unrecognized option default: diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 8ba2e9b2..2474741f 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -12,11 +12,11 @@ class Scorer(swigwrapper.Scorer): :type alpha: float :param beta: Word insertion bonus. :type beta: float - :model_path: Path to load scorer. + :scorer_path: Path to load scorer from. 
:alphabet: Alphabet - :type model_path: basestring + :type scorer_path: basestring """ - def __init__(self, alpha=None, beta=None, model_path=None, alphabet=None): + def __init__(self, alpha=None, beta=None, scorer_path=None, alphabet=None): super(Scorer, self).__init__() # Allow bare initialization if alphabet: @@ -26,7 +26,7 @@ class Scorer(swigwrapper.Scorer): if err != 0: raise ValueError("Error when deserializing alphabet.") - err = self.init(model_path.encode('utf-8'), + err = self.init(scorer_path.encode('utf-8'), native_alphabet) if err != 0: raise ValueError("Scorer initialization failed with error code {}".format(err), err) diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 852ef34c..2958dec9 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -36,7 +36,7 @@ DecoderState::init(const Alphabet& alphabet, prefix_root_.reset(root); prefixes_.push_back(root); - if (ext_scorer != nullptr && (bool)ext_scorer_->dictionary) { + if (ext_scorer != nullptr && (bool)(ext_scorer_->dictionary)) { // no need for std::make_shared<>() since Copy() does 'new' behind the doors auto dict_ptr = std::shared_ptr(ext_scorer->dictionary->Copy(true)); root->set_dictionary(dict_ptr); diff --git a/native_client/java/README.rst b/native_client/java/README.rst index c345c094..7b3e3dcc 100644 --- a/native_client/java/README.rst +++ b/native_client/java/README.rst @@ -51,7 +51,7 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including: * ``output_graph.tflite`` which is the TF Lite model -* ``lm.binary`` and ``trie`` files, if you want to use the language model ; please +* ``kenlm.scorer``, if you want to use the language model ; please be aware that too big language model will make the device run out of memory Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ : diff --git a/native_client/javascript/index.js b/native_client/javascript/index.js index 2ce039bf..772b1a82 100644 --- a/native_client/javascript/index.js +++ b/native_client/javascript/index.js @@ -123,6 +123,11 @@ Model.prototype.createStream = function() { return ctx; } +/** + * @class + * Provides an interface to a DeepSpeech stream. The constructor cannot be called + * directly, use :js:func:`Model.createStream`. + */ function Stream(nativeStream) { this._impl = nativeStream; } diff --git a/native_client/python/__init__.py b/native_client/python/__init__.py index ee38287f..ccb53fc4 100644 --- a/native_client/python/__init__.py +++ b/native_client/python/__init__.py @@ -131,6 +131,10 @@ class Model(object): class Stream(object): + """ + Class wrapping a DeepSpeech stream. The constructor cannot be called directly. 
+ Use :func:`Model.createStream()` + """ def __init__(self, native_stream): self._impl = native_stream diff --git a/taskcluster/examples-base.tyml b/taskcluster/examples-base.tyml index 9739f36a..acee40d9 100644 --- a/taskcluster/examples-base.tyml +++ b/taskcluster/examples-base.tyml @@ -34,7 +34,7 @@ then: DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz" PIP_DEFAULT_TIMEOUT: "60" EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "f3dee7910d1642e14b1e3877568f8342c1c22e05" + EXAMPLES_CHECKOUT_TARGET: "4b97ac41d03ca0d23fa92526433db72a90f47d4a" command: - "/bin/bash" diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index e0c12162..6bcc0acd 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -44,7 +44,7 @@ payload: MSYS: 'winsymlinks:nativestrict' TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow} EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples" - EXAMPLES_CHECKOUT_TARGET: "f3dee7910d1642e14b1e3877568f8342c1c22e05" + EXAMPLES_CHECKOUT_TARGET: "4b97ac41d03ca0d23fa92526433db72a90f47d4a" command: - >- From 8dedda7759b1b42a0ca6fb15e0314c26a752f383 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 5 Feb 2020 17:19:53 +0100 Subject: [PATCH 15/16] Address review comments --- data/README.rst | 2 +- data/lm/README.rst | 6 ++--- evaluate_tflite.py | 2 +- native_client/ctcdecode/__init__.py | 8 +++++-- .../ctcdecode/ctc_beam_search_decoder.cpp | 16 ++++++------- .../ctcdecode/ctc_beam_search_decoder.h | 9 ++++---- native_client/ctcdecode/scorer.cpp | 23 ++++++++++++++----- native_client/ctcdecode/swigwrapper.i | 9 +++++--- native_client/deepspeech.cc | 4 ++-- native_client/java/README.rst | 5 ++-- native_client/javascript/client.js | 4 ++-- native_client/modelstate.h | 2 +- native_client/python/client.py | 4 ++-- 13 files changed, 56 insertions(+), 38 deletions(-) diff --git a/data/README.rst b/data/README.rst index 9db78c6b..88314843 100644 --- a/data/README.rst +++ b/data/README.rst @@ -5,7 +5,7 @@ This directory contains language-specific data files. Most importantly, you will 1. A list of unique characters for the target language (e.g. English) in `data/alphabet.txt` -2. A scorer package (`data/lm/kenlm.scorer`) generated with `data/lm/generate_package.py`, which includes a binary n-gram language model generated with `data/lm/generate_lm.py`. +2. A scorer package (`data/lm/kenlm.scorer`) generated with `data/lm/generate_package.py`. The scorer package includes a binary n-gram language model generated with `data/lm/generate_lm.py`. For more information on how to build these resources from scratch, see `data/lm/README.md` diff --git a/data/lm/README.rst b/data/lm/README.rst index c1666700..cc3e11b7 100644 --- a/data/lm/README.rst +++ b/data/lm/README.rst @@ -1,8 +1,8 @@ -The LM binary was generated from the LibriSpeech normalized LM training text, available `here `_\ , using the `generate_lm.py` script (will generate lm.binary in the folder it is run from). `KenLM `_'s built binaries must be in your PATH (lmplz, build_binary, filter). +The LM binary was generated from the LibriSpeech normalized LM training text, available `here `_\ , using the `generate_lm.py` script (will generate `lm.binary` and `librispeech-vocab-500k.txt` in the folder it is run from). `KenLM `_'s built binaries must be in your PATH (lmplz, build_binary, filter). The scorer package was then built using the `generate_package.py` script: .. 
code-block:: bash - - python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab librispeech-vocab-500k.txt --default_alpha 0.75 --default_beta 1.85 --package kenlm.scorer + python generate_lm.py # this will create lm.binary and librispeech-vocab-500k.txt + python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab librispeech-vocab-500k.txt --default_alpha 0.75 --default_beta 1.85 --package kenlm.scorer diff --git a/evaluate_tflite.py b/evaluate_tflite.py index bdc5f231..aba6fb68 100644 --- a/evaluate_tflite.py +++ b/evaluate_tflite.py @@ -27,7 +27,7 @@ This module should be self-contained: - pip install native_client/python/dist/deepspeech*.whl - pip install -r requirements_eval_tflite.txt -Then run with a TF Lite model, LM and a CSV test file +Then run with a TF Lite model, a scorer and a CSV test file ''' BEAM_WIDTH = 500 diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 2474741f..e0282ca5 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -20,16 +20,20 @@ class Scorer(swigwrapper.Scorer): super(Scorer, self).__init__() # Allow bare initialization if alphabet: + assert alpha is not None, 'alpha parameter is required' + assert beta is not None, 'beta parameter is required' + assert scorer_path, 'scorer_path parameter is required' + serialized = alphabet.serialize() native_alphabet = swigwrapper.Alphabet() err = native_alphabet.deserialize(serialized, len(serialized)) if err != 0: - raise ValueError("Error when deserializing alphabet.") + raise ValueError('Error when deserializing alphabet.') err = self.init(scorer_path.encode('utf-8'), native_alphabet) if err != 0: - raise ValueError("Scorer initialization failed with error code {}".format(err), err) + raise ValueError('Scorer initialization failed with error code {}'.format(err)) self.reset_params(alpha, beta) diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp index 2958dec9..5dadd57f 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp +++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp @@ -18,7 +18,7 @@ DecoderState::init(const Alphabet& alphabet, size_t beam_size, double cutoff_prob, size_t cutoff_top_n, - Scorer *ext_scorer) + std::shared_ptr ext_scorer) { // assign special ids abs_time_step_ = 0; @@ -36,7 +36,7 @@ DecoderState::init(const Alphabet& alphabet, prefix_root_.reset(root); prefixes_.push_back(root); - if (ext_scorer != nullptr && (bool)(ext_scorer_->dictionary)) { + if (ext_scorer && (bool)(ext_scorer_->dictionary)) { // no need for std::make_shared<>() since Copy() does 'new' behind the doors auto dict_ptr = std::shared_ptr(ext_scorer->dictionary->Copy(true)); root->set_dictionary(dict_ptr); @@ -58,7 +58,7 @@ DecoderState::next(const double *probs, float min_cutoff = -NUM_FLT_INF; bool full_beam = false; - if (ext_scorer_ != nullptr) { + if (ext_scorer_) { size_t num_prefixes = std::min(prefixes_.size(), beam_size_); std::partial_sort(prefixes_.begin(), prefixes_.begin() + num_prefixes, @@ -109,7 +109,7 @@ DecoderState::next(const double *probs, log_p = log_prob_c + prefix->score; } - if (ext_scorer_ != nullptr) { + if (ext_scorer_) { // skip scoring the space in word based LMs PathTrie* prefix_to_score; if (ext_scorer_->is_utf8_mode()) { @@ -166,7 +166,7 @@ DecoderState::decode() const } // score the last word of each prefix that doesn't end with space - if (ext_scorer_ != nullptr) { + if (ext_scorer_) { for 
(size_t i = 0; i < beam_size_ && i < prefixes_copy.size(); ++i) { auto prefix = prefixes_copy[i]; if (!ext_scorer_->is_scoring_boundary(prefix->parent, prefix->character)) { @@ -200,7 +200,7 @@ DecoderState::decode() const Output output; prefixes_copy[i]->get_path_vec(output.tokens, output.timesteps); double approx_ctc = scores[prefixes_copy[i]]; - if (ext_scorer_ != nullptr) { + if (ext_scorer_) { auto words = ext_scorer_->split_labels_into_scored_units(output.tokens); // remove term insertion weight approx_ctc -= words.size() * ext_scorer_->beta; @@ -222,7 +222,7 @@ std::vector ctc_beam_search_decoder( size_t beam_size, double cutoff_prob, size_t cutoff_top_n, - Scorer *ext_scorer) + std::shared_ptr ext_scorer) { DecoderState state; state.init(alphabet, beam_size, cutoff_prob, cutoff_top_n, ext_scorer); @@ -243,7 +243,7 @@ ctc_beam_search_decoder_batch( size_t num_processes, double cutoff_prob, size_t cutoff_top_n, - Scorer *ext_scorer) + std::shared_ptr ext_scorer) { VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); VALID_CHECK_EQ(batch_size, seq_lengths_size, "must have one sequence length per batch element"); diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.h b/native_client/ctcdecode/ctc_beam_search_decoder.h index 4d6b7ea5..a3d5c480 100644 --- a/native_client/ctcdecode/ctc_beam_search_decoder.h +++ b/native_client/ctcdecode/ctc_beam_search_decoder.h @@ -1,6 +1,7 @@ #ifndef CTC_BEAM_SEARCH_DECODER_H_ #define CTC_BEAM_SEARCH_DECODER_H_ +#include #include #include @@ -16,7 +17,7 @@ class DecoderState { double cutoff_prob_; size_t cutoff_top_n_; - Scorer* ext_scorer_; // weak + std::shared_ptr ext_scorer_; std::vector prefixes_; std::unique_ptr prefix_root_; @@ -45,7 +46,7 @@ public: size_t beam_size, double cutoff_prob, size_t cutoff_top_n, - Scorer *ext_scorer); + std::shared_ptr ext_scorer); /* Send data to the decoder * @@ -95,7 +96,7 @@ std::vector ctc_beam_search_decoder( size_t beam_size, double cutoff_prob, size_t cutoff_top_n, - Scorer *ext_scorer); + std::shared_ptr ext_scorer); /* CTC Beam Search Decoder for batch data * Parameters: @@ -126,6 +127,6 @@ ctc_beam_search_decoder_batch( size_t num_processes, double cutoff_prob, size_t cutoff_top_n, - Scorer *ext_scorer); + std::shared_ptr ext_scorer); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index c5ae54a2..7b6c74c9 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -71,8 +71,19 @@ void Scorer::setup_char_map() int Scorer::load_lm(const std::string& lm_path) { - // load language model + // Check if file is readable to avoid KenLM throwing an exception const char* filename = lm_path.c_str(); + if (access(filename, R_OK) != 0) { + return 1; + } + + // Check if the file format is valid to avoid KenLM throwing an exception + lm::ngram::ModelType model_type; + if (!lm::ngram::RecognizeBinary(filename, model_type)) { + return 1; + } + + // Load the LM lm::ngram::Config config; config.load_method = util::LoadMethod::LAZY; language_model_.reset(lm::ngram::LoadVirtual(filename, config)); @@ -100,21 +111,21 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) int magic; fin.read(reinterpret_cast(&magic), sizeof(magic)); if (magic != MAGIC) { - std::cerr << "Error: Can't parse trie file, invalid header. Try updating " - "your trie file." << std::endl; + std::cerr << "Error: Can't parse scorer file, invalid header. Try updating " + "your scorer file." 
diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i
index af3a1952..fd0f4f08 100644
--- a/native_client/ctcdecode/swigwrapper.i
+++ b/native_client/ctcdecode/swigwrapper.i
@@ -7,9 +7,10 @@
 #include "workspace_status.h"
 %}
 
-%include "pyabc.i"
-%include "std_string.i"
-%include "std_vector.i"
+%include <pyabc.i>
+%include <std_string.i>
+%include <std_vector.i>
+%include <std_shared_ptr.i>
 %include "numpy.i"
 
 %init %{
@@ -20,6 +21,8 @@ namespace std {
 %template(StringVector) vector<string>;
 }
 
+%shared_ptr(Scorer);
+
 // Convert NumPy arrays to pointer+lengths
 %apply (double* IN_ARRAY2, int DIM1, int DIM2) {(const double *probs, int time_dim, int class_dim)};
 %apply (double* IN_ARRAY3, int DIM1, int DIM2, int DIM3) {(const double *probs, int batch_size, int time_dim, int class_dim)};
diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc
index 0a61f3de..274ce41f 100644
--- a/native_client/deepspeech.cc
+++ b/native_client/deepspeech.cc
@@ -319,7 +319,7 @@ int
 DS_DisableExternalScorer(ModelState* aCtx)
 {
   if (aCtx->scorer_) {
-    aCtx->scorer_.reset(nullptr);
+    aCtx->scorer_.reset();
     return DS_ERR_OK;
   }
   return DS_ERR_SCORER_NOT_ENABLED;
@@ -363,7 +363,7 @@ DS_CreateStream(ModelState* aCtx,
                     aCtx->beam_width_,
                     cutoff_prob,
                     cutoff_top_n,
-                    aCtx->scorer_.get());
+                    aCtx->scorer_);
 
   *retval = ctx.release();
   return DS_ERR_OK;
diff --git a/native_client/java/README.rst b/native_client/java/README.rst
index 7b3e3dcc..626400d0 100644
--- a/native_client/java/README.rst
+++ b/native_client/java/README.rst
@@ -51,12 +51,11 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:
 
 * ``output_graph.tflite`` which is the TF Lite model
-* ``kenlm.scorer``, if you want to use the language model ; please
-  be aware that too big language model will make the device run out of memory
+* ``kenlm.scorer``, if you want to use the scorer; please be aware that too big
+  scorer will make the device run out of memory
 
 Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ :
-
 * ``deepspeech``
 * ``libdeepspeech.so``
 * ``libc++_shared.so``
diff --git a/native_client/javascript/client.js b/native_client/javascript/client.js
index 79561a97..7266b85d 100644
--- a/native_client/javascript/client.js
+++ b/native_client/javascript/client.js
@@ -32,8 +32,8 @@ parser.addArgument(['--model'], {required: true, help: 'Path to the model (proto
 parser.addArgument(['--scorer'], {help: 'Path to the external scorer file'});
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
 parser.addArgument(['--beam_width'], {help: 'Beam width for the CTC decoder', defaultValue: 500, type: 'int'});
-parser.addArgument(['--lm_alpha'], {help: 'Language model weight (lm_alpha). If not set, use default value from scorer.', type: 'float'});
-parser.addArgument(['--lm_beta'], {help: 'Word insertion bonus (lm_beta). If not set, use default value from scorer.', type: 'float'});
+parser.addArgument(['--lm_alpha'], {help: 'Language model weight (lm_alpha). If not specified, use default from the scorer package.', type: 'float'});
+parser.addArgument(['--lm_beta'], {help: 'Word insertion bonus (lm_beta). If not specified, use default from the scorer package.', type: 'float'});
 parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
 parser.addArgument(['--extended'], {action: 'storeTrue', help: 'Output string from extended metadata'});
 var args = parser.parseArgs();
diff --git a/native_client/modelstate.h b/native_client/modelstate.h
index ff106a62..d4f11c1c 100644
--- a/native_client/modelstate.h
+++ b/native_client/modelstate.h
@@ -16,7 +16,7 @@ struct ModelState {
   static constexpr unsigned int BATCH_SIZE = 1;
 
   Alphabet alphabet_;
-  std::unique_ptr<Scorer> scorer_;
+  std::shared_ptr<Scorer> scorer_;
   unsigned int beam_width_;
   unsigned int n_steps_;
   unsigned int n_context_;
diff --git a/native_client/python/client.py b/native_client/python/client.py
index ba5d70b2..2ef88caf 100644
--- a/native_client/python/client.py
+++ b/native_client/python/client.py
@@ -95,9 +95,9 @@ def main():
     parser.add_argument('--beam_width', type=int, default=500,
                         help='Beam width for the CTC decoder')
     parser.add_argument('--lm_alpha', type=float,
-                        help='Language model weight (lm_alpha)')
+                        help='Language model weight (lm_alpha). If not specified, use default from the scorer package.')
     parser.add_argument('--lm_beta', type=float,
-                        help='Word insertion bonus (lm_beta)')
+                        help='Word insertion bonus (lm_beta). If not specified, use default from the scorer package.')
     parser.add_argument('--version', action=VersionAction, help='Print version and exits')
     parser.add_argument('--extended', required=False, action='store_true',
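Both clients now treat --lm_alpha/--lm_beta as pure overrides: leaving them out means "keep the defaults stored in the scorer package". A small, self-contained check of that argparse behaviour (the add_argument calls mirror the ones above):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--lm_alpha', type=float,
                        help='Language model weight (lm_alpha). If not specified, use default from the scorer package.')
    parser.add_argument('--lm_beta', type=float,
                        help='Word insertion bonus (lm_beta). If not specified, use default from the scorer package.')

    args = parser.parse_args([])   # no flags given on the command line
    assert args.lm_alpha is None   # None (not 0.0) signals "use the value stored in the scorer"
    assert args.lm_beta is None
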
From 6efc3ccf50df1c499e06df17b2a3eb337069f492 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Tue, 11 Feb 2020 11:41:28 +0100
Subject: [PATCH 16/16] Update examples model asset

---
 taskcluster/examples-base.tyml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taskcluster/examples-base.tyml b/taskcluster/examples-base.tyml
index acee40d9..2af1c1f1 100644
--- a/taskcluster/examples-base.tyml
+++ b/taskcluster/examples-base.tyml
@@ -30,7 +30,7 @@ then:
       image: ${build.docker_image}
       env:
-        DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.6.1/models.tar.gz"
+        DEEPSPEECH_MODEL: "https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.1/models.tar.gz"
        DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz"
        PIP_DEFAULT_TIMEOUT: "60"
        EXAMPLES_CLONE_URL: "https://github.com/mozilla/DeepSpeech-examples"
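For reference, the asset the examples task now points at can be pulled down locally with nothing but the standard library — a rough sketch, assuming the release tarball layout is what the examples expect:

    import tarfile
    import urllib.request

    # Same URL the CI task exports as DEEPSPEECH_MODEL above.
    URL = 'https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.1/models.tar.gz'

    path, _ = urllib.request.urlretrieve(URL, 'models.tar.gz')
    with tarfile.open(path) as tar:
        tar.extractall('models')   # extraction target is arbitrary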