From 83a89dcae60ff8d0567ad518608c06df822894b1 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Tue, 15 Oct 2019 12:43:03 +0200
Subject: [PATCH 1/2] Add debugging code to trie_load.cc

The scorer is initialised in every build flavour so that the DEBUG dump
has a dictionary to walk instead of reading a scorer that was never set up.

---
 native_client/trie_load.cc | 60 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/native_client/trie_load.cc b/native_client/trie_load.cc
index eab97b4b..cd625c23 100644
--- a/native_client/trie_load.cc
+++ b/native_client/trie_load.cc
@@ -5,8 +5,13 @@
 #include "ctcdecode/scorer.h"
 #include "alphabet.h"
 
-using namespace std;
+#ifdef DEBUG
+#include <limits>
+#include <unordered_map>
+#include "ctcdecode/path_trie.h"
+#endif // DEBUG
 
+using namespace std;
 
 int main(int argc, char** argv)
 {
@@ -22,5 +27,56 @@ int main(int argc, char** argv)
     return err;
   }
   Scorer scorer;
-  return scorer.init(0.0, 0.0, kenlm_path, trie_path, alphabet);
+
+  // Initialise the scorer in every build flavour. The DEBUG dump below reads
+  // scorer.dictionary, which is presumably only populated by init().
+  const int scorer_err = scorer.init(0.0, 0.0, kenlm_path, trie_path, alphabet);
+
+#ifndef DEBUG
+  return scorer_err;
+#else
+  if (scorer_err != 0) {
+    return scorer_err;
+  }
+
+  // Print some info about the FST
+  using FstType = fst::ConstFst<fst::StdArc>;
+
+  auto dict = scorer.dictionary.get();
+  if (dict == nullptr) {
+    fprintf(stderr, "Scorer has no dictionary, nothing to print\n");
+    return 1;
+  }
+
+  // Smallest and largest (ilabel - 1) seen on the arcs leading to one state.
+  struct state_info {
+    int range_min = std::numeric_limits<int>::max();
+    int range_max = std::numeric_limits<int>::min();
+  };
+
+  // Prints, for every state reachable from state i over a single arc, the
+  // range of (ilabel - 1) values among the arcs that lead to it.
+  auto print_states_from = [&](int i) {
+    std::unordered_map<int, state_info> sinfo;
+    for (fst::ArcIterator<FstType> aiter(*dict, i); !aiter.Done(); aiter.Next()) {
+      const fst::StdArc& arc = aiter.Value();
+      state_info& info = sinfo[arc.nextstate];
+      info.range_min = std::min(info.range_min, arc.ilabel-1);
+      info.range_max = std::max(info.range_max, arc.ilabel-1);
+    }
+
+    for (const auto& entry : sinfo) {
+      const state_info& s = entry.second;
+      printf("%d -> state %d (chars 0x%X - 0x%X, '%c' - '%c')\n", i, entry.first, (unsigned int)s.range_min, (unsigned int)s.range_max, (char)s.range_min, (char)s.range_max);
+    }
+  };
+
+  print_states_from(0);
+
+  // for (int i = 1; i < 10; ++i) {
+  //   print_states_from(i);
+  // }
+#endif // DEBUG
+
+  return 0;
 }

From 31d81740eeb0506339ec9a967f76fe7dba6a7b29 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Tue, 15 Oct 2019 12:46:59
+0200
Subject: [PATCH 2/2] Add debugging helpers to PathTrie

---
 native_client/ctcdecode/Makefile        |  4 ++--
 native_client/ctcdecode/build_common.py |  2 +-
 native_client/ctcdecode/path_trie.cpp   | 34 +++++++++++++++++++++++++++++++
 native_client/ctcdecode/path_trie.h     | 12 ++++++++++++
 4 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile
index c000fc9e..98f8f9fd 100644
--- a/native_client/ctcdecode/Makefile
+++ b/native_client/ctcdecode/Makefile
@@ -33,8 +33,8 @@ bindings: clean-keep-common
 
 bindings-debug: clean-keep-common
 	pip install --quiet $(PYTHON_PACKAGES) wheel==0.31.0 setuptools==39.1.0
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	$(GENERATE_DEBUG_SYMS)
 	find temp_build -type f -name "*.o" -delete
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	rm -rf temp_build
diff --git a/native_client/ctcdecode/build_common.py b/native_client/ctcdecode/build_common.py
index 1965287e..1a4058b5 100644
--- a/native_client/ctcdecode/build_common.py
+++ b/native_client/ctcdecode/build_common.py
@@ -11,7 +11,7 @@ from multiprocessing.dummy import Pool
 
 ARGS = ['-DKENLM_MAX_ORDER=6', '-std=c++11', '-Wno-unused-local-typedefs', '-Wno-sign-compare']
 OPT_ARGS = ['-O3', '-DNDEBUG']
-DBG_ARGS = ['-O0', '-g', '-UNDEBUG']
+DBG_ARGS = ['-O0', '-g', '-UNDEBUG', '-DDEBUG']
 
 INCLUDES = [
     '..',
diff --git a/native_client/ctcdecode/path_trie.cpp b/native_client/ctcdecode/path_trie.cpp
index 64410d93..51f75ff3 100644
--- a/native_client/ctcdecode/path_trie.cpp
+++ b/native_client/ctcdecode/path_trie.cpp
@@ -174,3 +174,37 @@ void PathTrie::set_dictionary(PathTrie::FstType* dictionary) {
 void PathTrie::set_matcher(std::shared_ptr<fst::SortedMatcher<FstType>> matcher) {
   matcher_ = matcher;
 }
+
+#ifdef DEBUG
+// Appends the chain of nodes from the topmost ancestor down to this node to
+// `out`, ancestors first. Existing contents of `out` are kept.
+void PathTrie::vec(std::vector<PathTrie*>& out) {
+  if (parent != nullptr) {
+    parent->vec(out);
+  }
+  out.push_back(this);
+}
+
+// Dumps this path to stdout: the character of every node on it (in hex), the
+// timestep of every node, and the transcript built from the characters of
+// all nodes whose character is not ROOT_.
+void PathTrie::print(const Alphabet& a) {
+  std::vector<PathTrie*> chain;
+  vec(chain);
+  std::string tr;
+  printf("characters:\t ");
+  for (PathTrie* el : chain) {
+    // %X expects an unsigned int, so convert explicitly.
+    printf("%X ", static_cast<unsigned int>(el->character));
+    if (el->character != ROOT_) {
+      tr.append(a.StringFromLabel(el->character));
+    }
+  }
+  printf("\ntimesteps:\t ");
+  for (PathTrie* el : chain) {
+    printf("%d ", el->timestep);
+  }
+  printf("\n");
+  printf("transcript:\t %s\n", tr.c_str());
+}
+#endif // DEBUG
diff --git a/native_client/ctcdecode/path_trie.h b/native_client/ctcdecode/path_trie.h
index 10a1b687..9b71f35b 100644
--- a/native_client/ctcdecode/path_trie.h
+++ b/native_client/ctcdecode/path_trie.h
@@ -9,6 +9,10 @@
 
 #include "fst/fstlib.h"
 
+#ifdef DEBUG
+#include "alphabet.h"
+#endif // DEBUG
+
 /* Trie tree for prefix storing and manipulating, with a dictionary in
  * finite-state transducer for spelling correction.
  */
@@ -44,6 +48,14 @@ public:
   // remove current path from root
   void remove();
 
+#ifdef DEBUG
+  // append the nodes from the topmost ancestor down to this node to out
+  void vec(std::vector<PathTrie*>& out);
+
+  // print characters, timesteps and transcript of this path to stdout
+  void print(const Alphabet& a);
+#endif // DEBUG
+
   float log_prob_b_prev;
   float log_prob_nb_prev;
   float log_prob_b_cur;