From 83a89dcae60ff8d0567ad518608c06df822894b1 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Tue, 15 Oct 2019 12:43:03 +0200
Subject: [PATCH 1/2] Add debugging code to trie_load.cc

---
 native_client/trie_load.cc | 42 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/native_client/trie_load.cc b/native_client/trie_load.cc
index eab97b4b..cd625c23 100644
--- a/native_client/trie_load.cc
+++ b/native_client/trie_load.cc
@@ -5,8 +5,13 @@
 #include "ctcdecode/scorer.h"
 #include "alphabet.h"
 
-using namespace std;
+#ifdef DEBUG
+#include <limits>
+#include <unordered_map>
+#include "ctcdecode/path_trie.h"
+#endif // DEBUG
 
+using namespace std;
 
 int main(int argc, char** argv)
 {
@@ -22,5 +27,40 @@ int main(int argc, char** argv)
     return err;
   }
   Scorer scorer;
+
+#ifndef DEBUG
   return scorer.init(0.0, 0.0, kenlm_path, trie_path, alphabet);
+#else
+  // Print some info about the FST; the scorer must be initialized before its dictionary is read.
+  using FstType = fst::ConstFst<fst::StdArc>;
+  const int init_err = scorer.init(0.0, 0.0, kenlm_path, trie_path, alphabet);
+  auto dict = scorer.dictionary.get();
+  if (init_err != 0 || dict == nullptr) {
+    return init_err != 0 ? init_err : 1;  // nothing to inspect; never dereference a null FST
+  }
+
+  struct state_info {  // range of (ilabel - 1) over the arcs entering one state
+    int range_min = std::numeric_limits<int>::max();
+    int range_max = std::numeric_limits<int>::min();
+  };
+  auto print_states_from = [&](int i) {  // prints one line per distinct successor state of state i
+    std::unordered_map<int, state_info> sinfo;
+    for (fst::ArcIterator<FstType> aiter(*dict, i); !aiter.Done(); aiter.Next()) {
+      const fst::StdArc& arc = aiter.Value();
+      state_info& target = sinfo[arc.nextstate];  // single lookup per arc; inserts defaults on first sight
+      target.range_min = std::min(target.range_min, arc.ilabel-1);
+      target.range_max = std::max(target.range_max, arc.ilabel-1);
+    }
+    for (const auto& entry : sinfo) {
+      const state_info& s = entry.second;
+      printf("%d -> state %d (chars 0x%X - 0x%X, '%c' - '%c')\n", i, entry.first, (unsigned int)s.range_min, (unsigned int)s.range_max, (char)s.range_min, (char)s.range_max);
+    }
+  };
+  print_states_from(0);
+  // for (int i = 1; i < 10; ++i) {
+  //   print_states_from(i);
+  // }
+#endif // DEBUG
+
+  return 0;
 }

From 31d81740eeb0506339ec9a967f76fe7dba6a7b29 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Tue, 15 Oct 2019 12:46:59 +0200
Subject: [PATCH 2/2] Add debugging helpers to PathTrie

---
 native_client/ctcdecode/Makefile        |  4 ++--
 native_client/ctcdecode/build_common.py |  2 +-
 native_client/ctcdecode/path_trie.cpp   | 28 +++++++++++++++++++++++++
 native_client/ctcdecode/path_trie.h     |  9 ++++++++
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile
index c000fc9e..98f8f9fd 100644
--- a/native_client/ctcdecode/Makefile
+++ b/native_client/ctcdecode/Makefile
@@ -33,8 +33,8 @@ bindings: clean-keep-common
 
 bindings-debug: clean-keep-common
 	pip install --quiet $(PYTHON_PACKAGES) wheel==0.31.0 setuptools==39.1.0
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	$(GENERATE_DEBUG_SYMS)
 	find temp_build -type f -name "*.o" -delete
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	rm -rf temp_build
diff --git a/native_client/ctcdecode/build_common.py b/native_client/ctcdecode/build_common.py
index 1965287e..1a4058b5 100644
--- a/native_client/ctcdecode/build_common.py
+++ b/native_client/ctcdecode/build_common.py
@@ -11,7 +11,7 @@ from multiprocessing.dummy import Pool
 
 ARGS = ['-DKENLM_MAX_ORDER=6', '-std=c++11', '-Wno-unused-local-typedefs', '-Wno-sign-compare']
 OPT_ARGS = ['-O3', '-DNDEBUG']
-DBG_ARGS = ['-O0', '-g', '-UNDEBUG']
+DBG_ARGS = ['-O0', '-g', '-UNDEBUG', '-DDEBUG']
 
 INCLUDES = [
     '..',
diff --git a/native_client/ctcdecode/path_trie.cpp b/native_client/ctcdecode/path_trie.cpp
index 64410d93..51f75ff3 100644
--- a/native_client/ctcdecode/path_trie.cpp
+++ b/native_client/ctcdecode/path_trie.cpp
@@ -174,3 +174,31 @@ void PathTrie::set_dictionary(PathTrie::FstType* dictionary) {
 void PathTrie::set_matcher(std::shared_ptr<fst::SortedMatcher<FstType>> matcher) {
   matcher_ = matcher;
 }
+
+#ifdef DEBUG
+void PathTrie::vec(std::vector<PathTrie*>& out) {
+  // Emit the ancestors before this node, so the chain is appended to `out`
+  // in top-down order after whatever `out` already held.
+  if (parent) parent->vec(out);
+  out.push_back(this);
+}
+
+// Debug dump to stdout of the chain from the topmost ancestor down to this node: hex labels, timesteps, transcript.
+void PathTrie::print(const Alphabet& a) {
+  std::vector<PathTrie*> chain;
+  vec(chain);
+  std::string tr;
+  printf("characters:\t ");
+  for (PathTrie* el : chain) {
+    printf("%X ", static_cast<unsigned int>(el->character));  // %X takes an unsigned argument
+    if (el->character != ROOT_) {  // nodes carrying ROOT_ contribute nothing to the transcript
+      tr.append(a.StringFromLabel(el->character));
+    }
+  }
+  printf("\ntimesteps:\t ");
+  for (PathTrie* el : chain) {
+    printf("%d ", el->timestep);
+  }
+  printf("\ntranscript:\t %s\n", tr.c_str());
+}
+#endif // DEBUG
diff --git a/native_client/ctcdecode/path_trie.h b/native_client/ctcdecode/path_trie.h
index 10a1b687..9b71f35b 100644
--- a/native_client/ctcdecode/path_trie.h
+++ b/native_client/ctcdecode/path_trie.h
@@ -9,6 +9,10 @@
 
 #include "fst/fstlib.h"
 
+#ifdef DEBUG
+#include "alphabet.h"
+#endif
+
 /* Trie tree for prefix storing and manipulating, with a dictionary in
  * finite-state transducer for spelling correction.
  */
@@ -44,6 +48,11 @@ public:
   // remove current path from root
   void remove();
 
+#ifdef DEBUG
+  void vec(std::vector<PathTrie*>& out);  // appends this node's ancestors (topmost first), then this node, to out
+  void print(const Alphabet& a);  // prints the chain's characters, timesteps and transcript (labels decoded via a) to stdout
+#endif // DEBUG
+
   float log_prob_b_prev;
   float log_prob_nb_prev;
   float log_prob_b_cur;