From d2eb305b73823efacb3f8de2b480346017c50cd7 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Tue, 12 Nov 2019 21:56:42 +0100
Subject: [PATCH] Address review comment and add missing check for presence of
 scorer

---
 .../ctcdecode/ctc_beam_search_decoder.cpp     | 38 +++++++++----------
 native_client/ctcdecode/decoder_utils.cpp     |  6 ---
 native_client/ctcdecode/decoder_utils.h       |  7 +++-
 3 files changed, 23 insertions(+), 28 deletions(-)
diff --git a/native_client/ctcdecode/ctc_beam_search_decoder.cpp b/native_client/ctcdecode/ctc_beam_search_decoder.cpp
index 31999078..5a2c834e 100644
--- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp
+++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp
@@ -109,27 +109,25 @@ DecoderState::next(const double *probs,
             log_p = log_prob_c + prefix->score;
           }
 
-          // skip scoring the space in word based LMs
-          PathTrie* prefix_to_score;
-          if (ext_scorer_->is_utf8_mode()) {
-            prefix_to_score = prefix_new;
-          } else {
-            prefix_to_score = prefix;
-          }
+          if (ext_scorer_ != nullptr) {
+            // skip scoring the space in word based LMs
+            PathTrie* prefix_to_score;
+            if (ext_scorer_->is_utf8_mode()) {
+              prefix_to_score = prefix_new;
+            } else {
+              prefix_to_score = prefix;
+            }
 
-          // check if we need to score
-          bool is_scoring_boundary = ext_scorer_ != nullptr &&
-                                     ext_scorer_->is_scoring_boundary(prefix_to_score, c);
-
-          // language model scoring
-          if (is_scoring_boundary) {
-            float score = 0.0;
-            std::vector<std::string> ngram;
-            ngram = ext_scorer_->make_ngram(prefix_to_score);
-            bool bos = ngram.size() < ext_scorer_->get_max_order();
-            score = ext_scorer_->get_log_cond_prob(ngram, bos) * ext_scorer_->alpha;
-            log_p += score;
-            log_p += ext_scorer_->beta;
+            // language model scoring
+            if (ext_scorer_->is_scoring_boundary(prefix_to_score, c)) {
+              float score = 0.0;
+              std::vector<std::string> ngram;
+              ngram = ext_scorer_->make_ngram(prefix_to_score);
+              bool bos = ngram.size() < ext_scorer_->get_max_order();
+              score = ext_scorer_->get_log_cond_prob(ngram, bos) * ext_scorer_->alpha;
+              log_p += score;
+              log_p += ext_scorer_->beta;
+            }
           }
 
           prefix_new->log_prob_nb_cur =
diff --git a/native_client/ctcdecode/decoder_utils.cpp b/native_client/ctcdecode/decoder_utils.cpp
index be810c07..ed244c3a 100644
--- a/native_client/ctcdecode/decoder_utils.cpp
+++ b/native_client/ctcdecode/decoder_utils.cpp
@@ -46,12 +46,6 @@ size_t get_utf8_str_len(const std::string &str) {
   return str_len;
 }
 
-// Return weather a byte is a code point boundary (not a continuation byte).
-bool byte_is_codepoint_boundary(unsigned char c) {
-  // only continuation bytes have their most significant bits set to 10
-  return (c & 0xC0) != 0x80;
-}
-
 std::vector<std::string> split_into_codepoints(const std::string &str) {
   std::vector<std::string> result;
   std::string out_str;
diff --git a/native_client/ctcdecode/decoder_utils.h b/native_client/ctcdecode/decoder_utils.h
index ec0a93fc..3ba1d7e6 100644
--- a/native_client/ctcdecode/decoder_utils.h
+++ b/native_client/ctcdecode/decoder_utils.h
@@ -89,8 +89,11 @@ std::vector<std::string> split_into_bytes(const std::string &str);
 void add_word_to_fst(const std::vector<int> &word,
                      fst::StdVectorFst *dictionary);
 
-// Return weather a byte is a code point boundary (not a continuation byte).
-bool byte_is_codepoint_boundary(unsigned char c);
+// Return true if byte `c` is a code point boundary, i.e. not a UTF-8 continuation byte; false otherwise.
+inline bool byte_is_codepoint_boundary(unsigned char c) {
+  // 0xC0 masks the two most significant bits; only continuation bytes have them set to 10 (0x80)
+  return (c & 0xC0) != 0x80;
+}
 
 // Add a word in string to dictionary
 bool add_word_to_dictionary(