Address review comment and add missing check for presence of scorer

2019-11-12 21:56:42 +01:00 · 2019-11-12 21:56:42 +01:00 · d2eb305b73
commit d2eb305b73
parent 0e6952c3a8
3 changed files with 23 additions and 28 deletions
--- a/native_client/ctcdecode/ctc_beam_search_decoder.cpp
+++ b/native_client/ctcdecode/ctc_beam_search_decoder.cpp
@ -109,27 +109,25 @@ DecoderState::next(const double *probs,
            log_p = log_prob_c + prefix->score;
          }

-          // skip scoring the space in word based LMs
-          PathTrie* prefix_to_score;
-          if (ext_scorer_->is_utf8_mode()) {
-            prefix_to_score = prefix_new;
-          } else {
-            prefix_to_score = prefix;
-          }
+          if (ext_scorer_ != nullptr) {
+            // skip scoring the space in word based LMs
+            PathTrie* prefix_to_score;
+            if (ext_scorer_->is_utf8_mode()) {
+              prefix_to_score = prefix_new;
+            } else {
+              prefix_to_score = prefix;
+            }

-          // check if we need to score
-          bool is_scoring_boundary = ext_scorer_ != nullptr &&
-                                     ext_scorer_->is_scoring_boundary(prefix_to_score, c);
-
-          // language model scoring
-          if (is_scoring_boundary) {
-            float score = 0.0;
-            std::vector<std::string> ngram;
-            ngram = ext_scorer_->make_ngram(prefix_to_score);
-            bool bos = ngram.size() < ext_scorer_->get_max_order();
-            score = ext_scorer_->get_log_cond_prob(ngram, bos) * ext_scorer_->alpha;
-            log_p += score;
-            log_p += ext_scorer_->beta;
+            // language model scoring
+            if (ext_scorer_->is_scoring_boundary(prefix_to_score, c)) {
+              float score = 0.0;
+              std::vector<std::string> ngram;
+              ngram = ext_scorer_->make_ngram(prefix_to_score);
+              bool bos = ngram.size() < ext_scorer_->get_max_order();
+              score = ext_scorer_->get_log_cond_prob(ngram, bos) * ext_scorer_->alpha;
+              log_p += score;
+              log_p += ext_scorer_->beta;
+            }
          }

          prefix_new->log_prob_nb_cur =
--- a/native_client/ctcdecode/decoder_utils.cpp
+++ b/native_client/ctcdecode/decoder_utils.cpp
@ -46,12 +46,6 @@ size_t get_utf8_str_len(const std::string &str) {
  return str_len;
 }

-// Return weather a byte is a code point boundary (not a continuation byte).
-bool byte_is_codepoint_boundary(unsigned char c) {
-  // only continuation bytes have their most significant bits set to 10
-  return (c & 0xC0) != 0x80;
-}
-
 std::vector<std::string> split_into_codepoints(const std::string &str) {
  std::vector<std::string> result;
  std::string out_str;
--- a/native_client/ctcdecode/decoder_utils.h
+++ b/native_client/ctcdecode/decoder_utils.h
@ -89,8 +89,11 @@ std::vector<std::string> split_into_bytes(const std::string &str);
 void add_word_to_fst(const std::vector<int> &word,
                     fst::StdVectorFst *dictionary);

-// Return weather a byte is a code point boundary (not a continuation byte).
-bool byte_is_codepoint_boundary(unsigned char c);
+// Return true if byte `c` is a code point boundary, i.e. not a UTF-8 continuation byte.
+inline bool byte_is_codepoint_boundary(unsigned char c) {
+  // 0xC0 masks the two most significant bits; only continuation bytes have them set to 10 (0x80)
+  return (c & 0xC0) != 0x80;
+}

 // Add a word in string to dictionary
 bool add_word_to_dictionary(