Address review comment and add missing check for presence of scorer

This commit is contained in:
Reuben Morais 2019-11-12 21:56:42 +01:00
parent 0e6952c3a8
commit d2eb305b73
3 changed files with 23 additions and 28 deletions

View File

@ -109,27 +109,25 @@ DecoderState::next(const double *probs,
log_p = log_prob_c + prefix->score;
}
// skip scoring the space in word based LMs
PathTrie* prefix_to_score;
if (ext_scorer_->is_utf8_mode()) {
prefix_to_score = prefix_new;
} else {
prefix_to_score = prefix;
}
if (ext_scorer_ != nullptr) {
// skip scoring the space in word based LMs
PathTrie* prefix_to_score;
if (ext_scorer_->is_utf8_mode()) {
prefix_to_score = prefix_new;
} else {
prefix_to_score = prefix;
}
// check if we need to score
bool is_scoring_boundary = ext_scorer_ != nullptr &&
ext_scorer_->is_scoring_boundary(prefix_to_score, c);
// language model scoring
if (is_scoring_boundary) {
float score = 0.0;
std::vector<std::string> ngram;
ngram = ext_scorer_->make_ngram(prefix_to_score);
bool bos = ngram.size() < ext_scorer_->get_max_order();
score = ext_scorer_->get_log_cond_prob(ngram, bos) * ext_scorer_->alpha;
log_p += score;
log_p += ext_scorer_->beta;
// language model scoring
if (ext_scorer_->is_scoring_boundary(prefix_to_score, c)) {
float score = 0.0;
std::vector<std::string> ngram;
ngram = ext_scorer_->make_ngram(prefix_to_score);
bool bos = ngram.size() < ext_scorer_->get_max_order();
score = ext_scorer_->get_log_cond_prob(ngram, bos) * ext_scorer_->alpha;
log_p += score;
log_p += ext_scorer_->beta;
}
}
prefix_new->log_prob_nb_cur =

View File

@ -46,12 +46,6 @@ size_t get_utf8_str_len(const std::string &str) {
return str_len;
}
// Tells whether byte `c` is a code point boundary, i.e. anything other than
// a continuation byte.
bool byte_is_codepoint_boundary(unsigned char c) {
  // A continuation byte has the bit pattern 10xxxxxx, so discarding the low
  // six bits leaves binary 10 for continuation bytes and for nothing else.
  const unsigned int top_two_bits = c >> 6;
  return top_two_bits != 0x2;
}
std::vector<std::string> split_into_codepoints(const std::string &str) {
std::vector<std::string> result;
std::string out_str;

View File

@ -89,8 +89,11 @@ std::vector<std::string> split_into_bytes(const std::string &str);
void add_word_to_fst(const std::vector<int> &word,
fst::StdVectorFst *dictionary);
// Return whether a byte is a code point boundary (not a continuation byte).
bool byte_is_codepoint_boundary(unsigned char c);
// Reports whether `c` is a code point boundary, meaning it is not a
// continuation byte.
inline bool byte_is_codepoint_boundary(unsigned char c) {
  // Continuation bytes are the only bytes whose two most significant bits
  // are 10, so isolate those two bits and compare against that pattern.
  const bool is_continuation = (c & 0xC0) == 0x80;
  return !is_continuation;
}
// Add a word in string to dictionary
bool add_word_to_dictionary(