Don't add special tokens to vocabulary

This commit is contained in:
Reuben Morais 2019-10-14 11:48:44 +02:00
parent 3015237e8d
commit c8802a38e7

View File

@@ -296,7 +296,9 @@ void Scorer::fill_dictionary(const std::vector<std::string>& vocabulary, bool ad
fst::StdVectorFst dictionary;
// For each unigram convert to ints and put in trie
for (const auto& word : vocabulary) {
add_word_to_dictionary(word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
if (word != START_TOKEN && word != UNK_TOKEN && word != END_TOKEN) {
add_word_to_dictionary(word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
}
}
/* Simplify FST