Don't add special tokens to vocabulary
This commit is contained in:
parent
3015237e8d
commit
c8802a38e7
@@ -296,8 +296,10 @@ void Scorer::fill_dictionary(const std::vector<std::string>& vocabulary, bool ad
|
|||||||
fst::StdVectorFst dictionary;
|
fst::StdVectorFst dictionary;
|
||||||
// For each unigram convert to ints and put in trie
|
// For each unigram convert to ints and put in trie
|
||||||
for (const auto& word : vocabulary) {
|
for (const auto& word : vocabulary) {
|
||||||
|
if (word != START_TOKEN && word != UNK_TOKEN && word != END_TOKEN) {
|
||||||
add_word_to_dictionary(word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
|
add_word_to_dictionary(word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Simplify FST
|
/* Simplify FST
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user