Don't add special tokens to vocabulary

2019-10-14 11:48:44 +02:00 · 2019-10-14 11:48:44 +02:00 · c8802a38e7
commit c8802a38e7
parent 3015237e8d
1 changed file with 3 additions and 1 deletion
--- a/native_client/ctcdecode/scorer.cpp
+++ b/native_client/ctcdecode/scorer.cpp
@@ -296,8 +296,10 @@ void Scorer::fill_dictionary(const std::vector<std::string>& vocabulary, bool ad
  fst::StdVectorFst dictionary;
  // For each unigram convert to ints and put in trie
  for (const auto& word : vocabulary) {
    if (word != START_TOKEN && word != UNK_TOKEN && word != END_TOKEN) {
      add_word_to_dictionary(word, char_map_, add_space, SPACE_ID_ + 1, &dictionary);
    }
  }
  /* Simplify FST