Merge pull request #2933 from mozilla/scorer_params

Updated alpha and beta
This commit is contained in:
Kelly Davis 2020-04-23 11:17:13 +02:00 committed by GitHub
commit ba6ff124d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 5 deletions

View File

@ -29,4 +29,4 @@ Afterwards you can use ``generate_package.py`` to generate the scorer package us
.. code-block:: bash
python3 generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt \
--package kenlm.scorer --default_alpha 0.75 --default_beta 1.85
--package kenlm.scorer --default_alpha 0.931289039105002 --default_beta 1.1834137581510284

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3ba04978fca285c34c99bf115ee61549937e422ac91def80122a767e114c035e
size 953436352
oid sha256:d0cf926ab9cab54a8a7d70003b931b2d62ebd9105ed392d1ec9c840029867799
size 953363776

View File

@ -174,8 +174,8 @@ def create_flags():
f.DEFINE_string('scorer_path', 'data/lm/kenlm.scorer', 'path to the external scorer file created with data/lm/generate_package.py')
f.DEFINE_alias('scorer', 'scorer_path')
f.DEFINE_integer('beam_width', 1024, 'beam width used in the CTC decoder when building candidate transcriptions')
f.DEFINE_float('lm_alpha', 0.75, 'the alpha hyperparameter of the CTC decoder. Language Model weight.')
f.DEFINE_float('lm_beta', 1.85, 'the beta hyperparameter of the CTC decoder. Word insertion weight.')
f.DEFINE_float('lm_alpha', 0.931289039105002, 'the alpha hyperparameter of the CTC decoder. Language Model weight.')
f.DEFINE_float('lm_beta', 1.1834137581510284, 'the beta hyperparameter of the CTC decoder. Word insertion weight.')
f.DEFINE_float('cutoff_prob', 1.0, 'only consider characters until this probability mass is reached. 1.0 = disabled.')
f.DEFINE_integer('cutoff_top_n', 300, 'only process this number of characters sorted by probability mass for each time step. If bigger than alphabet size, disabled.')