Update readme.

This commit is contained in:
Daniel 2020-03-09 16:34:04 +01:00
parent 9c73700ac7
commit f808720b5b
2 changed files with 12 additions and 5 deletions

View File

@ -4,9 +4,9 @@
* '--download_librispeech': Download the librispeech text corpus (will be downloaded to '--input_txt')
* '--kenlm_bins path/to/bins/': Change the path of the kenlm binaries (defaults to directory in docker container)
* '--top_k 300000': Change the number of most frequent words
* '--arpa_order 3': Change order of k-grams in arpa-file generation
* '--max_arpa_memory 90%': Set maximum allowed memory usage in arpa-file generation
* '--top_k 500000': Change the number of most frequent words
* '--arpa_order 5': Change order of n-grams in arpa-file generation
* '--max_arpa_memory 75%': Set maximum allowed memory usage in arpa-file generation
.. code-block:: bash
@ -15,7 +15,13 @@
| Generate the scorer package from the vocab-500000.txt and lm.binary files created above
|
| Optional Parameters:
* '--default_alpha 0.75'
* '--default_beta 1.85'
* '--force_utf8 ""'
.. code-block:: bash
python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab librispeech-vocab-500k.txt --default_alpha 0.75 --default_beta 1.85 --package kenlm.scorer
python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab librispeech-vocab-500k.txt --package kenlm.scorer

View File

@ -115,7 +115,8 @@ def main():
parser.add_argument(
"--package",
required=True,
help="Path to save scorer package.")
help="Path to save scorer package."
)
parser.add_argument(
"--default_alpha",
type=float,