Update readme.

2020-03-09 16:34:04 +01:00 · 2020-03-09 16:34:04 +01:00 · f808720b5b
commit f808720b5b
parent 9c73700ac7
2 changed files with 12 additions and 5 deletions
--- a/data/lm/README.rst
+++ b/data/lm/README.rst
@ -4,9 +4,9 @@

 * '--download_librispeech': Download the librispeech text corpus (will be downloaded to '--input_txt')
 * '--kenlm_bins path/to/bins/':  Change the path of the kenlm binaries (defaults to directory in docker container)
-* '--top_k 300000': Change the number of most frequent words
-* '--arpa_order 3': Change order of k-grams in arpa-file generation
-* '--max_arpa_memory 90%': Set maximum allowed memory usage in arpa-file generation
+* '--top_k 500000': Change the number of most frequent words kept in the vocabulary
+* '--arpa_order 5': Change the order of the n-grams used in arpa-file generation
+* '--max_arpa_memory 75%': Set maximum allowed memory usage in arpa-file generation


 .. code-block:: bash
@ -15,7 +15,13 @@


 | Generate scorer package with the above vocab-500000.txt and lm.binary files
+|
+| Optional Parameters:
+
+* '--default_alpha 0.75'
+* '--default_beta 1.85'
+* '--force_utf8 ""'

 .. code-block:: bash

-    python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab librispeech-vocab-500k.txt --default_alpha 0.75 --default_beta 1.85 --package kenlm.scorer
+    python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt --package kenlm.scorer
--- a/data/lm/generate_package.py
+++ b/data/lm/generate_package.py
@ -115,7 +115,8 @@ def main():
    parser.add_argument(
        "--package",
        required=True,
-        help="Path to save scorer package.")
+        help="Path to save scorer package."
+    )
    parser.add_argument(
        "--default_alpha",
        type=float,