From c29c0beb725213efc6f55a3d4f4d17ef533fbcf3 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 8 Apr 2020 20:23:04 +0200 Subject: [PATCH] Default to required params. --- data/lm/README.rst | 9 ++++++--- data/lm/generate_lm.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/data/lm/README.rst b/data/lm/README.rst index 2d8e5c71..130df2c3 100644 --- a/data/lm/README.rst +++ b/data/lm/README.rst @@ -11,15 +11,18 @@ You can download the librispeech corpus with the following commands: | Then use the `generate_lm.py` script to generate `lm.binary` and `vocab-500000.txt`. | As input you can use a `file.txt` or `file.txt.gz` with one sentence in each line. | If you are not using the DeepSpeech docker container, you have to build `KenLM <https://github.com/kpu/kenlm>`_ first - and then pass the build path to the script `--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/`. + and then pass the build directory to the script. .. code-block:: bash - python3 data/lm/generate_lm.py --input_txt path/to/librispeech.txt.gz --output_dir path/lm/ + python3 data/lm/generate_lm.py --input_txt path/to/librispeech.txt.gz --output_dir path/lm/ --top_k 500000 \ + --kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/ --arpa_order 5 --max_arpa_memory "85%" \ + --arpa_prune "0|0|1" --binary_a_bits 255 --binary_q_bits 8 --binary_type trie Afterwards you can generate the scorer package with the above vocab-500000.txt and lm.binary files .. 
code-block:: bash - python generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt --package kenlm.scorer --default_alpha 0.75 --default_beta 1.85 + python3 generate_package.py --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt \ + --package kenlm.scorer --default_alpha 0.75 --default_beta 1.85 diff --git a/data/lm/generate_lm.py b/data/lm/generate_lm.py index 4d4944f2..52b8d731 100644 --- a/data/lm/generate_lm.py +++ b/data/lm/generate_lm.py @@ -144,49 +144,49 @@ def main(): "--top_k", help="Use top_k most frequent words for the vocab.txt file. These will be used to filter the ARPA file.", type=int, - default=500000, + required=True, ) parser.add_argument( "--kenlm_bins", help="File path to the KENLM binaries lmplz, filter and build_binary", type=str, - default="/DeepSpeech/native_client/kenlm/build/bin/", + required=True, ) parser.add_argument( "--arpa_order", help="Order of k-grams in ARPA-file generation", type=int, - default=5, + required=True, ) parser.add_argument( "--max_arpa_memory", help="Maximum allowed memory usage for ARPA-file generation", type=str, - default="75%", + required=True, ) parser.add_argument( "--arpa_prune", help="ARPA pruning parameters. Separate values with '|'", type=str, - default="0|0|1", + required=True, ) parser.add_argument( "--binary_a_bits", help="Build binary quantization value a in bits", type=int, - default=255, + required=True, ) parser.add_argument( "--binary_q_bits", help="Build binary quantization value q in bits", type=int, - default=8, + required=True, ) parser.add_argument( "--binary_type", help="Build binary data structure type", type=str, - default="trie", + required=True, ) args = parser.parse_args()