Use os.path.join and describe kenlm parameter usage.
parent e862cd41db
commit e16b72ff28
@@ -9,7 +9,9 @@ You can download the librispeech corpus with the following commands:
 
     gunzip librispeech.txt.gz
 
 Then use the `generate_lm.py` script to generate `lm.binary` and `vocab-500000.txt`.
+If you are not using the DeepSpeech docker container, you have to build `KenLM <https://github.com/kpu/kenlm>`_ first
+and then pass the build path to the script `--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/`.
 
 .. code-block:: bash
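For reference, a minimal sketch of how the script can consume `--kenlm_bins` after this change; only the option name and the example path come from the diff, the argparse wiring here is an assumption:

.. code-block:: python

    import argparse
    import os

    parser = argparse.ArgumentParser()
    # Directory holding the compiled KenLM binaries
    # (lmplz, filter, build_binary), as described above.
    parser.add_argument("--kenlm_bins", required=True)
    args = parser.parse_args()

    # After this commit the binaries are resolved with os.path.join,
    # so the path works with or without a trailing slash.
    lmplz_path = os.path.join(args.kenlm_bins, "lmplz")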
@@ -79,7 +79,7 @@ def build_lm(args, data_lower, vocab_str):
     lm_path = os.path.join(args.output_dir, "lm.arpa")
     subprocess.check_call(
         [
-            args.kenlm_bins + "lmplz",
+            os.path.join(args.kenlm_bins, "lmplz"),
             "--order",
             str(args.arpa_order),
             "--temp_prefix",
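The point of the change: plain concatenation only yields a valid path when `--kenlm_bins` ends with a slash, whereas `os.path.join` handles both forms. A quick illustration:

.. code-block:: python

    import os

    bins = "/DeepSpeech/native_client/kenlm/build/bin"  # no trailing slash

    print(bins + "lmplz")               # .../binlmplz  -- broken path
    print(os.path.join(bins, "lmplz"))  # .../bin/lmplz -- correct

    # With a trailing slash both expressions give the same result, so the
    # old code silently depended on how the user typed the flag.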
@@ -95,12 +95,12 @@ def build_lm(args, data_lower, vocab_str):
         ]
     )
 
-    # Filter LM using vocabulary of top 500k words
+    # Filter LM using vocabulary of top-k words
     print("\nFiltering ARPA file using vocabulary of top-k words ...")
     filtered_path = os.path.join(args.output_dir, "lm_filtered.arpa")
     subprocess.run(
         [
-            args.kenlm_bins + "filter",
+            os.path.join(args.kenlm_bins, "filter"),
             "single",
             "model:{}".format(lm_path),
             filtered_path,
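For context, KenLM's `filter` binary in `single` mode reads a vocabulary from stdin and keeps only n-grams built from those words. A hedged sketch of the full call; the `vocab_str` stdin plumbing is assumed from code outside this hunk:

.. code-block:: python

    import os
    import subprocess

    def filter_lm(kenlm_bins, lm_path, filtered_path, vocab_str):
        # "filter single model:IN OUT" keeps only n-grams whose words
        # appear in the vocabulary supplied on standard input.
        subprocess.run(
            [
                os.path.join(kenlm_bins, "filter"),
                "single",
                "model:{}".format(lm_path),
                filtered_path,
            ],
            input=vocab_str.encode("utf-8"),
            check=True,
        )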
@@ -114,7 +114,7 @@ def build_lm(args, data_lower, vocab_str):
     binary_path = os.path.join(args.output_dir, "lm.binary")
     subprocess.check_call(
         [
-            args.kenlm_bins + "build_binary",
+            os.path.join(args.kenlm_bins, "build_binary"),
             "-a",
             str(args.binary_a_bits),
             "-q",
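The `-a` and `-q` flags are KenLM `build_binary` options: `-a` caps the bits used for pointer-array compression and `-q` quantizes probabilities. The hunk is cut off after `-q`, so the values, the `trie` data-structure argument, and the helper below are illustrative assumptions:

.. code-block:: python

    import os
    import subprocess

    def build_binary(kenlm_bins, filtered_path, binary_path):
        subprocess.check_call(
            [
                os.path.join(kenlm_bins, "build_binary"),
                "-a", "255",  # max bits for pointer-array compression
                "-q", "8",    # quantize probabilities to 8 bits
                "trie",       # -a/-q apply to the trie data structure
                filtered_path,
                binary_path,
            ]
        )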