Use os.path.join and add KenLM parameter usage description.

commit e16b72ff28
parent e862cd41db
@@ -9,7 +9,9 @@ You can download the librispeech corpus with the following commands:
 
     gunzip librispeech.txt.gz
 
 Then use the `generate_lm.py` script to generate `lm.binary` and `vocab-500000.txt`.
+If you are not using the DeepSpeech docker container, you have to build `KenLM <https://github.com/kpu/kenlm>`_ first
+and then pass the build path to the script `--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/`.
 
 .. code-block:: bash
 
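The documented example path ends in a trailing slash. The code hunks below replace string concatenation with `os.path.join`, which resolves to the same binary path whether or not the value of `--kenlm_bins` ends with a slash. A minimal sketch of that behaviour (the path is simply the one from the documentation above):

    import os

    # With plain concatenation, the slash-less form would yield ".../binlmplz";
    # os.path.join only inserts a separator when one is missing.
    for kenlm_bins in ("/DeepSpeech/native_client/kenlm/build/bin",
                       "/DeepSpeech/native_client/kenlm/build/bin/"):
        print(os.path.join(kenlm_bins, "lmplz"))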
@@ -79,7 +79,7 @@ def build_lm(args, data_lower, vocab_str):
     lm_path = os.path.join(args.output_dir, "lm.arpa")
     subprocess.check_call(
         [
-            args.kenlm_bins + "lmplz",
+            os.path.join(args.kenlm_bins, "lmplz"),
             "--order",
             str(args.arpa_order),
             "--temp_prefix",
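For context, the surrounding `lmplz` call estimates the ARPA model: `--order` sets the n-gram order and `--temp_prefix` the location for lmplz's temporary sort files. The sketch below is a hedged reconstruction around the arguments visible in this hunk; `--text` and `--arpa` come from KenLM's standard `lmplz` interface and are not shown in this diff.

    import os
    import subprocess

    # Hedged sketch of the lmplz step; only --order and --temp_prefix appear in
    # the hunk above, --text/--arpa are assumed from KenLM's lmplz interface.
    def estimate_arpa(kenlm_bins, corpus_path, lm_path, arpa_order, output_dir):
        subprocess.check_call([
            os.path.join(kenlm_bins, "lmplz"),
            "--order", str(arpa_order),
            "--temp_prefix", output_dir,
            "--text", corpus_path,
            "--arpa", lm_path,
        ])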
@@ -95,12 +95,12 @@ def build_lm(args, data_lower, vocab_str):
         ]
     )
 
-    # Filter LM using vocabulary of top 500k words
+    # Filter LM using vocabulary of top-k words
     print("\nFiltering ARPA file using vocabulary of top-k words ...")
     filtered_path = os.path.join(args.output_dir, "lm_filtered.arpa")
     subprocess.run(
         [
-            os.path.join(args.kenlm_bins, "filter"),
+            os.path.join(args.kenlm_bins, "filter"),
             "single",
             "model:{}".format(lm_path),
             filtered_path,
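The updated comment refers to the top-k vocabulary (`vocab-500000.txt` corresponds to the top 500000 words). That vocabulary is built elsewhere in `generate_lm.py` and passed into `build_lm` as `vocab_str`, then applied by the `filter` call in the hunk above. The following is only a hypothetical sketch of the top-k idea, not the script's actual implementation:

    from collections import Counter

    # Hypothetical sketch only: build a whitespace-separated string of the
    # top-k most frequent words from the lower-cased corpus lines.
    def top_k_vocab(data_lower, top_k=500000):
        counts = Counter(word for line in data_lower for word in line.split())
        return " ".join(word for word, _ in counts.most_common(top_k))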
@@ -114,7 +114,7 @@ def build_lm(args, data_lower, vocab_str):
     binary_path = os.path.join(args.output_dir, "lm.binary")
     subprocess.check_call(
         [
-            args.kenlm_bins + "build_binary",
+            os.path.join(args.kenlm_bins, "build_binary"),
             "-a",
             str(args.binary_a_bits),
             "-q",
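The `-a` and `-q` arguments passed to `build_binary` control the size/accuracy trade-off of the binary model: `-a` caps the number of bits used for pointer compression and `-q` sets the quantization bits for probabilities and backoffs (see KenLM's `build_binary` usage). Below is a hedged sketch of the conversion step; the `trie` data-structure argument and the default values are assumptions, in the script they come from its command-line arguments.

    import os
    import subprocess

    # Hedged sketch of the ARPA-to-binary conversion; "trie" and the defaults
    # are assumptions, the real values come from generate_lm.py's arguments.
    def convert_to_binary(kenlm_bins, filtered_path, binary_path,
                          a_bits=255, q_bits=8):
        subprocess.check_call([
            os.path.join(kenlm_bins, "build_binary"),
            "-a", str(a_bits),
            "-q", str(q_bits),
            "trie",
            filtered_path,
            binary_path,
        ])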