Merge pull request #3360 from mozilla/utf8alphabet-python-bindings
Fix binding of UTF8Alphabet class in decoder package
This commit is contained in:
commit
07fcd5bcd1
31
bin/run-tc-ldc93s1_checkpoint_bytes.sh
Executable file
31
bin/run-tc-ldc93s1_checkpoint_bytes.sh
Executable file
@ -0,0 +1,31 @@
|
|||||||
|
#!/bin/sh
# Smoke test: resume bytes-output-mode training from the checkpoint stored in
# /tmp/ckpt_bytes and verify, via the training log, that the checkpoint was
# actually loaded.

set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Import the single-sample LDC93S1 dataset only if it is not already present.
if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

# NOTE: the exit status of a pipeline is that of its last command (tee), so
# `set -e` does not abort here if DeepSpeech.py itself fails. The grep on the
# captured log below is what decides whether this test passes.
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
  --n_hidden 100 --epochs 1 \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' --bytes_output_mode \
  --learning_rate 0.001 --dropout_rate 0.05 \
  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' | tee /tmp/resume.log

# The run only counts as a resume if the log shows a checkpoint being loaded.
if ! grep "Loading best validating checkpoint from" /tmp/resume.log; then
  echo "Did not resume training from checkpoint"
  exit 1
else
  exit 0
fi
|
30
bin/run-tc-ldc93s1_new_bytes.sh
Executable file
30
bin/run-tc-ldc93s1_new_bytes.sh
Executable file
@ -0,0 +1,30 @@
|
|||||||
|
#!/bin/sh
# Smoke test: train a fresh bytes-output-mode model on the single-sample
# LDC93S1 dataset, writing checkpoints to /tmp/ckpt_bytes and the exported
# model to /tmp/train_bytes.
#
# Usage: run-tc-ldc93s1_new_bytes.sh EPOCH_COUNT AUDIO_SAMPLE_RATE

set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Fail fast with a usage message instead of letting an empty value swallow
# the next command-line flag further down.
epoch_count="${1:?usage: $0 EPOCH_COUNT AUDIO_SAMPLE_RATE}"
audio_sample_rate="${2:?usage: $0 EPOCH_COUNT AUDIO_SAMPLE_RATE}"

# Import the single-sample LDC93S1 dataset only if it is not already present.
if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
  --feature_cache '/tmp/ldc93s1_cache' \
  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
  --n_hidden 100 --epochs "${epoch_count}" \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' \
  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_bytes' \
  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
  --audio_sample_rate "${audio_sample_rate}" \
  --bytes_output_mode
|
26
bin/run-tc-ldc93s1_new_bytes_tflite.sh
Executable file
26
bin/run-tc-ldc93s1_new_bytes_tflite.sh
Executable file
@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/sh
# Smoke test: export a TFLite model in bytes output mode from the checkpoint
# stored in /tmp/ckpt_bytes, writing the result to /tmp/train_bytes_tflite.
#
# Usage: run-tc-ldc93s1_new_bytes_tflite.sh AUDIO_SAMPLE_RATE

set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Fail fast with a usage message instead of letting an empty value swallow
# the --export_tflite flag that follows it on the command line.
audio_sample_rate="${1:?usage: $0 AUDIO_SAMPLE_RATE}"

# Import the single-sample LDC93S1 dataset only if it is not already present.
if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

# No train/dev/test files are passed: this invocation only exports.
python -u DeepSpeech.py --noshow_progressbar \
  --n_hidden 100 \
  --checkpoint_dir '/tmp/ckpt_bytes' \
  --export_dir '/tmp/train_bytes_tflite' \
  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
  --bytes_output_mode \
  --audio_sample_rate "${audio_sample_rate}" \
  --export_tflite
|
BIN
data/smoke_test/pruned_lm.bytes.scorer
Normal file
BIN
data/smoke_test/pruned_lm.bytes.scorer
Normal file
Binary file not shown.
3540
data/smoke_test/vocab.pruned.bytes.txt
Normal file
3540
data/smoke_test/vocab.pruned.bytes.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -42,7 +42,7 @@ Bytes output mode
|
|||||||
|
|
||||||
**Note**: Currently, Bytes output mode makes assumptions that hold for Chinese Mandarin models but do not hold for other language targets, such as not predicting spaces.
|
**Note**: Currently, Bytes output mode makes assumptions that hold for Chinese Mandarin models but do not hold for other language targets, such as not predicting spaces.
|
||||||
|
|
||||||
In bytes output mode the model predicts UTF-8 bytes directly instead of letters from an alphabet file. This idea was proposed in the paper `Bytes Are All You Need <https://arxiv.org/abs/1811.09021>`_. This mode is enabled with the ``--utf8`` flag at training and export time. At training time, the alphabet file is not used. Instead, the model is forced to have 256 labels, with labels 0-254 corresponding to UTF-8 byte values 1-255, and label 255 is used for the CTC blank symbol. If using an external scorer at decoding time, it MUST be built according to the instructions that follow.
|
In bytes output mode the model predicts UTF-8 bytes directly instead of letters from an alphabet file. This idea was proposed in the paper `Bytes Are All You Need <https://arxiv.org/abs/1811.09021>`_. This mode is enabled with the ``--bytes_output_mode`` flag at training and export time. At training time, the alphabet file is not used. Instead, the model is forced to have 256 labels, with labels 0-254 corresponding to UTF-8 byte values 1-255, and label 255 is used for the CTC blank symbol. If using an external scorer at decoding time, it MUST be built according to the instructions that follow.
|
||||||
|
|
||||||
Bytes output mode can be useful for languages with very large alphabets, such as Mandarin written with Simplified Chinese characters. It may also be useful for building multi-language models, or as a base for transfer learning. Currently these cases are untested and unsupported. Note that bytes output mode makes assumptions that hold for Mandarin written with Simplified Chinese characters and may not hold for other languages.
|
Bytes output mode can be useful for languages with very large alphabets, such as Mandarin written with Simplified Chinese characters. It may also be useful for building multi-language models, or as a base for transfer learning. Currently these cases are untested and unsupported. Note that bytes output mode makes assumptions that hold for Mandarin written with Simplified Chinese characters and may not hold for other languages.
|
||||||
|
|
||||||
@ -58,11 +58,11 @@ corresponds to the following three "words", or UTF-8 byte sequences:
|
|||||||
|
|
||||||
At decoding time, the scorer is queried every time a Unicode codepoint is predicted, instead of when a space character is predicted. From the language modeling perspective, this is a character based model. From the implementation perspective, this is a word based model, because each character is composed of multiple labels.
|
At decoding time, the scorer is queried every time a Unicode codepoint is predicted, instead of when a space character is predicted. From the language modeling perspective, this is a character based model. From the implementation perspective, this is a word based model, because each character is composed of multiple labels.
|
||||||
|
|
||||||
**Acoustic models trained with ``--utf8`` MUST NOT be used with an alphabet based scorer. Conversely, acoustic models trained with an alphabet file MUST NOT be used with a UTF-8 scorer.**
|
**Acoustic models trained with ``--bytes_output_mode`` MUST NOT be used with an alphabet based scorer. Conversely, acoustic models trained with an alphabet file MUST NOT be used with a UTF-8 scorer.**
|
||||||
|
|
||||||
UTF-8 scorers can be built by using an input corpus with space separated codepoints. If your corpus only contains single codepoints separated by spaces, ``generate_scorer_package`` should automatically enable bytes output mode, and it should print the message "Looks like a character based model."
|
UTF-8 scorers can be built by using an input corpus with space separated codepoints. If your corpus only contains single codepoints separated by spaces, ``generate_scorer_package`` should automatically enable bytes output mode, and it should print the message "Looks like a character based model."
|
||||||
|
|
||||||
If the message "Doesn't look like a character based model." is printed, you should double check your inputs to make sure it only contains single codepoints separated by spaces. Bytes output mode can be forced by specifying the ``--force_utf8`` flag when running ``generate_scorer_package``, but it is NOT RECOMMENDED.
|
If the message "Doesn't look like a character based model." is printed, you should double check your inputs to make sure it only contains single codepoints separated by spaces. Bytes output mode can be forced by specifying the ``--force_bytes_output_mode`` flag when running ``generate_scorer_package``, but it is NOT RECOMMENDED.
|
||||||
|
|
||||||
See :ref:`scorer-scripts` for more details on using ``generate_scorer_package``.
|
See :ref:`scorer-scripts` for more details on using ``generate_scorer_package``.
|
||||||
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from . import swigwrapper # pylint: disable=import-self
|
from . import swigwrapper # pylint: disable=import-self
|
||||||
from .swigwrapper import UTF8Alphabet
|
|
||||||
|
|
||||||
# This module is built with SWIG_PYTHON_STRICT_BYTE_CHAR so we must handle
|
# This module is built with SWIG_PYTHON_STRICT_BYTE_CHAR so we must handle
|
||||||
# string encoding explicitly, here and throughout this file.
|
# string encoding explicitly, here and throughout this file.
|
||||||
@ -89,6 +88,56 @@ class Alphabet(swigwrapper.Alphabet):
|
|||||||
return res.decode('utf-8')
|
return res.decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
class UTF8Alphabet(swigwrapper.UTF8Alphabet):
    """Convenience wrapper for the SWIG ``UTF8Alphabet`` binding.

    The constructor calls ``init`` itself, and every method converts between
    Python ``str`` and UTF-8 ``bytes`` before/after delegating to the binding.
    """

    def __init__(self):
        super(UTF8Alphabet, self).__init__()
        # The UTF-8 alphabet takes no alphabet file, so init gets an empty path.
        status = self.init(b'')
        if status != 0:
            raise ValueError('UTF8Alphabet initialization failed with error code 0x{:X}'.format(status))

    def CanEncodeSingle(self, input):
        '''
        Return true if the single character/output class has a corresponding
        label in the alphabet.
        '''
        utf8_bytes = input.encode('utf-8')
        return super(UTF8Alphabet, self).CanEncodeSingle(utf8_bytes)

    def CanEncode(self, input):
        '''
        Return true if the entire string can be encoded into labels in this
        alphabet.
        '''
        utf8_bytes = input.encode('utf-8')
        return super(UTF8Alphabet, self).CanEncode(utf8_bytes)

    def EncodeSingle(self, input):
        '''
        Encode a single character/output class into a label. The character
        must be in the alphabet, this method will assert that. Use
        `CanEncodeSingle` to test.
        '''
        utf8_bytes = input.encode('utf-8')
        return super(UTF8Alphabet, self).EncodeSingle(utf8_bytes)

    def Encode(self, input):
        '''
        Encode a sequence of character/output classes into a list of labels.
        Characters are assumed to always take a single Unicode codepoint.
        Characters must be in the alphabet, this method will assert that. Use
        `CanEncode` and `CanEncodeSingle` to test.
        '''
        utf8_bytes = input.encode('utf-8')
        labels = super(UTF8Alphabet, self).Encode(utf8_bytes)
        # Copy SWIG's UnsignedIntVec into a plain Python list.
        return list(labels)

    def DecodeSingle(self, input):
        '''Decode a single label and return the result as a string.'''
        # NOTE(review): the raw result is decoded as UTF-8 on its own; if one
        # label maps to one byte of a multi-byte character this would raise
        # UnicodeDecodeError -- confirm against the native implementation.
        raw = super(UTF8Alphabet, self).DecodeSingle(input)
        return raw.decode('utf-8')

    def Decode(self, input):
        '''Decode a sequence of labels into a string.'''
        raw = super(UTF8Alphabet, self).Decode(input)
        return raw.decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def ctc_beam_search_decoder(probs_seq,
|
def ctc_beam_search_decoder(probs_seq,
|
||||||
alphabet,
|
alphabet,
|
||||||
|
@ -20,7 +20,7 @@ create_package(absl::optional<string> alphabet_path,
|
|||||||
string lm_path,
|
string lm_path,
|
||||||
string vocab_path,
|
string vocab_path,
|
||||||
string package_path,
|
string package_path,
|
||||||
absl::optional<bool> force_utf8,
|
absl::optional<bool> force_bytes_output_mode,
|
||||||
float default_alpha,
|
float default_alpha,
|
||||||
float default_beta)
|
float default_beta)
|
||||||
{
|
{
|
||||||
@ -43,27 +43,27 @@ create_package(absl::optional<string> alphabet_path,
|
|||||||
<< (vocab_looks_char_based ? "Looks" : "Doesn't look")
|
<< (vocab_looks_char_based ? "Looks" : "Doesn't look")
|
||||||
<< " like a character based (Bytes Are All You Need) model.\n";
|
<< " like a character based (Bytes Are All You Need) model.\n";
|
||||||
|
|
||||||
if (!force_utf8.has_value()) {
|
if (!force_bytes_output_mode.has_value()) {
|
||||||
force_utf8 = vocab_looks_char_based;
|
force_bytes_output_mode = vocab_looks_char_based;
|
||||||
cerr << "--force_utf8 was not specified, using value "
|
cerr << "--force_bytes_output_mode was not specified, using value "
|
||||||
<< "infered from vocabulary contents: "
|
<< "infered from vocabulary contents: "
|
||||||
<< (vocab_looks_char_based ? "true" : "false") << "\n";
|
<< (vocab_looks_char_based ? "true" : "false") << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!force_utf8.value() && !alphabet_path.has_value()) {
|
if (!force_bytes_output_mode.value() && !alphabet_path.has_value()) {
|
||||||
cerr << "No --alphabet file specified, not using bytes output mode, can't continue.\n";
|
cerr << "No --alphabet file specified, not using bytes output mode, can't continue.\n";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Scorer scorer;
|
Scorer scorer;
|
||||||
if (force_utf8.value()) {
|
if (force_bytes_output_mode.value()) {
|
||||||
scorer.set_alphabet(UTF8Alphabet());
|
scorer.set_alphabet(UTF8Alphabet());
|
||||||
} else {
|
} else {
|
||||||
Alphabet alphabet;
|
Alphabet alphabet;
|
||||||
alphabet.init(alphabet_path->c_str());
|
alphabet.init(alphabet_path->c_str());
|
||||||
scorer.set_alphabet(alphabet);
|
scorer.set_alphabet(alphabet);
|
||||||
}
|
}
|
||||||
scorer.set_utf8_mode(force_utf8.value());
|
scorer.set_utf8_mode(force_bytes_output_mode.value());
|
||||||
scorer.reset_params(default_alpha, default_beta);
|
scorer.reset_params(default_alpha, default_beta);
|
||||||
int err = scorer.load_lm(lm_path);
|
int err = scorer.load_lm(lm_path);
|
||||||
if (err != DS_ERR_SCORER_NO_TRIE) {
|
if (err != DS_ERR_SCORER_NO_TRIE) {
|
||||||
@ -96,13 +96,13 @@ main(int argc, char** argv)
|
|||||||
po::options_description desc("Options");
|
po::options_description desc("Options");
|
||||||
desc.add_options()
|
desc.add_options()
|
||||||
("help", "show help message")
|
("help", "show help message")
|
||||||
("alphabet", po::value<string>(), "Path of alphabet file to use for vocabulary construction. Words with characters not in the alphabet will not be included in the vocabulary. Optional if using UTF-8 mode.")
|
("alphabet", po::value<string>(), "Path of alphabet file to use for vocabulary construction. Words with characters not in the alphabet will not be included in the vocabulary. Optional if using bytes output mode.")
|
||||||
("lm", po::value<string>(), "Path of KenLM binary LM file. Must be built without including the vocabulary (use the -v flag). See generate_lm.py for how to create a binary LM.")
|
("lm", po::value<string>(), "Path of KenLM binary LM file. Must be built without including the vocabulary (use the -v flag). See generate_lm.py for how to create a binary LM.")
|
||||||
("vocab", po::value<string>(), "Path of vocabulary file. Must contain words separated by whitespace.")
|
("vocab", po::value<string>(), "Path of vocabulary file. Must contain words separated by whitespace.")
|
||||||
("package", po::value<string>(), "Path to save scorer package.")
|
("package", po::value<string>(), "Path to save scorer package.")
|
||||||
("default_alpha", po::value<float>(), "Default value of alpha hyperparameter (float).")
|
("default_alpha", po::value<float>(), "Default value of alpha hyperparameter (float).")
|
||||||
("default_beta", po::value<float>(), "Default value of beta hyperparameter (float).")
|
("default_beta", po::value<float>(), "Default value of beta hyperparameter (float).")
|
||||||
("force_utf8", po::value<bool>(), "Boolean flag, force set or unset UTF-8 mode in the scorer package. If not set, infers from the vocabulary. See <https://deepspeech.readthedocs.io/en/master/Decoder.html#utf-8-mode> for further explanation.")
|
("force_bytes_output_mode", po::value<bool>(), "Boolean flag, force set or unset bytes output mode in the scorer package. If not set, infers from the vocabulary. See <https://deepspeech.readthedocs.io/en/master/Decoder.html#bytes-output-mode> for further explanation.")
|
||||||
;
|
;
|
||||||
|
|
||||||
po::variables_map vm;
|
po::variables_map vm;
|
||||||
@ -122,10 +122,10 @@ main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse optional --force_utf8
|
// Parse optional --force_bytes_output_mode
|
||||||
absl::optional<bool> force_utf8 = absl::nullopt;
|
absl::optional<bool> force_bytes_output_mode = absl::nullopt;
|
||||||
if (vm.count("force_utf8")) {
|
if (vm.count("force_bytes_output_mode")) {
|
||||||
force_utf8 = vm["force_utf8"].as<bool>();
|
force_bytes_output_mode = vm["force_bytes_output_mode"].as<bool>();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse optional --alphabet
|
// Parse optional --alphabet
|
||||||
@ -138,7 +138,7 @@ main(int argc, char** argv)
|
|||||||
vm["lm"].as<string>(),
|
vm["lm"].as<string>(),
|
||||||
vm["vocab"].as<string>(),
|
vm["vocab"].as<string>(),
|
||||||
vm["package"].as<string>(),
|
vm["package"].as<string>(),
|
||||||
force_utf8,
|
force_bytes_output_mode,
|
||||||
vm["default_alpha"].as<float>(),
|
vm["default_alpha"].as<float>(),
|
||||||
vm["default_beta"].as<float>());
|
vm["default_beta"].as<float>());
|
||||||
|
|
||||||
|
@ -98,6 +98,7 @@ download_data()
|
|||||||
${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
|
${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}"
|
||||||
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
|
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
|
||||||
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer
|
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer
|
||||||
|
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.bytes.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.bytes.scorer
|
||||||
cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
|
cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
|
||||||
}
|
}
|
||||||
|
|
||||||
|
16
taskcluster/tc-cpp-bytes-ds-tests.sh
Normal file
16
taskcluster/tc-cpp-bytes-ds-tests.sh
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# TaskCluster C++ test entry point for a model trained in bytes output mode.
#
# Usage: tc-cpp-bytes-ds-tests.sh BITRATE

set -xe

# Quote the command substitution so a checkout path containing whitespace
# does not get split into several words.
source "$(dirname "$0")/tc-tests-utils.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"

download_material "${TASKCLUSTER_TMP_DIR}/ds"

export PATH="${TASKCLUSTER_TMP_DIR}/ds/:${PATH}"

# Bytes output mode with LDC93S1 takes too long to converge so we simply test
# that loading the model won't crash
check_versions
|
@ -54,10 +54,30 @@ pushd ${HOME}/DeepSpeech/ds/
|
|||||||
|
|
||||||
# Test --metrics_files training argument
|
# Test --metrics_files training argument
|
||||||
time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}"
|
time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}"
|
||||||
|
|
||||||
|
# Test training with bytes output mode
|
||||||
|
time ./bin/run-tc-ldc93s1_new_bytes.sh 200 "${sample_rate}"
|
||||||
|
time ./bin/run-tc-ldc93s1_new_bytes_tflite.sh "${sample_rate}"
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
# Save exported model artifacts from bytes output mode training
|
||||||
|
cp /tmp/train_bytes/output_graph.pb ${TASKCLUSTER_ARTIFACTS}/output_graph.pb
|
||||||
|
cp /tmp/train_bytes_tflite/output_graph.tflite ${TASKCLUSTER_ARTIFACTS}/output_graph.tflite
|
||||||
|
|
||||||
pushd ${HOME}/DeepSpeech/ds/
|
pushd ${HOME}/DeepSpeech/ds/
|
||||||
|
python util/taskcluster.py --source tensorflow --artifact convert_graphdef_memmapped_format --branch r1.15 --target /tmp/
|
||||||
|
popd
|
||||||
|
|
||||||
|
/tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train_bytes/output_graph.pb --out_graph=/tmp/train_bytes/output_graph.pbmm
|
||||||
|
cp /tmp/train_bytes/output_graph.pbmm ${TASKCLUSTER_ARTIFACTS}
|
||||||
|
|
||||||
|
# Test resuming from checkpoints created above
|
||||||
|
pushd ${HOME}/DeepSpeech/ds/
|
||||||
|
# SDB, resuming from checkpoint
|
||||||
time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh
|
time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh
|
||||||
|
|
||||||
|
# Bytes output mode, resuming from checkpoint
|
||||||
|
time ./bin/run-tc-ldc93s1_checkpoint_bytes.sh
|
||||||
popd
|
popd
|
||||||
|
|
||||||
virtualenv_deactivate "${pyalias}" "deepspeech"
|
virtualenv_deactivate "${pyalias}" "deepspeech"
|
||||||
|
12
taskcluster/test-cpp_16k_bytes-darwin-amd64-opt.yml
Normal file
12
taskcluster/test-cpp_16k_bytes-darwin-amd64-opt.yml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
build:
|
||||||
|
template_file: test-darwin-opt-base.tyml
|
||||||
|
dependencies:
|
||||||
|
- "darwin-amd64-cpu-opt"
|
||||||
|
- "test-training-extra_16k-linux-amd64-py36m-opt"
|
||||||
|
- "homebrew_tests-darwin-amd64"
|
||||||
|
test_model_task: "test-training-extra_16k-linux-amd64-py36m-opt"
|
||||||
|
args:
|
||||||
|
tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tc-cpp-bytes-ds-tests.sh 16k"
|
||||||
|
metadata:
|
||||||
|
name: "DeepSpeech OSX AMD64 CPU C++ tests (Bytes Output Model, 16kHz)"
|
||||||
|
description: "Testing DeepSpeech C++ for OSX/AMD64, CPU only, optimized version (Bytes Output Model, 16kHz)"
|
12
taskcluster/test-cpp_16k_bytes-linux-amd64-opt.yml
Normal file
12
taskcluster/test-cpp_16k_bytes-linux-amd64-opt.yml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
build:
|
||||||
|
template_file: test-linux-opt-base.tyml
|
||||||
|
dependencies:
|
||||||
|
- "linux-amd64-cpu-opt"
|
||||||
|
- "test-training-extra_16k-linux-amd64-py36m-opt"
|
||||||
|
test_model_task: "test-training-extra_16k-linux-amd64-py36m-opt"
|
||||||
|
args:
|
||||||
|
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-cpp-bytes-ds-tests.sh 16k"
|
||||||
|
workerType: "${docker.dsTests}"
|
||||||
|
metadata:
|
||||||
|
name: "DeepSpeech Linux AMD64 CPU C++ tests (Bytes Output Model, 16kHz)"
|
||||||
|
description: "Testing DeepSpeech C++ for Linux/AMD64, CPU only, optimized version (Bytes Output Model, 16kHz)"
|
@ -83,7 +83,7 @@ def initialize_globals():
|
|||||||
if not c.available_devices:
|
if not c.available_devices:
|
||||||
c.available_devices = [c.cpu_device]
|
c.available_devices = [c.cpu_device]
|
||||||
|
|
||||||
if FLAGS.utf8:
|
if FLAGS.bytes_output_mode:
|
||||||
c.alphabet = UTF8Alphabet()
|
c.alphabet = UTF8Alphabet()
|
||||||
else:
|
else:
|
||||||
c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))
|
c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))
|
||||||
|
@ -72,7 +72,7 @@ def calculate_and_print_report(wav_filenames, labels, decodings, losses, dataset
|
|||||||
samples.sort(key=lambda s: s.loss, reverse=True)
|
samples.sort(key=lambda s: s.loss, reverse=True)
|
||||||
|
|
||||||
# Then order by ascending WER/CER
|
# Then order by ascending WER/CER
|
||||||
if FLAGS.utf8:
|
if FLAGS.bytes_output_mode:
|
||||||
samples.sort(key=lambda s: s.cer)
|
samples.sort(key=lambda s: s.cer)
|
||||||
else:
|
else:
|
||||||
samples.sort(key=lambda s: s.wer)
|
samples.sort(key=lambda s: s.wer)
|
||||||
|
@ -156,7 +156,7 @@ def create_flags():
|
|||||||
|
|
||||||
# Decoder
|
# Decoder
|
||||||
|
|
||||||
f.DEFINE_boolean('utf8', False, 'enable UTF-8 mode. When this is used the model outputs UTF-8 sequences directly rather than using an alphabet mapping.')
|
f.DEFINE_boolean('bytes_output_mode', False, 'enable Bytes Output Mode mode. When this is used the model outputs UTF-8 byte values directly rather than using an alphabet mapping. The --alphabet_config_path option will be ignored. See the training documentation for more details.')
|
||||||
f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.')
|
f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.')
|
||||||
f.DEFINE_string('scorer_path', '', 'path to the external scorer file.')
|
f.DEFINE_string('scorer_path', '', 'path to the external scorer file.')
|
||||||
f.DEFINE_alias('scorer', 'scorer_path')
|
f.DEFINE_alias('scorer', 'scorer_path')
|
||||||
|
Loading…
Reference in New Issue
Block a user