Merge pull request #2998 from mozilla/scorer-error
Improve error handling around Scorer (Fixes #2995 and #2996)
This commit is contained in:
commit
430132c5a5
@ -3,7 +3,9 @@ from __future__ import absolute_import, division, print_function
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
import ds_ctcdecoder
|
||||
from deepspeech_training.util.text import Alphabet, UTF8Alphabet
|
||||
from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet
|
||||
|
||||
@ -52,7 +54,11 @@ def create_bundle(
|
||||
scorer.set_alphabet(alphabet)
|
||||
scorer.set_utf8_mode(use_utf8)
|
||||
scorer.reset_params(default_alpha, default_beta)
|
||||
scorer.load_lm(lm_path)
|
||||
err = scorer.load_lm(lm_path)
|
||||
if err != ds_ctcdecoder.DS_ERR_SCORER_NO_TRIE:
|
||||
print('Error loading language model file: 0x{:X}.'.format(err))
|
||||
print('See the error codes section in https://deepspeech.readthedocs.io for a description.')
|
||||
sys.exit(1)
|
||||
scorer.fill_dictionary(list(words))
|
||||
shutil.copy(lm_path, package_path)
|
||||
scorer.save_dictionary(package_path, True) # append, not overwrite
|
||||
|
@ -1,5 +1,9 @@
|
||||
Error codes
|
||||
===========
|
||||
|
||||
.. doxygenenum:: DeepSpeech_Error_Codes
|
||||
:project: deepspeech-c
|
||||
Below are the definitions of all error codes used in the API, together with their numerical values and human-readable descriptions.
|
||||
|
||||
.. literalinclude:: ../native_client/deepspeech.h
|
||||
:language: c
|
||||
:start-after: sphinx-doc: error_code_listing_start
|
||||
:end-before: sphinx-doc: error_code_listing_end
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 81a06eea64d1dda734f6b97b3005b4416ac2f50a
|
||||
Subproject commit 6f5f501fa62743f1b78fe162eb1a579a450bd38f
|
@ -398,7 +398,9 @@ main(int argc, char **argv)
|
||||
// sphinx-doc: c_ref_model_start
|
||||
int status = DS_CreateModel(model, &ctx);
|
||||
if (status != 0) {
|
||||
fprintf(stderr, "Could not create model.\n");
|
||||
char* error = DS_ErrorCodeToErrorMessage(status);
|
||||
fprintf(stderr, "Could not create model: %s\n", error);
|
||||
free(error);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,12 @@ from .swigwrapper import Alphabet
|
||||
|
||||
__version__ = swigwrapper.__version__
|
||||
|
||||
# Hack: import error codes by matching on their names, as SWIG unfortunately
# does not support binding enums to Python in a scoped manner yet.
#
# Every attribute of `swigwrapper` whose name starts with 'DS_ERR_' is copied
# into this module's globals under the same name, so the error codes can be
# referenced directly on this package.
for symbol in dir(swigwrapper):
    if symbol.startswith('DS_ERR_'):
        globals()[symbol] = getattr(swigwrapper, symbol)
|
||||
|
||||
class Scorer(swigwrapper.Scorer):
|
||||
"""Wrapper for Scorer.
|
||||
|
||||
|
@ -74,13 +74,13 @@ int Scorer::load_lm(const std::string& lm_path)
|
||||
// Check if file is readable to avoid KenLM throwing an exception
|
||||
const char* filename = lm_path.c_str();
|
||||
if (access(filename, R_OK) != 0) {
|
||||
return 1;
|
||||
return DS_ERR_SCORER_UNREADABLE;
|
||||
}
|
||||
|
||||
// Check if the file format is valid to avoid KenLM throwing an exception
|
||||
lm::ngram::ModelType model_type;
|
||||
if (!lm::ngram::RecognizeBinary(filename, model_type)) {
|
||||
return 1;
|
||||
return DS_ERR_SCORER_INVALID_LM;
|
||||
}
|
||||
|
||||
// Load the LM
|
||||
@ -97,7 +97,7 @@ int Scorer::load_lm(const std::string& lm_path)
|
||||
uint64_t trie_offset = language_model_->GetEndOfSearchOffset();
|
||||
if (package_size <= trie_offset) {
|
||||
// File ends without a trie structure
|
||||
return 1;
|
||||
return DS_ERR_SCORER_NO_TRIE;
|
||||
}
|
||||
|
||||
// Read metadata and trie from file
|
||||
@ -113,7 +113,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
|
||||
if (magic != MAGIC) {
|
||||
std::cerr << "Error: Can't parse scorer file, invalid header. Try updating "
|
||||
"your scorer file." << std::endl;
|
||||
return 1;
|
||||
return DS_ERR_SCORER_INVALID_TRIE;
|
||||
}
|
||||
|
||||
int version;
|
||||
@ -128,7 +128,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
|
||||
std::cerr << "Downgrade your scorer file or update your version of DeepSpeech.";
|
||||
}
|
||||
std::cerr << std::endl;
|
||||
return 1;
|
||||
return DS_ERR_SCORER_VERSION_MISMATCH;
|
||||
}
|
||||
|
||||
fin.read(reinterpret_cast<char*>(&is_utf8_mode_), sizeof(is_utf8_mode_));
|
||||
@ -143,7 +143,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
|
||||
opt.mode = fst::FstReadOptions::MAP;
|
||||
opt.source = file_path;
|
||||
dictionary.reset(FstType::Read(fin, opt));
|
||||
return 0;
|
||||
return DS_ERR_OK;
|
||||
}
|
||||
|
||||
void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite)
|
||||
|
@ -41,3 +41,9 @@ namespace std {
|
||||
%template(IntVector) std::vector<int>;
|
||||
%template(OutputVector) std::vector<Output>;
|
||||
%template(OutputVectorVector) std::vector<std::vector<Output>>;
|
||||
|
||||
// Import only the error code enum definitions from deepspeech.h
|
||||
%ignore "";
|
||||
%rename("%s", regexmatch$name="DS_ERR_") "";
|
||||
%rename("%s", regexmatch$name="DeepSpeech_Error_Codes") "";
|
||||
%include "../deepspeech.h"
|
||||
|
@ -505,37 +505,16 @@ DS_Version()
|
||||
char*
|
||||
DS_ErrorCodeToErrorMessage(int aErrorCode)
|
||||
{
|
||||
#define RETURN_MESSAGE(NAME, VALUE, DESC) \
|
||||
case NAME: \
|
||||
return strdup(DESC);
|
||||
|
||||
switch(aErrorCode)
|
||||
{
|
||||
case DS_ERR_OK:
|
||||
return strdup("No error.");
|
||||
case DS_ERR_NO_MODEL:
|
||||
return strdup("Missing model information.");
|
||||
case DS_ERR_INVALID_ALPHABET:
|
||||
return strdup("Invalid alphabet embedded in model. (Data corruption?)");
|
||||
case DS_ERR_INVALID_SHAPE:
|
||||
return strdup("Invalid model shape.");
|
||||
case DS_ERR_INVALID_SCORER:
|
||||
return strdup("Invalid scorer file.");
|
||||
case DS_ERR_FAIL_INIT_MMAP:
|
||||
return strdup("Failed to initialize memory mapped model.");
|
||||
case DS_ERR_FAIL_INIT_SESS:
|
||||
return strdup("Failed to initialize the session.");
|
||||
case DS_ERR_FAIL_INTERPRETER:
|
||||
return strdup("Interpreter failed.");
|
||||
case DS_ERR_FAIL_RUN_SESS:
|
||||
return strdup("Failed to run the session.");
|
||||
case DS_ERR_FAIL_CREATE_STREAM:
|
||||
return strdup("Error creating the stream.");
|
||||
case DS_ERR_FAIL_READ_PROTOBUF:
|
||||
return strdup("Error reading the proto buffer model file.");
|
||||
case DS_ERR_FAIL_CREATE_SESS:
|
||||
return strdup("Failed to create session.");
|
||||
case DS_ERR_MODEL_INCOMPATIBLE:
|
||||
return strdup("Incompatible model.");
|
||||
case DS_ERR_SCORER_NOT_ENABLED:
|
||||
return strdup("External scorer is not enabled.");
|
||||
DS_FOR_EACH_ERROR(RETURN_MESSAGE)
|
||||
default:
|
||||
return strdup("Unknown error, please make sure you are using the correct native binary.");
|
||||
}
|
||||
|
||||
#undef RETURN_MESSAGE
|
||||
}
|
||||
|
@ -59,30 +59,37 @@ typedef struct Metadata {
|
||||
const unsigned int num_transcripts;
|
||||
} Metadata;
|
||||
|
||||
/*
 * Single source of truth for the API error codes.
 *
 * DS_FOR_EACH_ERROR(APPLY) invokes APPLY(NAME, VALUE, DESC) once per error
 * code, where NAME is the enumerator, VALUE its numerical value and DESC a
 * human readable description. Users of the list define APPLY to generate
 * whatever they need from it (enumerators, switch cases, ...).
 *
 * Value ranges: 0x1000 missing information, 0x2000 invalid parameters,
 * 0x3000 runtime failures.
 */
// sphinx-doc: error_code_listing_start

#define DS_FOR_EACH_ERROR(APPLY) \
  APPLY(DS_ERR_OK,                      0x0000, "No error.") \
  APPLY(DS_ERR_NO_MODEL,                0x1000, "Missing model information.") \
  APPLY(DS_ERR_INVALID_ALPHABET,        0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \
  APPLY(DS_ERR_INVALID_SHAPE,           0x2001, "Invalid model shape.") \
  APPLY(DS_ERR_INVALID_SCORER,          0x2002, "Invalid scorer file.") \
  APPLY(DS_ERR_MODEL_INCOMPATIBLE,      0x2003, "Incompatible model.") \
  APPLY(DS_ERR_SCORER_NOT_ENABLED,      0x2004, "External scorer is not enabled.") \
  APPLY(DS_ERR_SCORER_UNREADABLE,       0x2005, "Could not read scorer file.") \
  APPLY(DS_ERR_SCORER_INVALID_LM,       0x2006, "Could not recognize language model header in scorer.") \
  APPLY(DS_ERR_SCORER_NO_TRIE,          0x2007, "Reached end of scorer file before loading vocabulary trie.") \
  APPLY(DS_ERR_SCORER_INVALID_TRIE,     0x2008, "Invalid magic in trie header.") \
  APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \
  APPLY(DS_ERR_FAIL_INIT_MMAP,          0x3000, "Failed to initialize memory mapped model.") \
  APPLY(DS_ERR_FAIL_INIT_SESS,          0x3001, "Failed to initialize the session.") \
  APPLY(DS_ERR_FAIL_INTERPRETER,        0x3002, "Interpreter failed.") \
  APPLY(DS_ERR_FAIL_RUN_SESS,           0x3003, "Failed to run the session.") \
  APPLY(DS_ERR_FAIL_CREATE_STREAM,      0x3004, "Error creating the stream.") \
  APPLY(DS_ERR_FAIL_READ_PROTOBUF,      0x3005, "Error reading the proto buffer model file.") \
  APPLY(DS_ERR_FAIL_CREATE_SESS,        0x3006, "Failed to create session.") \
  APPLY(DS_ERR_FAIL_CREATE_MODEL,       0x3007, "Could not allocate model state.")

// sphinx-doc: error_code_listing_end

/*
 * Error codes returned by the API.
 *
 * The enumerators are generated from DS_FOR_EACH_ERROR so that names and
 * values are declared exactly once; spelling them out here as well would
 * redefine every enumerator.
 */
enum DeepSpeech_Error_Codes
{
// Turns each (NAME, VALUE, DESC) entry into `NAME = VALUE,`; DESC is unused.
#define DEFINE(NAME, VALUE, DESC) NAME = VALUE,
DS_FOR_EACH_ERROR(DEFINE)
#undef DEFINE
};
|
||||
|
||||
/**
|
||||
|
@ -33,7 +33,6 @@ then:
|
||||
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
|
||||
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "exit"
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
|
||||
- $let:
|
||||
taskIndexExpire: { $fromNow: '6 months' }
|
||||
in: >
|
||||
|
@ -367,6 +367,11 @@ run_electronjs_inference_tests()
|
||||
|
||||
run_basic_inference_tests()
|
||||
{
|
||||
set +e
|
||||
deepspeech --model "" --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr
|
||||
set -e
|
||||
grep "Missing model information" ${TASKCLUSTER_TMP_DIR}/stderr
|
||||
|
||||
set +e
|
||||
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr)
|
||||
status=$?
|
||||
|
@ -38,7 +38,6 @@ then:
|
||||
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
|
||||
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "exit"
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
|
||||
- $let:
|
||||
extraSystemSetup: { $eval: strip(str(build.system_setup)) }
|
||||
in: >
|
||||
|
@ -40,7 +40,6 @@ then:
|
||||
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
|
||||
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "exit"
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
|
||||
- $let:
|
||||
extraSystemSetup: { $eval: strip(str(build.system_setup)) }
|
||||
in: >
|
||||
|
@ -40,7 +40,6 @@ payload:
|
||||
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
|
||||
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "exit"
|
||||
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
|
||||
- echo .\msys64\usr\bin\bash.exe --login -cxe "
|
||||
export LC_ALL=C &&
|
||||
export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&
|
||||
|
Loading…
x
Reference in New Issue
Block a user