Merge pull request #2998 from mozilla/scorer-error

Improve error handling around Scorer (Fixes #2995 and #2996)
This commit is contained in:
Reuben Morais 2020-05-19 13:55:54 +02:00 committed by GitHub
commit 430132c5a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 76 additions and 65 deletions

View File

@ -3,7 +3,9 @@ from __future__ import absolute_import, division, print_function
import argparse
import shutil
import sys
import ds_ctcdecoder
from deepspeech_training.util.text import Alphabet, UTF8Alphabet
from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet
@ -52,7 +54,11 @@ def create_bundle(
scorer.set_alphabet(alphabet)
scorer.set_utf8_mode(use_utf8)
scorer.reset_params(default_alpha, default_beta)
scorer.load_lm(lm_path)
err = scorer.load_lm(lm_path)
if err != ds_ctcdecoder.DS_ERR_SCORER_NO_TRIE:
print('Error loading language model file: 0x{:X}.'.format(err))
print('See the error codes section in https://deepspeech.readthedocs.io for a description.')
sys.exit(1)
scorer.fill_dictionary(list(words))
shutil.copy(lm_path, package_path)
scorer.save_dictionary(package_path, True) # append, not overwrite

View File

@ -1,5 +1,9 @@
Error codes
===========
.. doxygenenum:: DeepSpeech_Error_Codes
:project: deepspeech-c
Below are the definitions of all error codes used in the API, their numerical values, and a human-readable description of each.
.. literalinclude:: ../native_client/deepspeech.h
:language: c
:start-after: sphinx-doc: error_code_listing_start
:end-before: sphinx-doc: error_code_listing_end

@ -1 +1 @@
Subproject commit 81a06eea64d1dda734f6b97b3005b4416ac2f50a
Subproject commit 6f5f501fa62743f1b78fe162eb1a579a450bd38f

View File

@ -398,7 +398,9 @@ main(int argc, char **argv)
// sphinx-doc: c_ref_model_start
int status = DS_CreateModel(model, &ctx);
if (status != 0) {
fprintf(stderr, "Could not create model.\n");
char* error = DS_ErrorCodeToErrorMessage(status);
fprintf(stderr, "Could not create model: %s\n", error);
free(error);
return 1;
}

View File

@ -5,6 +5,12 @@ from .swigwrapper import Alphabet
__version__ = swigwrapper.__version__
# Hack: import error codes by matching on their names, as SWIG unfortunately
# does not support binding enums to Python in a scoped manner yet.
for symbol in dir(swigwrapper):
if symbol.startswith('DS_ERR_'):
globals()[symbol] = getattr(swigwrapper, symbol)
class Scorer(swigwrapper.Scorer):
"""Wrapper for Scorer.

View File

@ -74,13 +74,13 @@ int Scorer::load_lm(const std::string& lm_path)
// Check if file is readable to avoid KenLM throwing an exception
const char* filename = lm_path.c_str();
if (access(filename, R_OK) != 0) {
return 1;
return DS_ERR_SCORER_UNREADABLE;
}
// Check if the file format is valid to avoid KenLM throwing an exception
lm::ngram::ModelType model_type;
if (!lm::ngram::RecognizeBinary(filename, model_type)) {
return 1;
return DS_ERR_SCORER_INVALID_LM;
}
// Load the LM
@ -97,7 +97,7 @@ int Scorer::load_lm(const std::string& lm_path)
uint64_t trie_offset = language_model_->GetEndOfSearchOffset();
if (package_size <= trie_offset) {
// File ends without a trie structure
return 1;
return DS_ERR_SCORER_NO_TRIE;
}
// Read metadata and trie from file
@ -113,7 +113,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
if (magic != MAGIC) {
std::cerr << "Error: Can't parse scorer file, invalid header. Try updating "
"your scorer file." << std::endl;
return 1;
return DS_ERR_SCORER_INVALID_TRIE;
}
int version;
@ -128,7 +128,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
std::cerr << "Downgrade your scorer file or update your version of DeepSpeech.";
}
std::cerr << std::endl;
return 1;
return DS_ERR_SCORER_VERSION_MISMATCH;
}
fin.read(reinterpret_cast<char*>(&is_utf8_mode_), sizeof(is_utf8_mode_));
@ -143,7 +143,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
opt.mode = fst::FstReadOptions::MAP;
opt.source = file_path;
dictionary.reset(FstType::Read(fin, opt));
return 0;
return DS_ERR_OK;
}
void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite)

View File

@ -41,3 +41,9 @@ namespace std {
%template(IntVector) std::vector<int>;
%template(OutputVector) std::vector<Output>;
%template(OutputVectorVector) std::vector<std::vector<Output>>;
// Import only the error code enum definitions from deepspeech.h
%ignore "";
%rename("%s", regexmatch$name="DS_ERR_") "";
%rename("%s", regexmatch$name="DeepSpeech_Error_Codes") "";
%include "../deepspeech.h"

View File

@ -505,37 +505,16 @@ DS_Version()
/**
 * Returns a textual description of the given error code.
 *
 * Every return path goes through strdup(), so each call yields a freshly
 * allocated C string that the caller is responsible for releasing with free().
 *
 * @param aErrorCode Error code to describe (one of the DS_ERR_* values).
 * @return A newly allocated, NUL-terminated description. Codes that match no
 *         case label produce a generic "Unknown error" message.
 */
char*
DS_ErrorCodeToErrorMessage(int aErrorCode)
{
// Turns one DS_FOR_EACH_ERROR entry into a case label that returns a copy of
// the entry's description. VALUE is not used by this expansion.
#define RETURN_MESSAGE(NAME, VALUE, DESC) \
case NAME: \
return strdup(DESC);
switch(aErrorCode)
{
case DS_ERR_OK:
return strdup("No error.");
case DS_ERR_NO_MODEL:
return strdup("Missing model information.");
case DS_ERR_INVALID_ALPHABET:
return strdup("Invalid alphabet embedded in model. (Data corruption?)");
case DS_ERR_INVALID_SHAPE:
return strdup("Invalid model shape.");
case DS_ERR_INVALID_SCORER:
return strdup("Invalid scorer file.");
case DS_ERR_FAIL_INIT_MMAP:
return strdup("Failed to initialize memory mapped model.");
case DS_ERR_FAIL_INIT_SESS:
return strdup("Failed to initialize the session.");
case DS_ERR_FAIL_INTERPRETER:
return strdup("Interpreter failed.");
case DS_ERR_FAIL_RUN_SESS:
return strdup("Failed to run the session.");
case DS_ERR_FAIL_CREATE_STREAM:
return strdup("Error creating the stream.");
case DS_ERR_FAIL_READ_PROTOBUF:
return strdup("Error reading the proto buffer model file.");
case DS_ERR_FAIL_CREATE_SESS:
return strdup("Failed to create session.");
case DS_ERR_MODEL_INCOMPATIBLE:
return strdup("Incompatible model.");
case DS_ERR_SCORER_NOT_ENABLED:
return strdup("External scorer is not enabled.");
// Emit one case per entry listed in DS_FOR_EACH_ERROR.
DS_FOR_EACH_ERROR(RETURN_MESSAGE)
default:
// Reached for any code that has no case label above.
return strdup("Unknown error, please make sure you are using the correct native binary.");
}
// Keep the helper macro local to this function.
#undef RETURN_MESSAGE
}

View File

@ -59,30 +59,37 @@ typedef struct Metadata {
const unsigned int num_transcripts;
} Metadata;
// sphinx-doc: error_code_listing_start
#define DS_FOR_EACH_ERROR(APPLY) \
APPLY(DS_ERR_OK, 0x0000, "No error.") \
APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \
APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \
APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \
APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \
APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \
APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \
APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \
APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \
APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \
APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \
APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \
APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \
APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \
APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \
APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \
APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \
APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \
APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \
APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.")
// sphinx-doc: error_code_listing_end
// Numeric error codes of the API. The 0x1000 / 0x2000 / 0x3000 ranges group
// the codes by category, as labelled by the comments inside the enum.
enum DeepSpeech_Error_Codes
{
// OK
DS_ERR_OK = 0x0000,
// Missing information
DS_ERR_NO_MODEL = 0x1000,
// Invalid parameters
DS_ERR_INVALID_ALPHABET = 0x2000,
DS_ERR_INVALID_SHAPE = 0x2001,
DS_ERR_INVALID_SCORER = 0x2002,
DS_ERR_MODEL_INCOMPATIBLE = 0x2003,
DS_ERR_SCORER_NOT_ENABLED = 0x2004,
// Runtime failures
DS_ERR_FAIL_INIT_MMAP = 0x3000,
DS_ERR_FAIL_INIT_SESS = 0x3001,
DS_ERR_FAIL_INTERPRETER = 0x3002,
DS_ERR_FAIL_RUN_SESS = 0x3003,
DS_ERR_FAIL_CREATE_STREAM = 0x3004,
DS_ERR_FAIL_READ_PROTOBUF = 0x3005,
DS_ERR_FAIL_CREATE_SESS = 0x3006,
DS_ERR_FAIL_CREATE_MODEL = 0x3007,
// Expand every DS_FOR_EACH_ERROR entry into an enumerator `NAME = VALUE,`.
// DESC is not used by this expansion.
#define DEFINE(NAME, VALUE, DESC) NAME = VALUE,
DS_FOR_EACH_ERROR(DEFINE)
// Keep the helper macro from leaking to code that includes this header.
#undef DEFINE
};
/**

View File

@ -33,7 +33,6 @@ then:
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
- .\msys64\usr\bin\bash.exe --login -cx "exit"
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
- $let:
taskIndexExpire: { $fromNow: '6 months' }
in: >

View File

@ -367,6 +367,11 @@ run_electronjs_inference_tests()
run_basic_inference_tests()
{
set +e
deepspeech --model "" --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr
set -e
grep "Missing model information" ${TASKCLUSTER_TMP_DIR}/stderr
set +e
phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr)
status=$?

View File

@ -38,7 +38,6 @@ then:
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
- .\msys64\usr\bin\bash.exe --login -cx "exit"
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
- $let:
extraSystemSetup: { $eval: strip(str(build.system_setup)) }
in: >

View File

@ -40,7 +40,6 @@ then:
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
- .\msys64\usr\bin\bash.exe --login -cx "exit"
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
- $let:
extraSystemSetup: { $eval: strip(str(build.system_setup)) }
in: >

View File

@ -40,7 +40,6 @@ payload:
"C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz |
"C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si
- .\msys64\usr\bin\bash.exe --login -cx "exit"
- .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu"
- echo .\msys64\usr\bin\bash.exe --login -cxe "
export LC_ALL=C &&
export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&