diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py index 66cee942..27b00742 100644 --- a/data/lm/generate_package.py +++ b/data/lm/generate_package.py @@ -3,7 +3,9 @@ from __future__ import absolute_import, division, print_function import argparse import shutil +import sys +import ds_ctcdecoder from deepspeech_training.util.text import Alphabet, UTF8Alphabet from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet @@ -52,7 +54,11 @@ def create_bundle( scorer.set_alphabet(alphabet) scorer.set_utf8_mode(use_utf8) scorer.reset_params(default_alpha, default_beta) - scorer.load_lm(lm_path) + err = scorer.load_lm(lm_path) + if err != ds_ctcdecoder.DS_ERR_SCORER_NO_TRIE: + print('Error loading language model file: 0x{:X}.'.format(err)) + print('See the error codes section in https://deepspeech.readthedocs.io for a description.') + sys.exit(1) scorer.fill_dictionary(list(words)) shutil.copy(lm_path, package_path) scorer.save_dictionary(package_path, True) # append, not overwrite diff --git a/doc/Error-Codes.rst b/doc/Error-Codes.rst index 68d6483f..f97ae3ea 100644 --- a/doc/Error-Codes.rst +++ b/doc/Error-Codes.rst @@ -1,5 +1,9 @@ Error codes =========== -.. doxygenenum:: DeepSpeech_Error_Codes - :project: deepspeech-c +Below are the definitions of all error codes used in the API, along with their numerical values and human-readable descriptions. + +.. 
literalinclude:: ../native_client/deepspeech.h + :language: c + :start-after: sphinx-doc: error_code_listing_start + :end-before: sphinx-doc: error_code_listing_end diff --git a/doc/examples b/doc/examples index 81a06eea..6f5f501f 160000 --- a/doc/examples +++ b/doc/examples @@ -1 +1 @@ -Subproject commit 81a06eea64d1dda734f6b97b3005b4416ac2f50a +Subproject commit 6f5f501fa62743f1b78fe162eb1a579a450bd38f diff --git a/native_client/client.cc b/native_client/client.cc index 72b1dac7..1b335955 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -398,7 +398,9 @@ main(int argc, char **argv) // sphinx-doc: c_ref_model_start int status = DS_CreateModel(model, &ctx); if (status != 0) { - fprintf(stderr, "Could not create model.\n"); + char* error = DS_ErrorCodeToErrorMessage(status); + fprintf(stderr, "Could not create model: %s\n", error); + free(error); return 1; } diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index e0282ca5..faacea9e 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -5,6 +5,12 @@ from .swigwrapper import Alphabet __version__ = swigwrapper.__version__ +# Hack: import error codes by matching on their names, as SWIG unfortunately +# does not support binding enums to Python in a scoped manner yet. +for symbol in dir(swigwrapper): + if symbol.startswith('DS_ERR_'): + globals()[symbol] = getattr(swigwrapper, symbol) + class Scorer(swigwrapper.Scorer): """Wrapper for Scorer. 
diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index 4e0cb1d8..1834c21c 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -74,13 +74,13 @@ int Scorer::load_lm(const std::string& lm_path) // Check if file is readable to avoid KenLM throwing an exception const char* filename = lm_path.c_str(); if (access(filename, R_OK) != 0) { - return 1; + return DS_ERR_SCORER_UNREADABLE; } // Check if the file format is valid to avoid KenLM throwing an exception lm::ngram::ModelType model_type; if (!lm::ngram::RecognizeBinary(filename, model_type)) { - return 1; + return DS_ERR_SCORER_INVALID_LM; } // Load the LM @@ -97,7 +97,7 @@ int Scorer::load_lm(const std::string& lm_path) uint64_t trie_offset = language_model_->GetEndOfSearchOffset(); if (package_size <= trie_offset) { // File ends without a trie structure - return 1; + return DS_ERR_SCORER_NO_TRIE; } // Read metadata and trie from file @@ -113,7 +113,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) if (magic != MAGIC) { std::cerr << "Error: Can't parse scorer file, invalid header. Try updating " "your scorer file." 
<< std::endl; - return 1; + return DS_ERR_SCORER_INVALID_TRIE; } int version; @@ -128,7 +128,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) std::cerr << "Downgrade your scorer file or update your version of DeepSpeech."; } std::cerr << std::endl; - return 1; + return DS_ERR_SCORER_VERSION_MISMATCH; } fin.read(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_)); @@ -143,7 +143,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) opt.mode = fst::FstReadOptions::MAP; opt.source = file_path; dictionary.reset(FstType::Read(fin, opt)); - return 0; + return DS_ERR_OK; } void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite) diff --git a/native_client/ctcdecode/swigwrapper.i b/native_client/ctcdecode/swigwrapper.i index fd0f4f08..64ed5f5a 100644 --- a/native_client/ctcdecode/swigwrapper.i +++ b/native_client/ctcdecode/swigwrapper.i @@ -41,3 +41,9 @@ namespace std { %template(IntVector) std::vector; %template(OutputVector) std::vector; %template(OutputVectorVector) std::vector>; + +// Import only the error code enum definitions from deepspeech.h +%ignore ""; +%rename("%s", regexmatch$name="DS_ERR_") ""; +%rename("%s", regexmatch$name="DeepSpeech_Error_Codes") ""; +%include "../deepspeech.h" diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index b5b0f163..3bcecc60 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -505,37 +505,16 @@ DS_Version() char* DS_ErrorCodeToErrorMessage(int aErrorCode) { +#define RETURN_MESSAGE(NAME, VALUE, DESC) \ + case NAME: \ + return strdup(DESC); + switch(aErrorCode) { - case DS_ERR_OK: - return strdup("No error."); - case DS_ERR_NO_MODEL: - return strdup("Missing model information."); - case DS_ERR_INVALID_ALPHABET: - return strdup("Invalid alphabet embedded in model. 
(Data corruption?)"); - case DS_ERR_INVALID_SHAPE: - return strdup("Invalid model shape."); - case DS_ERR_INVALID_SCORER: - return strdup("Invalid scorer file."); - case DS_ERR_FAIL_INIT_MMAP: - return strdup("Failed to initialize memory mapped model."); - case DS_ERR_FAIL_INIT_SESS: - return strdup("Failed to initialize the session."); - case DS_ERR_FAIL_INTERPRETER: - return strdup("Interpreter failed."); - case DS_ERR_FAIL_RUN_SESS: - return strdup("Failed to run the session."); - case DS_ERR_FAIL_CREATE_STREAM: - return strdup("Error creating the stream."); - case DS_ERR_FAIL_READ_PROTOBUF: - return strdup("Error reading the proto buffer model file."); - case DS_ERR_FAIL_CREATE_SESS: - return strdup("Failed to create session."); - case DS_ERR_MODEL_INCOMPATIBLE: - return strdup("Incompatible model."); - case DS_ERR_SCORER_NOT_ENABLED: - return strdup("External scorer is not enabled."); + DS_FOR_EACH_ERROR(RETURN_MESSAGE) default: return strdup("Unknown error, please make sure you are using the correct native binary."); } + +#undef RETURN_MESSAGE } diff --git a/native_client/deepspeech.h b/native_client/deepspeech.h index a8c29c93..1df3cf2e 100644 --- a/native_client/deepspeech.h +++ b/native_client/deepspeech.h @@ -59,30 +59,37 @@ typedef struct Metadata { const unsigned int num_transcripts; } Metadata; +// sphinx-doc: error_code_listing_start + +#define DS_FOR_EACH_ERROR(APPLY) \ + APPLY(DS_ERR_OK, 0x0000, "No error.") \ + APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ + APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. 
(Data corruption?)") \ + APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ + APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ + APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ + APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ + APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ + APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ + APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ + APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ + APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ + APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ + APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ + APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ + APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ + APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ + APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ + APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ + APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") + +// sphinx-doc: error_code_listing_end + enum DeepSpeech_Error_Codes { - // OK - DS_ERR_OK = 0x0000, - - // Missing invormations - DS_ERR_NO_MODEL = 0x1000, - - // Invalid parameters - DS_ERR_INVALID_ALPHABET = 0x2000, - DS_ERR_INVALID_SHAPE = 0x2001, - DS_ERR_INVALID_SCORER = 0x2002, - DS_ERR_MODEL_INCOMPATIBLE = 0x2003, - DS_ERR_SCORER_NOT_ENABLED = 0x2004, - - // Runtime failures - DS_ERR_FAIL_INIT_MMAP = 0x3000, - DS_ERR_FAIL_INIT_SESS = 0x3001, - DS_ERR_FAIL_INTERPRETER = 0x3002, - DS_ERR_FAIL_RUN_SESS = 0x3003, - DS_ERR_FAIL_CREATE_STREAM = 0x3004, - DS_ERR_FAIL_READ_PROTOBUF = 0x3005, - 
DS_ERR_FAIL_CREATE_SESS = 0x3006, - DS_ERR_FAIL_CREATE_MODEL = 0x3007, +#define DEFINE(NAME, VALUE, DESC) NAME = VALUE, +DS_FOR_EACH_ERROR(DEFINE) +#undef DEFINE }; /** diff --git a/taskcluster/pyenv-win-opt-base.tyml b/taskcluster/pyenv-win-opt-base.tyml index b49f4499..8f083581 100644 --- a/taskcluster/pyenv-win-opt-base.tyml +++ b/taskcluster/pyenv-win-opt-base.tyml @@ -33,7 +33,6 @@ then: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "exit" - - .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu" - $let: taskIndexExpire: { $fromNow: '6 months' } in: > diff --git a/taskcluster/tc-asserts.sh b/taskcluster/tc-asserts.sh index 38d9a288..7a164b07 100755 --- a/taskcluster/tc-asserts.sh +++ b/taskcluster/tc-asserts.sh @@ -367,6 +367,11 @@ run_electronjs_inference_tests() run_basic_inference_tests() { + set +e + deepspeech --model "" --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr + set -e + grep "Missing model information" ${TASKCLUSTER_TMP_DIR}/stderr + set +e phrase_pbmodel_nolm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name} --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} 2>${TASKCLUSTER_TMP_DIR}/stderr) status=$? 
diff --git a/taskcluster/test-win-cuda-opt-base.tyml b/taskcluster/test-win-cuda-opt-base.tyml index 0549fa8e..5fb2c91b 100644 --- a/taskcluster/test-win-cuda-opt-base.tyml +++ b/taskcluster/test-win-cuda-opt-base.tyml @@ -38,7 +38,6 @@ then: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "exit" - - .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu" - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > diff --git a/taskcluster/test-win-opt-base.tyml b/taskcluster/test-win-opt-base.tyml index e95a3037..7171abff 100644 --- a/taskcluster/test-win-opt-base.tyml +++ b/taskcluster/test-win-opt-base.tyml @@ -40,7 +40,6 @@ then: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "exit" - - .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu" - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index 97f3d78f..fa032622 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -40,7 +40,6 @@ payload: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "exit" - - .\msys64\usr\bin\bash.exe --login -cx "pacman --noconfirm -Syu" - echo .\msys64\usr\bin\bash.exe --login -cxe " export LC_ALL=C && export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" &&