Merge pull request #1909 from coqui-ai/kenlm-dynamic
Dynamically link KenLM and distribute with packages
This commit is contained in:
commit
4b2af9ce6b
@ -26,8 +26,14 @@ package_native_client()
|
||||
win_lib="-C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so.if.lib"
|
||||
fi;
|
||||
|
||||
if [ -f "${tensorflow_dir}/bazel-bin/native_client/libkenlm.so.if.lib" ]; then
|
||||
win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so.if.lib"
|
||||
fi;
|
||||
|
||||
${TAR} --verbose -cf - \
|
||||
--transform='flags=r;s|README.coqui|KenLM_License_Info.txt|' \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so \
|
||||
${win_lib} \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \
|
||||
-C ${stt_dir}/ LICENSE \
|
||||
|
@ -52,6 +52,31 @@ OPENFST_INCLUDES_PLATFORM = select({
|
||||
"//conditions:default": ["ctcdecode/third_party/openfst-1.6.7/src/include"],
|
||||
})
|
||||
|
||||
# Source and header files that make up the CTC decoder (alphabet, beam search,
# decoder utilities, path trie, scorer), followed by the platform-selected
# OpenFst sources. Defined once so that several targets can reuse it in `srcs`.
DECODER_SOURCES = [
|
||||
"alphabet.cc",
|
||||
"alphabet.h",
|
||||
"ctcdecode/ctc_beam_search_decoder.cpp",
|
||||
"ctcdecode/ctc_beam_search_decoder.h",
|
||||
"ctcdecode/decoder_utils.cpp",
|
||||
"ctcdecode/decoder_utils.h",
|
||||
"ctcdecode/path_trie.cpp",
|
||||
"ctcdecode/path_trie.h",
|
||||
"ctcdecode/scorer.cpp",
|
||||
"ctcdecode/scorer.h",
|
||||
] + OPENFST_SOURCES_PLATFORM
|
||||
|
||||
# Include directories needed to compile the decoder sources: the package
# directory itself, the bundled ThreadPool and object_pool third-party
# directories, and the platform-selected OpenFst include paths.
DECODER_INCLUDES = [
|
||||
".",
|
||||
"ctcdecode/third_party/ThreadPool",
|
||||
"ctcdecode/third_party/object_pool",
|
||||
] + OPENFST_INCLUDES_PLATFORM
|
||||
|
||||
# Linker flags shared by targets that build the decoder sources:
# the math library (-lm), the dynamic loader library (-ldl), and pthread support.
DECODER_LINKOPTS = [
|
||||
"-lm",
|
||||
"-ldl",
|
||||
"-pthread",
|
||||
]
|
||||
|
||||
LINUX_LINKOPTS = [
|
||||
"-ldl",
|
||||
"-pthread",
|
||||
@ -60,10 +85,12 @@ LINUX_LINKOPTS = [
|
||||
"-Wl,-export-dynamic",
|
||||
]
|
||||
|
||||
cc_library(
|
||||
name = "kenlm",
|
||||
tf_cc_shared_object(
|
||||
name = "libkenlm.so",
|
||||
srcs = glob([
|
||||
"kenlm/lm/*.hh",
|
||||
"kenlm/lm/*.cc",
|
||||
"kenlm/util/*.hh",
|
||||
"kenlm/util/*.cc",
|
||||
"kenlm/util/double-conversion/*.cc",
|
||||
"kenlm/util/double-conversion/*.h",
|
||||
@ -72,10 +99,25 @@ cc_library(
|
||||
"kenlm/*/*test.cc",
|
||||
"kenlm/*/*main.cc",
|
||||
],),
|
||||
copts = [
|
||||
"-std=c++11"
|
||||
] + select({
|
||||
"//tensorflow:windows": [],
|
||||
"//conditions:default": ["-fvisibility=hidden"],
|
||||
}),
|
||||
defines = ["KENLM_MAX_ORDER=6"],
|
||||
includes = ["kenlm"],
|
||||
framework_so = [],
|
||||
linkopts = [],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name="kenlm",
|
||||
hdrs = glob([
|
||||
"kenlm/lm/*.hh",
|
||||
"kenlm/util/*.hh",
|
||||
]),
|
||||
srcs = ["libkenlm.so"],
|
||||
copts = ["-std=c++11"],
|
||||
defines = ["KENLM_MAX_ORDER=6"],
|
||||
includes = ["kenlm"],
|
||||
@ -83,32 +125,11 @@ cc_library(
|
||||
|
||||
cc_library(
|
||||
name = "decoder",
|
||||
srcs = [
|
||||
"ctcdecode/ctc_beam_search_decoder.cpp",
|
||||
"ctcdecode/decoder_utils.cpp",
|
||||
"ctcdecode/decoder_utils.h",
|
||||
"ctcdecode/scorer.cpp",
|
||||
"ctcdecode/path_trie.cpp",
|
||||
"ctcdecode/path_trie.h",
|
||||
"alphabet.cc",
|
||||
] + OPENFST_SOURCES_PLATFORM,
|
||||
hdrs = [
|
||||
"ctcdecode/ctc_beam_search_decoder.h",
|
||||
"ctcdecode/scorer.h",
|
||||
"ctcdecode/decoder_utils.h",
|
||||
"alphabet.h",
|
||||
],
|
||||
includes = [
|
||||
".",
|
||||
"ctcdecode/third_party/ThreadPool",
|
||||
"ctcdecode/third_party/object_pool",
|
||||
] + OPENFST_INCLUDES_PLATFORM,
|
||||
srcs = DECODER_SOURCES,
|
||||
includes = DECODER_INCLUDES,
|
||||
deps = [":kenlm"],
|
||||
linkopts = [
|
||||
"-lm",
|
||||
"-ldl",
|
||||
"-pthread",
|
||||
],
|
||||
linkopts = DECODER_LINKOPTS,
|
||||
copts = ["-fexceptions"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
@ -130,8 +151,8 @@ cc_library(
|
||||
"tfmodelstate.h",
|
||||
"tfmodelstate.cc",
|
||||
],
|
||||
}),
|
||||
copts = tf_copts() + select({
|
||||
}) + DECODER_SOURCES,
|
||||
copts = tf_copts(allow_exceptions=True) + select({
|
||||
# -fvisibility=hidden is not required on Windows, MSVC hides all declarations by default
|
||||
"//tensorflow:windows": ["/w"],
|
||||
# -Wno-sign-compare to silence a lot of warnings from tensorflow itself,
|
||||
@ -143,16 +164,20 @@ cc_library(
|
||||
}) + select({
|
||||
"//native_client:tflite": ["-DUSE_TFLITE"],
|
||||
"//conditions:default": ["-UUSE_TFLITE"],
|
||||
}) + tflite_copts(),
|
||||
}),
|
||||
linkopts = lrt_if_needed() + select({
|
||||
"//tensorflow:macos": [],
|
||||
"//tensorflow:ios": ["-fembed-bitcode"],
|
||||
"//tensorflow:linux_x86_64": LINUX_LINKOPTS,
|
||||
"//native_client:rpi3": LINUX_LINKOPTS,
|
||||
"//native_client:rpi3-armv8": LINUX_LINKOPTS,
|
||||
"//tensorflow:windows": [],
|
||||
# Bazel has overly strong opinions about static linking, so it's
|
||||
# near impossible to get it to link a DLL against another DLL on Windows.
|
||||
# We simply force the linker option manually here as a hacky fix.
|
||||
"//tensorflow:windows": ["bazel-out/x64_windows-opt/bin/native_client/libkenlm.so.if.lib"],
|
||||
"//conditions:default": [],
|
||||
}) + tflite_linkopts(),
|
||||
}) + tflite_linkopts() + DECODER_LINKOPTS,
|
||||
includes = DECODER_INCLUDES,
|
||||
deps = select({
|
||||
"//native_client:tflite": [
|
||||
"//tensorflow/lite/kernels:builtin_ops",
|
||||
@ -201,7 +226,7 @@ cc_library(
|
||||
],
|
||||
}) + if_cuda([
|
||||
"//tensorflow/core:core",
|
||||
]) + [":decoder"],
|
||||
]) + [":kenlm"],
|
||||
)
|
||||
|
||||
tf_cc_shared_object(
|
||||
@ -231,9 +256,13 @@ cc_binary(
|
||||
"generate_scorer_package.cpp",
|
||||
"stt_errors.cc",
|
||||
],
|
||||
copts = ["-std=c++11"],
|
||||
copts = select({
|
||||
"//tensorflow:windows": [],
|
||||
"//conditions:default": ["-std=c++11"],
|
||||
}),
|
||||
deps = [
|
||||
":decoder",
|
||||
":kenlm",
|
||||
"@com_google_absl//absl/flags:flag",
|
||||
"@com_google_absl//absl/flags:parse",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
@ -247,6 +276,10 @@ cc_binary(
|
||||
] + select({
|
||||
# ARMv7: error: Android 5.0 and later only support position-independent executables (-fPIE).
|
||||
"//tensorflow:android": ["-fPIE -pie"],
|
||||
# Bazel has overly strong opinions about static linking, so it's
|
||||
# near impossible to get it to link a DLL against another DLL on Windows.
|
||||
# We simply force the linker option manually here as a hacky fix.
|
||||
"//tensorflow:windows": ["bazel-out/x64_windows-opt/bin/native_client/libkenlm.so.if.lib"],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
)
|
||||
@ -263,9 +296,8 @@ cc_binary(
|
||||
cc_binary(
|
||||
name = "trie_load",
|
||||
srcs = [
|
||||
"alphabet.h",
|
||||
"trie_load.cc",
|
||||
],
|
||||
] + DECODER_SOURCES,
|
||||
copts = ["-std=c++11"],
|
||||
deps = [":decoder"],
|
||||
linkopts = DECODER_LINKOPTS,
|
||||
)
|
||||
|
@ -20,7 +20,7 @@ endif
|
||||
|
||||
STT_BIN := stt$(PLATFORM_EXE_SUFFIX)
|
||||
CFLAGS_STT := -std=c++11 -o $(STT_BIN)
|
||||
LINK_STT := -lstt
|
||||
LINK_STT := -lstt -lkenlm
|
||||
LINK_PATH_STT := -L${TFDIR}/bazel-bin/native_client
|
||||
|
||||
ifeq ($(TARGET),host)
|
||||
@ -61,7 +61,7 @@ TOOL_CC := cl.exe
|
||||
TOOL_CXX := cl.exe
|
||||
TOOL_LD := link.exe
|
||||
TOOL_LIBEXE := lib.exe
|
||||
LINK_STT := $(TFDIR)\bazel-bin\native_client\libstt.so.if.lib
|
||||
LINK_STT := $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libstt.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libkenlm.so.if.lib")
|
||||
LINK_PATH_STT :=
|
||||
CFLAGS_STT := -nologo -Fe$(STT_BIN)
|
||||
SOX_CFLAGS :=
|
||||
@ -185,7 +185,7 @@ define copy_missing_libs
|
||||
new_missing="$$( (for f in $$(otool -L $$lib 2>/dev/null | tail -n +2 | awk '{ print $$1 }' | grep -v '$$lib'); do ls -hal $$f; done;) 2>&1 | grep 'No such' | cut -d':' -f2 | xargs basename -a)"; \
|
||||
missing_libs="$$missing_libs $$new_missing"; \
|
||||
elif [ "$(OS)" = "${CI_MSYS_VERSION}" ]; then \
|
||||
missing_libs="libstt.so"; \
|
||||
missing_libs="libstt.so libkenlm.so"; \
|
||||
else \
|
||||
missing_libs="$$missing_libs $$($(LDD) $$lib | grep 'not found' | awk '{ print $$1 }')"; \
|
||||
fi; \
|
||||
|
@ -50,7 +50,7 @@ configure: stt_wrap.cxx package.json npm-dev
|
||||
PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE)
|
||||
|
||||
build: configure stt_wrap.cxx
|
||||
PATH="$(NODE_MODULES_BIN):${PATH}" NODE_PRE_GYP_ABI_CROSSWALK=$(NODE_PRE_GYP_ABI_CROSSWALK_FILE) AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DEVDIR) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE)
|
||||
PATH="$(NODE_MODULES_BIN):${PATH}" NODE_PRE_GYP_ABI_CROSSWALK=$(NODE_PRE_GYP_ABI_CROSSWALK_FILE) AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS="$(LIBS)" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DEVDIR) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE)
|
||||
|
||||
copy-deps: build
|
||||
$(call copy_missing_libs,lib/binding/*/*/*/stt.node,lib/binding/*/*/)
|
||||
|
@ -3,7 +3,7 @@
|
||||
{
|
||||
"target_name": "stt",
|
||||
"sources": ["stt_wrap.cxx"],
|
||||
"libraries": ["$(LIBS)"],
|
||||
"libraries": [],
|
||||
"include_dirs": ["../"],
|
||||
"conditions": [
|
||||
[
|
||||
@ -20,7 +20,22 @@
|
||||
],
|
||||
}
|
||||
},
|
||||
]
|
||||
],
|
||||
[
|
||||
"OS=='win'",
|
||||
{
|
||||
"libraries": [
|
||||
"../../../tensorflow/bazel-bin/native_client/libstt.so.if.lib",
|
||||
"../../../tensorflow/bazel-bin/native_client/libkenlm.so.if.lib",
|
||||
],
|
||||
},
|
||||
{
|
||||
"libraries": [
|
||||
"../../../tensorflow/bazel-bin/native_client/libstt.so",
|
||||
"../../../tensorflow/bazel-bin/native_client/libkenlm.so",
|
||||
],
|
||||
},
|
||||
],
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -13,3 +13,84 @@ git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/do
|
||||
|
||||
Cherry-pick fix for MSVC:
|
||||
curl -vsSL https://github.com/kpu/kenlm/commit/d70e28403f07e88b276c6bd9f162d2a428530f2e.patch | git am -p1 --directory=native_client/kenlm
|
||||
|
||||
Most of the KenLM code is licensed under the LGPL. There are exceptions that
|
||||
have their own licenses, listed below. See comments in those files for more
|
||||
details.
|
||||
|
||||
util/getopt.* is getopt for Windows
|
||||
util/murmur_hash.cc
|
||||
util/string_piece.hh and util/string_piece.cc
|
||||
util/double-conversion/LICENSE covers util/double-conversion except the build files
|
||||
util/file.cc contains a modified implementation of mkstemp under the LGPL
|
||||
util/integer_to_string.* is BSD
|
||||
|
||||
For the rest:
|
||||
|
||||
KenLM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation, either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
KenLM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License 2.1
|
||||
along with KenLM code. If not, see <http://www.gnu.org/licenses/lgpl-2.1.html>.
|
||||
|
||||
|
||||
|
||||
util/double-conversion:
|
||||
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
|
||||
util/integer_to_string.*:
|
||||
|
||||
Copyright (C) 2014 Milo Yip
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
@ -23,7 +23,7 @@ extern const char *kModelNames[6];
|
||||
* If so, return true and set recognized to the type. This is the only API in
|
||||
* this header designed for use by decoder authors.
|
||||
*/
|
||||
bool RecognizeBinary(const char *file, ModelType &recognized);
|
||||
KENLM_EXPORT bool RecognizeBinary(const char *file, ModelType &recognized);
|
||||
|
||||
struct FixedWidthParameters {
|
||||
unsigned char order;
|
||||
|
@ -10,13 +10,19 @@
|
||||
|
||||
/* Configuration for ngram model. Separate header to reduce pollution. */
|
||||
|
||||
/* KENLM_EXPORT marks a declaration as exported from the shared library:
 * __declspec(dllexport) when compiling with MSVC, default symbol visibility
 * on other compilers.
 * NOTE(review): expands to dllexport even when the header is included by a
 * consumer of the library (no dllimport branch) -- confirm this is intended. */
#if defined _MSC_VER
|
||||
#define KENLM_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define KENLM_EXPORT __attribute__ ((visibility("default")))
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
namespace lm {
|
||||
|
||||
class EnumerateVocab;
|
||||
|
||||
namespace ngram {
|
||||
|
||||
struct Config {
|
||||
struct KENLM_EXPORT Config {
|
||||
// EFFECTIVE FOR BOTH ARPA AND BINARY READS
|
||||
|
||||
// (default true) print progress bar to messages
|
||||
|
@ -149,7 +149,7 @@ typedef ProbingModel Model;
|
||||
/* Autorecognize the file type, load, and return the virtual base class. Don't
|
||||
* use the virtual base class if you can avoid it. Instead, use the above
|
||||
* classes as template arguments to your own virtual feature function.*/
|
||||
base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING);
|
||||
KENLM_EXPORT base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING);
|
||||
|
||||
} // namespace ngram
|
||||
} // namespace lm
|
||||
|
@ -10,9 +10,16 @@
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
|
||||
/* KENLM_EXPORT marks a declaration as exported from the shared library:
 * __declspec(dllexport) when compiling with MSVC, default symbol visibility
 * on other compilers.
 * NOTE(review): expands to dllexport even when the header is included by a
 * consumer of the library (no dllimport branch) -- confirm this is intended. */
#if defined _MSC_VER
|
||||
#define KENLM_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define KENLM_EXPORT __attribute__ ((visibility("default")))
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
|
||||
namespace util {
|
||||
|
||||
class scoped_fd {
|
||||
class KENLM_EXPORT scoped_fd {
|
||||
public:
|
||||
scoped_fd() : fd_(-1) {}
|
||||
|
||||
@ -82,7 +89,7 @@ class EndOfFileException : public Exception {
|
||||
class UnsupportedOSException : public Exception {};
|
||||
|
||||
// Open for read only.
|
||||
int OpenReadOrThrow(const char *name);
|
||||
KENLM_EXPORT int OpenReadOrThrow(const char *name);
|
||||
// Create file if it doesn't exist, truncate if it does. Opened for write.
|
||||
int CreateOrThrow(const char *name);
|
||||
|
||||
@ -110,7 +117,7 @@ bool OutputPathIsStdout(StringPiece path);
|
||||
|
||||
// Return value for SizeFile when it can't size properly.
|
||||
const uint64_t kBadSize = (uint64_t)-1;
|
||||
uint64_t SizeFile(int fd);
|
||||
KENLM_EXPORT uint64_t SizeFile(int fd);
|
||||
uint64_t SizeOrThrow(int fd);
|
||||
|
||||
void ResizeOrThrow(int fd, uint64_t to);
|
||||
|
Loading…
Reference in New Issue
Block a user