diff --git a/ci_scripts/package-utils.sh b/ci_scripts/package-utils.sh index 3330cf4e..9fdef33d 100755 --- a/ci_scripts/package-utils.sh +++ b/ci_scripts/package-utils.sh @@ -26,8 +26,14 @@ package_native_client() win_lib="-C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so.if.lib" fi; + if [ -f "${tensorflow_dir}/bazel-bin/native_client/libkenlm.so.if.lib" ]; then + win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so.if.lib" + fi; + ${TAR} --verbose -cf - \ + --transform='flags=r;s|README.coqui|KenLM_License_Info.txt|' \ -C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so \ + -C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so \ ${win_lib} \ -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \ -C ${stt_dir}/ LICENSE \ diff --git a/native_client/BUILD b/native_client/BUILD index 2600ecd5..d11a55bf 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -52,6 +52,31 @@ OPENFST_INCLUDES_PLATFORM = select({ "//conditions:default": ["ctcdecode/third_party/openfst-1.6.7/src/include"], }) +DECODER_SOURCES = [ + "alphabet.cc", + "alphabet.h", + "ctcdecode/ctc_beam_search_decoder.cpp", + "ctcdecode/ctc_beam_search_decoder.h", + "ctcdecode/decoder_utils.cpp", + "ctcdecode/decoder_utils.h", + "ctcdecode/path_trie.cpp", + "ctcdecode/path_trie.h", + "ctcdecode/scorer.cpp", + "ctcdecode/scorer.h", +] + OPENFST_SOURCES_PLATFORM + +DECODER_INCLUDES = [ + ".", + "ctcdecode/third_party/ThreadPool", + "ctcdecode/third_party/object_pool", +] + OPENFST_INCLUDES_PLATFORM + +DECODER_LINKOPTS = [ + "-lm", + "-ldl", + "-pthread", +] + LINUX_LINKOPTS = [ "-ldl", "-pthread", @@ -60,10 +85,12 @@ LINUX_LINKOPTS = [ "-Wl,-export-dynamic", ] -cc_library( - name = "kenlm", +tf_cc_shared_object( + name = "libkenlm.so", srcs = glob([ + "kenlm/lm/*.hh", "kenlm/lm/*.cc", + "kenlm/util/*.hh", "kenlm/util/*.cc", "kenlm/util/double-conversion/*.cc", "kenlm/util/double-conversion/*.h", @@ -72,10 +99,25 @@ cc_library( "kenlm/*/*test.cc", 
"kenlm/*/*main.cc", ],), + copts = [ + "-std=c++11" + ] + select({ + "//tensorflow:windows": [], + "//conditions:default": ["-fvisibility=hidden"], + }), + defines = ["KENLM_MAX_ORDER=6"], + includes = ["kenlm"], + framework_so = [], + linkopts = [], +) + +cc_library( + name="kenlm", hdrs = glob([ "kenlm/lm/*.hh", "kenlm/util/*.hh", ]), + srcs = ["libkenlm.so"], copts = ["-std=c++11"], defines = ["KENLM_MAX_ORDER=6"], includes = ["kenlm"], @@ -83,32 +125,11 @@ cc_library( cc_library( name = "decoder", - srcs = [ - "ctcdecode/ctc_beam_search_decoder.cpp", - "ctcdecode/decoder_utils.cpp", - "ctcdecode/decoder_utils.h", - "ctcdecode/scorer.cpp", - "ctcdecode/path_trie.cpp", - "ctcdecode/path_trie.h", - "alphabet.cc", - ] + OPENFST_SOURCES_PLATFORM, - hdrs = [ - "ctcdecode/ctc_beam_search_decoder.h", - "ctcdecode/scorer.h", - "ctcdecode/decoder_utils.h", - "alphabet.h", - ], - includes = [ - ".", - "ctcdecode/third_party/ThreadPool", - "ctcdecode/third_party/object_pool", - ] + OPENFST_INCLUDES_PLATFORM, + srcs = DECODER_SOURCES, + includes = DECODER_INCLUDES, deps = [":kenlm"], - linkopts = [ - "-lm", - "-ldl", - "-pthread", - ], + linkopts = DECODER_LINKOPTS, + copts = ["-fexceptions"], ) cc_library( @@ -130,8 +151,8 @@ cc_library( "tfmodelstate.h", "tfmodelstate.cc", ], - }), - copts = tf_copts() + select({ + }) + DECODER_SOURCES, + copts = tf_copts(allow_exceptions=True) + select({ # -fvisibility=hidden is not required on Windows, MSCV hides all declarations by default "//tensorflow:windows": ["/w"], # -Wno-sign-compare to silent a lot of warnings from tensorflow itself, @@ -143,16 +164,20 @@ cc_library( }) + select({ "//native_client:tflite": ["-DUSE_TFLITE"], "//conditions:default": ["-UUSE_TFLITE"], - }) + tflite_copts(), + }), linkopts = lrt_if_needed() + select({ "//tensorflow:macos": [], "//tensorflow:ios": ["-fembed-bitcode"], "//tensorflow:linux_x86_64": LINUX_LINKOPTS, "//native_client:rpi3": LINUX_LINKOPTS, "//native_client:rpi3-armv8": LINUX_LINKOPTS, - 
"//tensorflow:windows": [], + # Bazel is has too strong opinions about static linking, so it's + # near impossible to get it to link a DLL against another DLL on Windows. + # We simply force the linker option manually here as a hacky fix. + "//tensorflow:windows": ["bazel-out/x64_windows-opt/bin/native_client/libkenlm.so.if.lib"], "//conditions:default": [], - }) + tflite_linkopts(), + }) + tflite_linkopts() + DECODER_LINKOPTS, + includes = DECODER_INCLUDES, deps = select({ "//native_client:tflite": [ "//tensorflow/lite/kernels:builtin_ops", @@ -201,7 +226,7 @@ cc_library( ], }) + if_cuda([ "//tensorflow/core:core", - ]) + [":decoder"], + ]) + [":kenlm"], ) tf_cc_shared_object( @@ -231,9 +256,13 @@ cc_binary( "generate_scorer_package.cpp", "stt_errors.cc", ], - copts = ["-std=c++11"], + copts = select({ + "//tensorflow:windows": [], + "//conditions:default": ["-std=c++11"], + }), deps = [ ":decoder", + ":kenlm", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/flags:parse", "@com_google_absl//absl/types:optional", @@ -247,6 +276,10 @@ cc_binary( ] + select({ # ARMv7: error: Android 5.0 and later only support position-independent executables (-fPIE). "//tensorflow:android": ["-fPIE -pie"], + # Bazel is has too strong opinions about static linking, so it's + # near impossible to get it to link a DLL against another DLL on Windows. + # We simply force the linker option manually here as a hacky fix. 
+ "//tensorflow:windows": ["bazel-out/x64_windows-opt/bin/native_client/libkenlm.so.if.lib"], "//conditions:default": [], }), ) @@ -263,9 +296,8 @@ cc_binary( cc_binary( name = "trie_load", srcs = [ - "alphabet.h", "trie_load.cc", - ], + ] + DECODER_SOURCES, copts = ["-std=c++11"], - deps = [":decoder"], + linkopts = DECODER_LINKOPTS, ) diff --git a/native_client/definitions.mk b/native_client/definitions.mk index 8efa65f2..9280614e 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -20,7 +20,7 @@ endif STT_BIN := stt$(PLATFORM_EXE_SUFFIX) CFLAGS_STT := -std=c++11 -o $(STT_BIN) -LINK_STT := -lstt +LINK_STT := -lstt -lkenlm LINK_PATH_STT := -L${TFDIR}/bazel-bin/native_client ifeq ($(TARGET),host) @@ -61,7 +61,7 @@ TOOL_CC := cl.exe TOOL_CXX := cl.exe TOOL_LD := link.exe TOOL_LIBEXE := lib.exe -LINK_STT := $(TFDIR)\bazel-bin\native_client\libstt.so.if.lib +LINK_STT := $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libstt.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libkenlm.so.if.lib") LINK_PATH_STT := CFLAGS_STT := -nologo -Fe$(STT_BIN) SOX_CFLAGS := @@ -185,7 +185,7 @@ define copy_missing_libs new_missing="$$( (for f in $$(otool -L $$lib 2>/dev/null | tail -n +2 | awk '{ print $$1 }' | grep -v '$$lib'); do ls -hal $$f; done;) 2>&1 | grep 'No such' | cut -d':' -f2 | xargs basename -a)"; \ missing_libs="$$missing_libs $$new_missing"; \ elif [ "$(OS)" = "${CI_MSYS_VERSION}" ]; then \ - missing_libs="libstt.so"; \ + missing_libs="libstt.so libkenlm.so"; \ else \ missing_libs="$$missing_libs $$($(LDD) $$lib | grep 'not found' | awk '{ print $$1 }')"; \ fi; \ diff --git a/native_client/javascript/Makefile b/native_client/javascript/Makefile index d71e12a0..c4d1dd82 100644 --- a/native_client/javascript/Makefile +++ b/native_client/javascript/Makefile @@ -50,7 +50,7 @@ configure: stt_wrap.cxx package.json npm-dev PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE) build: configure 
stt_wrap.cxx - PATH="$(NODE_MODULES_BIN):${PATH}" NODE_PRE_GYP_ABI_CROSSWALK=$(NODE_PRE_GYP_ABI_CROSSWALK_FILE) AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DEVDIR) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE) + PATH="$(NODE_MODULES_BIN):${PATH}" NODE_PRE_GYP_ABI_CROSSWALK=$(NODE_PRE_GYP_ABI_CROSSWALK_FILE) AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS="$(LIBS)" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DEVDIR) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE) copy-deps: build $(call copy_missing_libs,lib/binding/*/*/*/stt.node,lib/binding/*/*/) diff --git a/native_client/javascript/binding.gyp b/native_client/javascript/binding.gyp index 21124cc8..91d9d19a 100644 --- a/native_client/javascript/binding.gyp +++ b/native_client/javascript/binding.gyp @@ -3,7 +3,7 @@ { "target_name": "stt", "sources": ["stt_wrap.cxx"], - "libraries": ["$(LIBS)"], + "libraries": [], "include_dirs": ["../"], "conditions": [ [ @@ -20,7 +20,22 @@ ], } }, - ] + ], + [ + "OS=='win'", + { + "libraries": [ + "../../../tensorflow/bazel-bin/native_client/libstt.so.if.lib", + "../../../tensorflow/bazel-bin/native_client/libkenlm.so.if.lib", + ], + }, + { + "libraries": [ + "../../../tensorflow/bazel-bin/native_client/libstt.so", + "../../../tensorflow/bazel-bin/native_client/libkenlm.so", + ], + }, + ], ], }, { diff --git a/native_client/kenlm/README.coqui b/native_client/kenlm/README.coqui index 1f0f327a..4f94f048 100644 --- a/native_client/kenlm/README.coqui +++ b/native_client/kenlm/README.coqui @@ -13,3 +13,84 @@ git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/do Cherry-pick fix for MSVC: curl -vsSL 
https://github.com/kpu/kenlm/commit/d70e28403f07e88b276c6bd9f162d2a428530f2e.patch | git am -p1 --directory=native_client/kenlm + +Most of the KenLM code is licensed under the LGPL. There are exceptions that +have their own licenses, listed below. See comments in those files for more +details. + +util/getopt.* is getopt for Windows +util/murmur_hash.cc +util/string_piece.hh and util/string_piece.cc +util/double-conversion/LICENSE covers util/double-conversion except the build files +util/file.cc contains a modified implementation of mkstemp under the LGPL +util/integer_to_string.* is BSD + +For the rest: + + KenLM is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation, either version 2.1 of the License, or + (at your option) any later version. + + KenLM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License 2.1 + along with KenLM code. If not, see <http://www.gnu.org/licenses/lgpl-2.1.html>. + + + +util/double-conversion: + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. 
nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +util/integer_to_string.*: + +Copyright (C) 2014 Milo Yip + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/native_client/kenlm/lm/binary_format.hh b/native_client/kenlm/lm/binary_format.hh index ff99b957..b0815bad 100644 --- a/native_client/kenlm/lm/binary_format.hh +++ b/native_client/kenlm/lm/binary_format.hh @@ -23,7 +23,7 @@ extern const char *kModelNames[6]; * If so, return true and set recognized to the type. This is the only API in * this header designed for use by decoder authors. */ -bool RecognizeBinary(const char *file, ModelType &recognized); +KENLM_EXPORT bool RecognizeBinary(const char *file, ModelType &recognized); struct FixedWidthParameters { unsigned char order; diff --git a/native_client/kenlm/lm/config.hh b/native_client/kenlm/lm/config.hh index 21b9e7ee..05a4db93 100644 --- a/native_client/kenlm/lm/config.hh +++ b/native_client/kenlm/lm/config.hh @@ -10,13 +10,19 @@ /* Configuration for ngram model. Separate header to reduce pollution. */ +#if defined _MSC_VER + #define KENLM_EXPORT __declspec(dllexport) +#else + #define KENLM_EXPORT __attribute__ ((visibility("default"))) +#endif /* _MSC_VER */ + namespace lm { class EnumerateVocab; namespace ngram { -struct Config { +struct KENLM_EXPORT Config { // EFFECTIVE FOR BOTH ARPA AND BINARY READS // (default true) print progress bar to messages diff --git a/native_client/kenlm/lm/model.hh b/native_client/kenlm/lm/model.hh index 9b7206e8..060e5f2c 100644 --- a/native_client/kenlm/lm/model.hh +++ b/native_client/kenlm/lm/model.hh @@ -149,7 +149,7 @@ typedef ProbingModel Model; /* Autorecognize the file type, load, and return the virtual base class. Don't * use the virtual base class if you can avoid it. 
Instead, use the above * classes as template arguments to your own virtual feature function.*/ -base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING); +KENLM_EXPORT base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING); } // namespace ngram } // namespace lm diff --git a/native_client/kenlm/util/file.hh b/native_client/kenlm/util/file.hh index 4a50e730..22146190 100644 --- a/native_client/kenlm/util/file.hh +++ b/native_client/kenlm/util/file.hh @@ -10,9 +10,16 @@ #include #include +#if defined _MSC_VER + #define KENLM_EXPORT __declspec(dllexport) +#else + #define KENLM_EXPORT __attribute__ ((visibility("default"))) +#endif /* _MSC_VER */ + + namespace util { -class scoped_fd { +class KENLM_EXPORT scoped_fd { public: scoped_fd() : fd_(-1) {} @@ -82,7 +89,7 @@ class EndOfFileException : public Exception { class UnsupportedOSException : public Exception {}; // Open for read only. -int OpenReadOrThrow(const char *name); +KENLM_EXPORT int OpenReadOrThrow(const char *name); // Create file if it doesn't exist, truncate if it does. Opened for write. int CreateOrThrow(const char *name); @@ -110,7 +117,7 @@ bool OutputPathIsStdout(StringPiece path); // Return value for SizeFile when it can't size properly. const uint64_t kBadSize = (uint64_t)-1; -uint64_t SizeFile(int fd); +KENLM_EXPORT uint64_t SizeFile(int fd); uint64_t SizeOrThrow(int fd); void ResizeOrThrow(int fd, uint64_t to);