From b7428d114ea6b970f8d49331d08a950c92efa0dc Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 21 Jul 2021 10:40:29 +0200 Subject: [PATCH] Dynamically link KenLM and distribute with packages --- ci_scripts/package-utils.sh | 2 + native_client/BUILD | 21 +++++++- native_client/definitions.mk | 2 +- native_client/javascript/Makefile | 2 +- native_client/kenlm/README.coqui | 81 +++++++++++++++++++++++++++++++ 5 files changed, 104 insertions(+), 4 deletions(-) diff --git a/ci_scripts/package-utils.sh b/ci_scripts/package-utils.sh index 3330cf4e..eb3f5e55 100755 --- a/ci_scripts/package-utils.sh +++ b/ci_scripts/package-utils.sh @@ -27,7 +27,9 @@ package_native_client() fi; ${TAR} --verbose -cf - \ + --transform='flags=r;s|README.coqui|KenLM_License_Info.txt|' \ -C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so \ + -C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so \ ${win_lib} \ -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \ -C ${stt_dir}/ LICENSE \ diff --git a/native_client/BUILD b/native_client/BUILD index 2600ecd5..cd6881dd 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -60,10 +60,12 @@ LINUX_LINKOPTS = [ "-Wl,-export-dynamic", ] -cc_library( - name = "kenlm", +tf_cc_shared_object( + name = "libkenlm.so", srcs = glob([ + "kenlm/lm/*.hh", "kenlm/lm/*.cc", + "kenlm/util/*.hh", "kenlm/util/*.cc", "kenlm/util/double-conversion/*.cc", "kenlm/util/double-conversion/*.h", @@ -72,6 +74,20 @@ cc_library( "kenlm/*/*test.cc", "kenlm/*/*main.cc", ],), + copts = [ + "-std=c++11" + ] + select({ + "//tensorflow:windows": [], + "//conditions:default": ["-fvisibility=default"], + }), + defines = ["KENLM_MAX_ORDER=6"], + includes = ["kenlm"], + framework_so = [], + linkopts = [], +) + +cc_library( + name="kenlm", hdrs = glob([ "kenlm/lm/*.hh", "kenlm/util/*.hh", @@ -79,6 +95,7 @@ cc_library( copts = ["-std=c++11"], defines = ["KENLM_MAX_ORDER=6"], includes = ["kenlm"], + srcs = ["libkenlm.so"], ) cc_library( diff --git 
a/native_client/definitions.mk b/native_client/definitions.mk index 8efa65f2..d7710f4d 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -20,7 +20,7 @@ endif STT_BIN := stt$(PLATFORM_EXE_SUFFIX) CFLAGS_STT := -std=c++11 -o $(STT_BIN) -LINK_STT := -lstt +LINK_STT := -lstt -lkenlm LINK_PATH_STT := -L${TFDIR}/bazel-bin/native_client ifeq ($(TARGET),host) diff --git a/native_client/javascript/Makefile b/native_client/javascript/Makefile index d71e12a0..c4d1dd82 100644 --- a/native_client/javascript/Makefile +++ b/native_client/javascript/Makefile @@ -50,7 +50,7 @@ configure: stt_wrap.cxx package.json npm-dev PATH="$(NODE_MODULES_BIN):${PATH}" $(NODE_BUILD_TOOL) configure $(NODE_BUILD_VERBOSE) build: configure stt_wrap.cxx - PATH="$(NODE_MODULES_BIN):${PATH}" NODE_PRE_GYP_ABI_CROSSWALK=$(NODE_PRE_GYP_ABI_CROSSWALK_FILE) AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS=$(LIBS) $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DEVDIR) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE) + PATH="$(NODE_MODULES_BIN):${PATH}" NODE_PRE_GYP_ABI_CROSSWALK=$(NODE_PRE_GYP_ABI_CROSSWALK_FILE) AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(RPATH_NODEJS) $(LDFLAGS)" LIBS="$(LIBS)" $(NODE_BUILD_TOOL) $(NODE_PLATFORM_TARGET) $(NODE_RUNTIME) $(NODE_ABI_TARGET) $(NODE_DEVDIR) $(NODE_DIST_URL) --no-color rebuild $(NODE_BUILD_VERBOSE) copy-deps: build $(call copy_missing_libs,lib/binding/*/*/*/stt.node,lib/binding/*/*/) diff --git a/native_client/kenlm/README.coqui b/native_client/kenlm/README.coqui index 1f0f327a..4f94f048 100644 --- a/native_client/kenlm/README.coqui +++ b/native_client/kenlm/README.coqui @@ -13,3 +13,84 @@ git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/do Cherry-pick fix for MSVC: curl -vsSL 
https://github.com/kpu/kenlm/commit/d70e28403f07e88b276c6bd9f162d2a428530f2e.patch | git am -p1 --directory=native_client/kenlm + +Most of the KenLM code is licensed under the LGPL. There are exceptions that +have their own licenses, listed below. See comments in those files for more +details. + +util/getopt.* is getopt for Windows +util/murmur_hash.cc +util/string_piece.hh and util/string_piece.cc +util/double-conversion/LICENSE covers util/double-conversion except the build files +util/file.cc contains a modified implementation of mkstemp under the LGPL +util/integer_to_string.* is BSD + +For the rest: + + KenLM is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation, either version 2.1 of the License, or + (at your option) any later version. + + KenLM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License 2.1 + along with KenLM code. If not, see <http://www.gnu.org/licenses/>. + + + +util/double-conversion: + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. 
nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +util/integer_to_string.*: + +Copyright (C) 2014 Milo Yip + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.