Add encoding data to support core MIME/Unix/Windows encodings, including core
ISO 8859 sets (Latin 1-5, Cyrillic, Arabic, Greek, Hebrew), various EUCs, and common CJK encodings. Data is hard-coded in order to achieve a hermetic runtime, and to simplify the build process. Data is in little-endian byte order. Trying to decode/encode CJK or other code-mapped encodings on a big-endian platform will result in a runtime InvalidArgumentError ("Could not create converter for..."). PiperOrigin-RevId: 221478869
This commit is contained in:
parent
67593ee405
commit
6d11cff780
@ -4800,6 +4800,7 @@ tf_kernel_library(
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:string_ops_op_lib",
|
||||
"//third_party/icu/data:conversion_data",
|
||||
"@icu//:common",
|
||||
],
|
||||
)
|
||||
|
@ -378,6 +378,60 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
|
||||
|
||||
self.assertAllEqual([b"AbCdE", b"HiJkL"], transcoded)
|
||||
|
||||
def test_cjk_encodings(self):
  """Checks that CJK legacy encodings are transcoded to UTF-8 correctly.

  For every sample, the expected UTF-8 bytes are produced by decoding the
  raw bytes with Python's codec of the same name; the `unicode_transcode`
  op must yield exactly those bytes.
  """
  # (input encoding, raw samples), listed in the order the ops are built,
  # run and checked.
  cases = (
      ("shift_jis", [
          b"\x5c\x5c",  # Yen sign
          b"\x8f\x70",  # kanji character "waza"
          b"\x83\x4f",  # katakana character "gu"
      ]),
      ("gb18030", [b"\xca\xf5"]),  # simplified "shu4"
      ("big5", [b"\xb3\x4e"]),  # traditional "shu4"
      ("euc_kr", [b"\xc7\xd1\xb9\xce"]),  # hangul "hanmin"
  )

  # Reference results computed with Python's own codecs.
  expected = []
  for encoding, samples in cases:
    expected.append([raw.decode(encoding).encode("UTF-8") for raw in samples])

  with self.cached_session() as sess:
    transcode_ops = []
    for encoding, samples in cases:
      transcode_ops.append(
          string_ops.unicode_transcode(
              samples,
              input_encoding=encoding,
              output_encoding="UTF-8",
              replacement_char=ord(" "),
              replace_control_characters=False))

    # Evaluate all four ops in a single run call.
    results = sess.run(transcode_ops)

    for actual, wanted in zip(results, expected):
      self.assertAllEqual(actual, wanted)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test.main()
|
||||
|
@ -112,6 +112,7 @@ pkg_tar(
|
||||
genrule(
|
||||
name = "clicenses_generate",
|
||||
srcs = [
|
||||
"//third_party/icu/data:LICENSE",
|
||||
"//third_party/hadoop:LICENSE.txt",
|
||||
"//third_party/eigen3:LICENSE",
|
||||
"//third_party/fft2d:LICENSE",
|
||||
@ -180,6 +181,7 @@ genrule(
|
||||
genrule(
|
||||
name = "jnilicenses_generate",
|
||||
srcs = [
|
||||
"//third_party/icu/data:LICENSE",
|
||||
"//third_party/hadoop:LICENSE.txt",
|
||||
"//third_party/eigen3:LICENSE",
|
||||
"//third_party/fft2d:LICENSE",
|
||||
|
@ -132,6 +132,7 @@ py_binary(
|
||||
filegroup(
|
||||
name = "licenses",
|
||||
data = [
|
||||
"//third_party/icu/data:LICENSE",
|
||||
"//third_party/eigen3:LICENSE",
|
||||
"//third_party/fft2d:LICENSE",
|
||||
"//third_party/hadoop:LICENSE.txt",
|
||||
|
46
third_party/icu/data/BUILD
vendored
Normal file
46
third_party/icu/data/BUILD
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
package(
|
||||
default_visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Unicode/ICU license (see LICENSE)
|
||||
|
||||
exports_files(["LICENSE"])
|
||||
|
||||
# Data for core MIME/Unix/Windows encodings:
|
||||
# ISO 8859-2..9, 15; Windows-125x; EUC-CN; GBK (Windows cp936); GB 18030;
|
||||
# Big5 (Windows cp950); SJIS (Windows cp932); EUC-JP; EUC-KR, KS C 5601;
|
||||
# Windows cp949. Data is pre-processed for little-endian platforms. To replicate
|
||||
# this pre-processing (if you want additional encodings, for example), do the
|
||||
# following:
|
||||
#
|
||||
# First, download, build, and install ICU. This installs tools such as makeconv.
|
||||
# Then, run the following from your icu4c/source directory:
|
||||
# $ cd data/mappings
|
||||
# $ rm *.cnv # there shouldn't be any .cnv files here to begin with
|
||||
# $ grep \.ucm ucmcore.mk | \
|
||||
# sed 's/\(UCM_SOURCE_CORE=\)\?\([^ ]\+\.ucm\)\\\?/\2/g' | \
|
||||
# tr '\n' ' ' | xargs makeconv
|
||||
# $ ls *.cnv > filelist.lst
|
||||
# $ pkgdata -m common -p ucmcore filelist.lst
|
||||
# $ genccode -f custom_conversion_data ucmcore.dat
|
||||
# This creates custom_conversion_data.c. You will need to change the target
|
||||
# :conversion_data to depend on your custom source instead of :conversion_data.c
|
||||
filegroup(
|
||||
name = "conversion_files",
|
||||
srcs = glob(["icu_conversion_data.c.gz.*"]),
|
||||
)
|
||||
|
||||
# Data files are compressed and split to avoid the performance degradation
|
||||
# git suffers with large files.
|
||||
genrule(
|
||||
name = "merge_conversion_data",
|
||||
srcs = [":conversion_files"],
|
||||
outs = ["conversion_data.c"],
|
||||
cmd = "cat $(locations :conversion_files) | gunzip > $@",
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "conversion_data",
|
||||
srcs = [":conversion_data.c"],
|
||||
deps = ["@icu//:headers"],
|
||||
)
|
414
third_party/icu/data/LICENSE
vendored
Normal file
414
third_party/icu/data/LICENSE
vendored
Normal file
@ -0,0 +1,414 @@
|
||||
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
|
||||
|
||||
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
---------------------
|
||||
|
||||
Third-Party Software Licenses
|
||||
|
||||
This section contains third-party software notices and/or additional
|
||||
terms for licensed third-party software components included within ICU
|
||||
libraries.
|
||||
|
||||
1. ICU License - ICU 1.8.1 to ICU 57.1
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright (c) 1995-2016 International Business Machines Corporation and others
|
||||
All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, and/or sell copies of the Software, and to permit persons
|
||||
to whom the Software is furnished to do so, provided that the above
|
||||
copyright notice(s) and this permission notice appear in all copies of
|
||||
the Software and that both the above copyright notice(s) and this
|
||||
permission notice appear in supporting documentation.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
|
||||
SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
|
||||
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
|
||||
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale, use
|
||||
or other dealings in this Software without prior written authorization
|
||||
of the copyright holder.
|
||||
|
||||
All trademarks and registered trademarks mentioned herein are the
|
||||
property of their respective owners.
|
||||
|
||||
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
|
||||
|
||||
# The Google Chrome software developed by Google is licensed under
|
||||
# the BSD license. Other software included in this distribution is
|
||||
# provided under other licenses, as set forth below.
|
||||
#
|
||||
# The BSD License
|
||||
# http://opensource.org/licenses/bsd-license.php
|
||||
# Copyright (C) 2006-2008, Google Inc.
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided with
|
||||
# the distribution.
|
||||
# Neither the name of Google Inc. nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
# The word list in cjdict.txt are generated by combining three word lists
|
||||
# listed below with further processing for compound word breaking. The
|
||||
# frequency is generated with an iterative training against Google web
|
||||
# corpora.
|
||||
#
|
||||
# * Libtabe (Chinese)
|
||||
# - https://sourceforge.net/project/?group_id=1519
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# * IPADIC (Japanese)
|
||||
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# ---------COPYING.libtabe ---- BEGIN--------------------
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 TaBE Project.
|
||||
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
|
||||
# * All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the TaBE Project nor the names of its
|
||||
# * contributors may be used to endorse or promote products derived
|
||||
# * from this software without specific prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 Computer Systems and Communication Lab,
|
||||
# * Institute of Information Science, Academia
|
||||
# * Sinica. All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the Computer Systems and Communication Lab
|
||||
# * nor the names of its contributors may be used to endorse or
|
||||
# * promote products derived from this software without specific
|
||||
# * prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
|
||||
# University of Illinois
|
||||
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
|
||||
#
|
||||
# ---------------COPYING.libtabe-----END--------------------------------
|
||||
#
|
||||
#
|
||||
# ---------------COPYING.ipadic-----BEGIN-------------------------------
|
||||
#
|
||||
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
|
||||
# and Technology. All Rights Reserved.
|
||||
#
|
||||
# Use, reproduction, and distribution of this software is permitted.
|
||||
# Any copy of this software, whether in its original form or modified,
|
||||
# must include both the above copyright notice and the following
|
||||
# paragraphs.
|
||||
#
|
||||
# Nara Institute of Science and Technology (NAIST),
|
||||
# the copyright holders, disclaims all warranties with regard to this
|
||||
# software, including all implied warranties of merchantability and
|
||||
# fitness, in no event shall NAIST be liable for
|
||||
# any special, indirect or consequential damages or any damages
|
||||
# whatsoever resulting from loss of use, data or profits, whether in an
|
||||
# action of contract, negligence or other tortuous action, arising out
|
||||
# of or in connection with the use or performance of this software.
|
||||
#
|
||||
# A large portion of the dictionary entries
|
||||
# originate from ICOT Free Software. The following conditions for ICOT
|
||||
# Free Software applies to the current dictionary as well.
|
||||
#
|
||||
# Each User may also freely distribute the Program, whether in its
|
||||
# original form or modified, to any third party or parties, PROVIDED
|
||||
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
|
||||
# on, or be attached to, the Program, which is distributed substantially
|
||||
# in the same form as set out herein and that such intended
|
||||
# distribution, if actually made, will neither violate or otherwise
|
||||
# contravene any of the laws and regulations of the countries having
|
||||
# jurisdiction over the User or the intended distribution itself.
|
||||
#
|
||||
# NO WARRANTY
|
||||
#
|
||||
# The program was produced on an experimental basis in the course of the
|
||||
# research and development conducted during the project and is provided
|
||||
# to users as so produced on an experimental basis. Accordingly, the
|
||||
# program is provided without any warranty whatsoever, whether express,
|
||||
# implied, statutory or otherwise. The term "warranty" used herein
|
||||
# includes, but is not limited to, any warranty of the quality,
|
||||
# performance, merchantability and fitness for a particular purpose of
|
||||
# the program and the nonexistence of any infringement or violation of
|
||||
# any right of any third party.
|
||||
#
|
||||
# Each user of the program will agree and understand, and be deemed to
|
||||
# have agreed and understood, that there is no warranty whatsoever for
|
||||
# the program and, accordingly, the entire risk arising from or
|
||||
# otherwise connected with the program is assumed by the user.
|
||||
#
|
||||
# Therefore, neither ICOT, the copyright holder, or any other
|
||||
# organization that participated in or was otherwise related to the
|
||||
# development of the program and their respective officials, directors,
|
||||
# officers and other employees shall be held liable for any and all
|
||||
# damages, including, without limitation, general, special, incidental
|
||||
# and consequential damages, arising out of or otherwise in connection
|
||||
# with the use or inability to use the program or any product, material
|
||||
# or result produced or otherwise obtained by using the program,
|
||||
# regardless of whether they have been advised of, or otherwise had
|
||||
# knowledge of, the possibility of such damages at any time during the
|
||||
# project or thereafter. Each user will be deemed to have agreed to the
|
||||
# foregoing by his or her commencement of use of the program. The term
|
||||
# "use" as used herein includes, but is not limited to, the use,
|
||||
# modification, copying and distribution of the program and the
|
||||
# production of secondary products from the program.
|
||||
#
|
||||
# In the case where the program, whether in its original form or
|
||||
# modified, was distributed or delivered to or received by a user from
|
||||
# any person, organization or entity other than ICOT, unless it makes or
|
||||
# grants independently of ICOT any specific warranty to the user in
|
||||
# writing, such person, organization or entity, will also be exempted
|
||||
# from and not be held liable to the user for any such damages as noted
|
||||
# above as far as the program is concerned.
|
||||
#
|
||||
# ---------------COPYING.ipadic-----END----------------------------------
|
||||
|
||||
3. Lao Word Break Dictionary Data (laodict.txt)
|
||||
|
||||
# Copyright (c) 2013 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# Project: http://code.google.com/p/lao-dictionary/
|
||||
# Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
|
||||
# License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
|
||||
# (copied below)
|
||||
#
|
||||
# This file is derived from the above dictionary, with slight
|
||||
# modifications.
|
||||
# ----------------------------------------------------------------------
|
||||
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer. Redistributions in
|
||||
# binary form must reproduce the above copyright notice, this list of
|
||||
# conditions and the following disclaimer in the documentation and/or
|
||||
# other materials provided with the distribution.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
4. Burmese Word Break Dictionary Data (burmesedict.txt)
|
||||
|
||||
# Copyright (c) 2014 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# This list is part of a project hosted at:
|
||||
# github.com/kanyawtech/myanmar-karen-word-lists
|
||||
#
|
||||
# --------------------------------------------------------------------------
|
||||
# Copyright (c) 2013, LeRoy Benjamin Sharon
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met: Redistributions of source code must retain the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer. Redistributions in binary form must reproduce the
|
||||
# above copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
#
|
||||
# Neither the name Myanmar Karen Word Lists, nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
5. Time Zone Database
|
||||
|
||||
ICU uses the public domain data and code derived from Time Zone
|
||||
Database for its time zone support. The ownership of the TZ database
|
||||
is explained in BCP 175: Procedure for Maintaining the Time Zone
|
||||
Database section 7.
|
||||
|
||||
# 7. Database Ownership
|
||||
#
|
||||
# The TZ database itself is not an IETF Contribution or an IETF
|
||||
# document. Rather it is a pre-existing and regularly updated work
|
||||
# that is in the public domain, and is intended to remain in the
|
||||
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
|
||||
# not apply to the TZ Database or contributions that individuals make
|
||||
# to it. Should any claims be made and substantiated against the TZ
|
||||
# Database, the organization that is providing the IANA
|
||||
# Considerations defined in this RFC, under the memorandum of
|
||||
# understanding with the IETF, currently ICANN, may act in accordance
|
||||
# with all competent court orders. No ownership claims will be made
|
||||
# by ICANN or the IETF Trust on the database or the code. Any person
|
||||
# making a contribution to the database or code waives all rights to
|
||||
# future claims in that contribution or in the TZ Database.
|
||||
|
||||
6. Google double-conversion
|
||||
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
BIN
third_party/icu/data/icu_conversion_data.c.gz.aa
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.aa
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.ab
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.ab
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.ac
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.ac
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.ad
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.ad
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.ae
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.ae
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.af
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.af
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.ag
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.ag
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.ah
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.ah
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.ai
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.ai
vendored
Normal file
Binary file not shown.
BIN
third_party/icu/data/icu_conversion_data.c.gz.aj
vendored
Normal file
BIN
third_party/icu/data/icu_conversion_data.c.gz.aj
vendored
Normal file
Binary file not shown.
53
third_party/icu/udata.patch
vendored
Normal file
53
third_party/icu/udata.patch
vendored
Normal file
@ -0,0 +1,53 @@
|
||||
--- /icu4c/source/common/udata.cpp.old 2018-06-19 22:34:56.000000000 -0700
|
||||
+++ /icu4c/source/common/udata.cpp 2018-10-19 14:26:09.778950855 -0700
|
||||
@@ -18,15 +18,15 @@
|
||||
|
||||
#include "unicode/utypes.h" /* U_PLATFORM etc. */
|
||||
|
||||
-#ifdef __GNUC__
|
||||
-/* if gcc
|
||||
-#define ATTRIBUTE_WEAK __attribute__ ((weak))
|
||||
-might have to #include some other header
|
||||
-*/
|
||||
+#if defined(__GNUC__) || defined(__SUNPRO_CC)
|
||||
+# define ATTRIBUTE_WEAK __attribute__ ((weak))
|
||||
+#else
|
||||
+# define ATTRIBUTE_WEAK
|
||||
#endif
|
||||
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/udata.h"
|
||||
+#include "unicode/umachine.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
@@ -641,10 +641,11 @@
|
||||
* partial-data-library access functions where each returns a pointer
|
||||
* to its data package, if it is linked in.
|
||||
*/
|
||||
-/*
|
||||
-extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK;
|
||||
-extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
|
||||
-*/
|
||||
+
|
||||
+//extern "C" const void *uprv_getICUData_collation(void);
|
||||
+U_CDECL_BEGIN
|
||||
+const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
|
||||
+U_CDECL_END
|
||||
|
||||
/*----------------------------------------------------------------------*
|
||||
* *
|
||||
@@ -702,10 +703,11 @@
|
||||
if (uprv_getICUData_collation) {
|
||||
setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode);
|
||||
}
|
||||
+ */
|
||||
if (uprv_getICUData_conversion) {
|
||||
- setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
|
||||
+ setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
|
||||
}
|
||||
- */
|
||||
+
|
||||
#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time
|
||||
setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode);
|
||||
{
|
6
third_party/icu/workspace.bzl
vendored
6
third_party/icu/workspace.bzl
vendored
@ -2,6 +2,11 @@
|
||||
|
||||
load("//third_party:repo.bzl", "third_party_http_archive")
|
||||
|
||||
# Sanitize a dependency so that it works correctly from code that includes
|
||||
# TensorFlow as a submodule.
|
||||
def clean_dep(dep):
    """Returns `dep` converted to a `Label` and rendered back as a string.

    Used so that the dependency still works when TensorFlow is included as a
    submodule of another workspace.

    Args:
      dep: label string of the dependency, e.g. "//third_party/icu:udata.patch".

    Returns:
      The string form of `Label(dep)`.
    """
    resolved = Label(dep)
    return str(resolved)
|
||||
|
||||
def repo():
|
||||
third_party_http_archive(
|
||||
name = "icu",
|
||||
@ -13,4 +18,5 @@ def repo():
|
||||
],
|
||||
build_file = "//third_party/icu:BUILD.bazel",
|
||||
system_build_file = "//third_party/icu:BUILD.system",
|
||||
patch_file = clean_dep("//third_party/icu:udata.patch"),
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user