diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 0f843ef1259..5fd0f5d5d17 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4800,6 +4800,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:string_ops_op_lib", + "//third_party/icu/data:conversion_data", "@icu//:common", ], ) diff --git a/tensorflow/python/kernel_tests/unicode_transcode_op_test.py b/tensorflow/python/kernel_tests/unicode_transcode_op_test.py index 4406907a4cc..4ad5ee4103e 100644 --- a/tensorflow/python/kernel_tests/unicode_transcode_op_test.py +++ b/tensorflow/python/kernel_tests/unicode_transcode_op_test.py @@ -378,6 +378,60 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase): self.assertAllEqual([b"AbCdE", b"HiJkL"], transcoded) + def test_cjk_encodings(self): + strings_ja = [ + b"\x5c\x5c", # Yen sign + b"\x8f\x70", # kanji character "waza" + b"\x83\x4f" + ] # katakana character "gu" + strings_zh_cn = [b"\xca\xf5"] # simplified "shu4" + strings_zh_tw = [b"\xb3\x4e"] # traditional "shu4" + strings_ko = [b"\xc7\xd1\xb9\xce"] # hangul "hanmin" + + expected_ja = [s.decode("shift_jis").encode("UTF-8") for s in strings_ja] + expected_zh_cn = [ + s.decode("gb18030").encode("UTF-8") for s in strings_zh_cn + ] + expected_zh_tw = [s.decode("big5").encode("UTF-8") for s in strings_zh_tw] + expected_ko = [s.decode("euc_kr").encode("UTF-8") for s in strings_ko] + + with self.cached_session() as sess: + outputs_ja = string_ops.unicode_transcode( + strings_ja, + input_encoding="shift_jis", + output_encoding="UTF-8", + replacement_char=ord(" "), + replace_control_characters=False) + + outputs_zh_cn = string_ops.unicode_transcode( + strings_zh_cn, + input_encoding="gb18030", + output_encoding="UTF-8", + replacement_char=ord(" "), + replace_control_characters=False) + + outputs_zh_tw = string_ops.unicode_transcode( + strings_zh_tw, + input_encoding="big5", + output_encoding="UTF-8", + replacement_char=ord(" "), + replace_control_characters=False) + + outputs_ko = string_ops.unicode_transcode( + strings_ko, + input_encoding="euc_kr", + output_encoding="UTF-8", + replacement_char=ord(" "), + replace_control_characters=False) + + result_ja, result_zh_cn, result_zh_tw, result_ko = sess.run( + [outputs_ja, outputs_zh_cn, outputs_zh_tw, outputs_ko]) + + self.assertAllEqual(result_ja, expected_ja) + self.assertAllEqual(result_zh_cn, expected_zh_cn) + self.assertAllEqual(result_zh_tw, expected_zh_tw) + self.assertAllEqual(result_ko, expected_ko) + if __name__ == "__main__": test.main() diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 7aaa845ae92..1cac5ee1383 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -112,6 +112,7 @@ pkg_tar( genrule( name = "clicenses_generate", srcs = [ + "//third_party/icu/data:LICENSE", "//third_party/hadoop:LICENSE.txt", "//third_party/eigen3:LICENSE", "//third_party/fft2d:LICENSE", @@ -180,6 +181,7 @@ genrule( genrule( name = "jnilicenses_generate", srcs = [ + "//third_party/icu/data:LICENSE", "//third_party/hadoop:LICENSE.txt", "//third_party/eigen3:LICENSE", "//third_party/fft2d:LICENSE", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 3a863d3c523..fa372dcd74b 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -132,6 +132,7 @@ py_binary( filegroup( name = "licenses", data = [ + "//third_party/icu/data:LICENSE", "//third_party/eigen3:LICENSE", "//third_party/fft2d:LICENSE", "//third_party/hadoop:LICENSE.txt", diff --git a/third_party/icu/data/BUILD b/third_party/icu/data/BUILD new file mode 100644 index 00000000000..7db21566e4e --- /dev/null +++ b/third_party/icu/data/BUILD @@ -0,0 +1,46 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +# Data for core MIME/Unix/Windows encodings: +# ISO 8859-2..9, 15; Windows-125x; EUC-CN; GBK (Windows cp936); GB 18030; +# Big5 (Windows cp950); SJIS (Windows cp932); EUC-JP; EUC-KR, KS C 5601; +# Windows cp949. Data is pre-processed for little-endian platforms. To replicate +# this pre-processing (if you want additional encodings, for example), do the +# following: +# +# First, download, build, and install ICU. This installs tools such as makeconv. +# Then, run the following from your icu4c/source directory: +# $ cd data/mappings +# $ rm *.cnv # there shouldn't be any .cnv files here to begin with +# $ grep \.ucm ucmcore.mk | \ +# sed 's/\(UCM_SOURCE_CORE=\)\?\([^ ]\+\.ucm\)\\\?/\2/g' | \ +# tr '\n' ' ' | xargs makeconv +# $ ls *.cnv > filelist.lst +# $ pkgdata -m common -p ucmcore filelist.lst +# $ genccode -f custom_conversion_data ucmcore.dat +# This creates custom_conversion_data.c. You will need to change the target +# :conversion_data to depend on your custom source instead of :conversion_data.c +filegroup( + name = "conversion_files", + srcs = glob(["icu_conversion_data.c.gz.*"]), +) + +# Data files are compressed and split to work around git performance degradation +# around large files. +genrule( + name = "merge_conversion_data", + srcs = [":conversion_files"], + outs = ["conversion_data.c"], + cmd = "cat $(locations :conversion_files) | gunzip > $@", +) + +cc_library( + name = "conversion_data", + srcs = [":conversion_data.c"], + deps = ["@icu//:headers"], +) diff --git a/third_party/icu/data/LICENSE b/third_party/icu/data/LICENSE new file mode 100644 index 00000000000..25b6eb9d341 --- /dev/null +++ b/third_party/icu/data/LICENSE @@ -0,0 +1,414 @@ +COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) + +Copyright © 1991-2018 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in http://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +--------------------- + +Third-Party Software Licenses + +This section contains third-party software notices and/or additional +terms for licensed third-party software components included within ICU +libraries. + +1. ICU License - ICU 1.8.1 to ICU 57.1 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2016 International Business Machines Corporation and others +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY +SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, use +or other dealings in this Software without prior written authorization +of the copyright holder. + +All trademarks and registered trademarks mentioned herein are the +property of their respective owners. + +2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt) + + # The Google Chrome software developed by Google is licensed under + # the BSD license. Other software included in this distribution is + # provided under other licenses, as set forth below. + # + # The BSD License + # http://opensource.org/licenses/bsd-license.php + # Copyright (C) 2006-2008, Google Inc. + # + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # Redistributions of source code must retain the above copyright notice, + # this list of conditions and the following disclaimer. + # Redistributions in binary form must reproduce the above + # copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided with + # the distribution. + # Neither the name of Google Inc. nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. + # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # + # + # The word list in cjdict.txt are generated by combining three word lists + # listed below with further processing for compound word breaking. The + # frequency is generated with an iterative training against Google web + # corpora. + # + # * Libtabe (Chinese) + # - https://sourceforge.net/project/?group_id=1519 + # - Its license terms and conditions are shown below. + # + # * IPADIC (Japanese) + # - http://chasen.aist-nara.ac.jp/chasen/distribution.html + # - Its license terms and conditions are shown below. + # + # ---------COPYING.libtabe ---- BEGIN-------------------- + # + # /* + # * Copyright (c) 1999 TaBE Project. + # * Copyright (c) 1999 Pai-Hsiang Hsiao. + # * All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the TaBE Project nor the names of its + # * contributors may be used to endorse or promote products derived + # * from this software without specific prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # /* + # * Copyright (c) 1999 Computer Systems and Communication Lab, + # * Institute of Information Science, Academia + # * Sinica. All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the Computer Systems and Communication Lab + # * nor the names of its contributors may be used to endorse or + # * promote products derived from this software without specific + # * prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # Copyright 1996 Chih-Hao Tsai @ Beckman Institute, + # University of Illinois + # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4 + # + # ---------------COPYING.libtabe-----END-------------------------------- + # + # + # ---------------COPYING.ipadic-----BEGIN------------------------------- + # + # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science + # and Technology. All Rights Reserved. + # + # Use, reproduction, and distribution of this software is permitted. + # Any copy of this software, whether in its original form or modified, + # must include both the above copyright notice and the following + # paragraphs. + # + # Nara Institute of Science and Technology (NAIST), + # the copyright holders, disclaims all warranties with regard to this + # software, including all implied warranties of merchantability and + # fitness, in no event shall NAIST be liable for + # any special, indirect or consequential damages or any damages + # whatsoever resulting from loss of use, data or profits, whether in an + # action of contract, negligence or other tortuous action, arising out + # of or in connection with the use or performance of this software. + # + # A large portion of the dictionary entries + # originate from ICOT Free Software. The following conditions for ICOT + # Free Software applies to the current dictionary as well. + # + # Each User may also freely distribute the Program, whether in its + # original form or modified, to any third party or parties, PROVIDED + # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear + # on, or be attached to, the Program, which is distributed substantially + # in the same form as set out herein and that such intended + # distribution, if actually made, will neither violate or otherwise + # contravene any of the laws and regulations of the countries having + # jurisdiction over the User or the intended distribution itself. + # + # NO WARRANTY + # + # The program was produced on an experimental basis in the course of the + # research and development conducted during the project and is provided + # to users as so produced on an experimental basis. Accordingly, the + # program is provided without any warranty whatsoever, whether express, + # implied, statutory or otherwise. The term "warranty" used herein + # includes, but is not limited to, any warranty of the quality, + # performance, merchantability and fitness for a particular purpose of + # the program and the nonexistence of any infringement or violation of + # any right of any third party. + # + # Each user of the program will agree and understand, and be deemed to + # have agreed and understood, that there is no warranty whatsoever for + # the program and, accordingly, the entire risk arising from or + # otherwise connected with the program is assumed by the user. + # + # Therefore, neither ICOT, the copyright holder, or any other + # organization that participated in or was otherwise related to the + # development of the program and their respective officials, directors, + # officers and other employees shall be held liable for any and all + # damages, including, without limitation, general, special, incidental + # and consequential damages, arising out of or otherwise in connection + # with the use or inability to use the program or any product, material + # or result produced or otherwise obtained by using the program, + # regardless of whether they have been advised of, or otherwise had + # knowledge of, the possibility of such damages at any time during the + # project or thereafter. Each user will be deemed to have agreed to the + # foregoing by his or her commencement of use of the program. The term + # "use" as used herein includes, but is not limited to, the use, + # modification, copying and distribution of the program and the + # production of secondary products from the program. + # + # In the case where the program, whether in its original form or + # modified, was distributed or delivered to or received by a user from + # any person, organization or entity other than ICOT, unless it makes or + # grants independently of ICOT any specific warranty to the user in + # writing, such person, organization or entity, will also be exempted + # from and not be held liable to the user for any such damages as noted + # above as far as the program is concerned. + # + # ---------------COPYING.ipadic-----END---------------------------------- + +3. Lao Word Break Dictionary Data (laodict.txt) + + # Copyright (c) 2013 International Business Machines Corporation + # and others. All Rights Reserved. + # + # Project: http://code.google.com/p/lao-dictionary/ + # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt + # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt + # (copied below) + # + # This file is derived from the above dictionary, with slight + # modifications. + # ---------------------------------------------------------------------- + # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell. + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, + # are permitted provided that the following conditions are met: + # + # + # Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. Redistributions in + # binary form must reproduce the above copyright notice, this list of + # conditions and the following disclaimer in the documentation and/or + # other materials provided with the distribution. + # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # OF THE POSSIBILITY OF SUCH DAMAGE. + # -------------------------------------------------------------------------- + +4. Burmese Word Break Dictionary Data (burmesedict.txt) + + # Copyright (c) 2014 International Business Machines Corporation + # and others. All Rights Reserved. + # + # This list is part of a project hosted at: + # github.com/kanyawtech/myanmar-karen-word-lists + # + # -------------------------------------------------------------------------- + # Copyright (c) 2013, LeRoy Benjamin Sharon + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: Redistributions of source code must retain the above + # copyright notice, this list of conditions and the following + # disclaimer. Redistributions in binary form must reproduce the + # above copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided + # with the distribution. + # + # Neither the name Myanmar Karen Word Lists, nor the names of its + # contributors may be used to endorse or promote products derived + # from this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + # -------------------------------------------------------------------------- + +5. Time Zone Database + + ICU uses the public domain data and code derived from Time Zone +Database for its time zone support. The ownership of the TZ database +is explained in BCP 175: Procedure for Maintaining the Time Zone +Database section 7. + + # 7. Database Ownership + # + # The TZ database itself is not an IETF Contribution or an IETF + # document. Rather it is a pre-existing and regularly updated work + # that is in the public domain, and is intended to remain in the + # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do + # not apply to the TZ Database or contributions that individuals make + # to it. Should any claims be made and substantiated against the TZ + # Database, the organization that is providing the IANA + # Considerations defined in this RFC, under the memorandum of + # understanding with the IETF, currently ICANN, may act in accordance + # with all competent court orders. No ownership claims will be made + # by ICANN or the IETF Trust on the database or the code. Any person + # making a contribution to the database or code waives all rights to + # future claims in that contribution or in the TZ Database. + +6. Google double-conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/icu/data/icu_conversion_data.c.gz.aa b/third_party/icu/data/icu_conversion_data.c.gz.aa new file mode 100644 index 00000000000..b68a2c6516f Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.aa differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.ab b/third_party/icu/data/icu_conversion_data.c.gz.ab new file mode 100644 index 00000000000..d60aa92d675 Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.ab differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.ac b/third_party/icu/data/icu_conversion_data.c.gz.ac new file mode 100644 index 00000000000..de9b69ff947 Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.ac differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.ad b/third_party/icu/data/icu_conversion_data.c.gz.ad new file mode 100644 index 00000000000..d5abb06b8ca Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.ad differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.ae b/third_party/icu/data/icu_conversion_data.c.gz.ae new file mode 100644 index 00000000000..0e54fdb9eaf Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.ae differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.af b/third_party/icu/data/icu_conversion_data.c.gz.af new file mode 100644 index 00000000000..cfbeb165ad3 Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.af differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.ag b/third_party/icu/data/icu_conversion_data.c.gz.ag new file mode 100644 index 00000000000..bde20b6da62 Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.ag differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.ah b/third_party/icu/data/icu_conversion_data.c.gz.ah new file mode 100644 index 00000000000..ae31dffbe2a Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.ah differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.ai b/third_party/icu/data/icu_conversion_data.c.gz.ai new file mode 100644 index 00000000000..981b869561a Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.ai differ diff --git a/third_party/icu/data/icu_conversion_data.c.gz.aj b/third_party/icu/data/icu_conversion_data.c.gz.aj new file mode 100644 index 00000000000..1ae6bce382a Binary files /dev/null and b/third_party/icu/data/icu_conversion_data.c.gz.aj differ diff --git a/third_party/icu/udata.patch b/third_party/icu/udata.patch new file mode 100644 index 00000000000..d6d59100e48 --- /dev/null +++ b/third_party/icu/udata.patch @@ -0,0 +1,53 @@ +--- /icu4c/source/common/udata.cpp.old 2018-06-19 22:34:56.000000000 -0700 ++++ /icu4c/source/common/udata.cpp 2018-10-19 14:26:09.778950855 -0700 +@@ -18,15 +18,15 @@ + + #include "unicode/utypes.h" /* U_PLATFORM etc. */ + +-#ifdef __GNUC__ +-/* if gcc +-#define ATTRIBUTE_WEAK __attribute__ ((weak)) +-might have to #include some other header +-*/ ++#if defined(__GNUC__) || defined(__SUNPRO_CC) ++# define ATTRIBUTE_WEAK __attribute__ ((weak)) ++#else ++# define ATTRIBUTE_WEAK + #endif + + #include "unicode/putil.h" + #include "unicode/udata.h" ++#include "unicode/umachine.h" + #include "unicode/uversion.h" + #include "charstr.h" + #include "cmemory.h" +@@ -641,10 +641,11 @@ + * partial-data-library access functions where each returns a pointer + * to its data package, if it is linked in. + */ +-/* +-extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK; +-extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK; +-*/ ++ ++//extern "C" const void *uprv_getICUData_collation(void); ++U_CDECL_BEGIN ++const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK; ++U_CDECL_END + + /*----------------------------------------------------------------------* + * * +@@ -702,10 +703,11 @@ + if (uprv_getICUData_collation) { + setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode); + } ++ */ + if (uprv_getICUData_conversion) { +- setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode); ++ setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode); + } +- */ ++ + #if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time + setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode); + { diff --git a/third_party/icu/workspace.bzl b/third_party/icu/workspace.bzl index a4f653e0261..f100836b410 100644 --- a/third_party/icu/workspace.bzl +++ b/third_party/icu/workspace.bzl @@ -2,6 +2,11 @@ load("//third_party:repo.bzl", "third_party_http_archive") +# Sanitize a dependency so that it works correctly from code that includes +# TensorFlow as a submodule. +def clean_dep(dep): + return str(Label(dep)) + def repo(): third_party_http_archive( name = "icu", @@ -13,4 +18,5 @@ def repo(): ], build_file = "//third_party/icu:BUILD.bazel", system_build_file = "//third_party/icu:BUILD.system", + patch_file = clean_dep("//third_party/icu:udata.patch"), )