From d5c5df164cedcd8ae43fff41256592818bc6c2de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 25 Sep 2018 11:56:33 -0700
Subject: [PATCH] Add "encoding" attribute to string length op, which controls how "string length" is defined:

* BYTE: The number of bytes in each string. (Default)
* UTF8: The number of UTF-8 encoded Unicode code points in each string.

RELNOTES: Add option to calculate string length in Unicode characters
PiperOrigin-RevId: 214478470
---
 tensorflow/contrib/makefile/tf_op_files.txt   |  1 +
 .../base_api/api_def_StringLength.pbtxt       | 10 +++
 .../python_api/api_def_StringLength.pbtxt     |  4 +-
 tensorflow/core/kernels/BUILD                 | 10 +++
 tensorflow/core/kernels/string_length_op.cc   | 23 ++++++-
 tensorflow/core/kernels/string_util.cc        | 63 +++++++++++++++++++
 tensorflow/core/kernels/string_util.h         | 45 +++++++++++++
 tensorflow/core/ops/string_ops.cc             |  1 +
 .../kernel_tests/string_length_op_test.py     | 27 ++++++++
 tensorflow/python/ops/string_ops.py           | 13 ++++
 .../api/golden/v1/tensorflow.strings.pbtxt    |  2 +-
 .../api/golden/v2/tensorflow.strings.pbtxt    |  2 +-
 12 files changed, 193 insertions(+), 8 deletions(-)
 create mode 100644 tensorflow/core/kernels/string_util.cc
 create mode 100644 tensorflow/core/kernels/string_util.h

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 08de54b8e18..f81a90809ae 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -253,6 +253,7 @@ tensorflow/core/kernels/strided_slice_op_inst_5.cc
 tensorflow/core/kernels/strided_slice_op_inst_6.cc
 tensorflow/core/kernels/strided_slice_op_inst_7.cc
 tensorflow/core/kernels/string_join_op.cc
+tensorflow/core/kernels/string_util.cc
 tensorflow/core/kernels/tensor_array.cc
 tensorflow/core/kernels/tensor_array_ops.cc
 tensorflow/core/kernels/tile_functor_cpu.cc
diff --git a/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt 
b/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt index cc21ddc8157..7d2fbcd00bd 100644 --- a/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_StringLength.pbtxt @@ -1,5 +1,15 @@ op { graph_op_name: "StringLength" + attr { + name: "unit" + description: <