diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index aabbfe86a63..d60b711749e 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -716,8 +716,13 @@ cc_library( cc_library( name = "tstring", - hdrs = ["tstring.h"], + hdrs = [ + "ctstring.h", + "ctstring_internal.h", + "tstring.h", + ], deps = [ + ":cord", "@com_google_absl//absl/strings", ], ) @@ -894,6 +899,29 @@ tf_cc_test( ], ) +tf_cc_test( + name = "ctstring_test", + size = "small", + srcs = ["ctstring_test.cc"], + deps = [ + ":tstring", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "tstring_test", + size = "small", + srcs = ["tstring_test.cc"], + deps = [ + ":cord", + ":tstring", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + tf_cc_test( name = "platform_strings_test", size = "small", @@ -1076,6 +1104,8 @@ filegroup( srcs = [ "byte_order.h", "cord.h", + "ctstring.h", + "ctstring_internal.h", "env_time.h", "logging.h", "macros.h", @@ -1138,6 +1168,8 @@ filegroup( filegroup( name = "lib_proto_parsing_hdrs", srcs = [ + "ctstring.h", + "ctstring_internal.h", "init_main.h", "logging.h", "macros.h", @@ -1172,6 +1204,8 @@ filegroup( srcs = [ "byte_order.h", "cpu_info.h", + "ctstring.h", + "ctstring_internal.h", "dynamic_annotations.h", "macros.h", "mutex.h", @@ -1234,6 +1268,8 @@ filegroup( filegroup( name = "tflite_portable_logging_hdrs", srcs = [ + "ctstring.h", + "ctstring_internal.h", "logging.h", "macros.h", "platform.h", @@ -1246,6 +1282,8 @@ filegroup( filegroup( name = "jpeg_internal_hdrs", srcs = [ + "ctstring.h", + "ctstring_internal.h", "dynamic_annotations.h", "logging.h", "macros.h", @@ -1261,6 +1299,8 @@ filegroup( filegroup( name = "gif_internal_hdrs", srcs = [ + "ctstring.h", + "ctstring_internal.h", "dynamic_annotations.h", "logging.h", "macros.h", @@ -1286,6 +1326,8 @@ filegroup( "cord.h", "cpu_info.cc", "cpu_info.h", + "ctstring.h", + "ctstring_internal.h", 
"demangle.h", "denormal.cc", "denormal.h", diff --git a/tensorflow/core/platform/ctstring.h b/tensorflow/core/platform/ctstring.h new file mode 100644 index 00000000000..3209a8e7303 --- /dev/null +++ b/tensorflow/core/platform/ctstring.h @@ -0,0 +1,120 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_CTSTRING_H_ +#define TENSORFLOW_CORE_PLATFORM_CTSTRING_H_ + +#include <stdint.h> +#include <stdlib.h> + +#include "tensorflow/core/platform/ctstring_internal.h" + +// Initialize a new tstring. This must be called before using any function +// below. +inline void TF_TString_Init(TF_TString *str); +// Deallocate a tstring. +inline void TF_TString_Dealloc(TF_TString *str); + +// Resizes `str' to `new_size'. This function will appropriately grow or shrink +// the string buffer to fit a `new_size' string. Grown regions of the string +// will be initialized with `c'. +inline char *TF_TString_Resize(TF_TString *str, size_t new_size, char c); +// Similar to TF_TString_Resize, except the newly allocated regions will remain +// uninitialized. This is useful if you plan on overwriting the newly grown +// regions immediately after allocation; doing so will elide a superfluous +// initialization of the new buffer. 
+inline char *TF_TString_ResizeUninitialized(TF_TString *str, size_t new_size); +// Reserves a string buffer with a capacity of at least `new_cap'. +// Reserve will not change the size, or the contents of the existing +// string. This is useful if you have a rough idea of `str's upper bound in +// size, and want to avoid allocations as you append to `str'. It should not be +// considered safe to write in the region between size and capacity; explicitly +// resize before doing so. +inline void TF_TString_Reserve(TF_TString *str, size_t new_cap); + +// Returns the size of the string. +inline size_t TF_TString_GetSize(const TF_TString *str); +// Returns the capacity of the string buffer. It should not be considered safe +// to write in the region between size and capacity---call Resize or +// ResizeUninitialized before doing so. +inline size_t TF_TString_GetCapacity(const TF_TString *str); +// Returns the underlying type of the tstring: +// TF_TSTR_SMALL: +// Small string optimization; the contents of strings +// of at most 22 bytes are stored in the TF_TString struct. This avoids any +// heap allocations. +// TF_TSTR_LARGE: +// Heap allocated string. +// TF_TSTR_OFFSET: (currently unused) +// An offset defined string. The string buffer begins at an internally +// defined little-endian offset from `str'; i.e. GetDataPointer() = str + +// offset. This type is useful for memory mapping or reading string tensors +// directly from file, without the need to deserialize the data. For +// security reasons, it is imperative that OFFSET based string tensors are +// validated before use, or are from a trusted source. +// TF_TSTR_VIEW: +// A view into an unowned character string. +// +// NOTE: +// VIEW and OFFSET types are immutable, so any modification via Append, +// AppendN, or GetMutableDataPointer of a VIEW/OFFSET based tstring will +// result in a conversion to an owned type (SMALL/LARGE). 
+inline TF_TString_Type TF_TString_GetType(const TF_TString *str); + +// Returns a const char pointer to the start of the underlying string. The +// underlying character buffer may not be null-terminated. +inline const char *TF_TString_GetDataPointer(const TF_TString *str); +// Returns a char pointer to a mutable representation of the underlying string. +// In the case of VIEW and OFFSET types, `str' is converted to an owned type +// (SMALL/LARGE). The underlying character buffer may not be null-terminated. +inline char *TF_TString_GetMutableDataPointer(TF_TString *str); + +// Sets `dst' as a VIEW type to `src'. `dst' will not take ownership of `src'. +// It is the user's responsibility to ensure that the lifetime of `src' exceeds +// `dst'. Any mutations to `dst' via Append, AppendN, or GetMutableDataPointer, +// will result in a copy into an owned SMALL or LARGE type, and will not modify +// `src'. +inline void TF_TString_AssignView(TF_TString *dst, const char *src, + size_t size); + +// Appends `src' onto `dst'. If `dst' is a VIEW or OFFSET type, it will first +// be converted to an owned LARGE or SMALL type. `src' should not point to +// memory owned by `dst'. +inline void TF_TString_Append(TF_TString *dst, const TF_TString *src); +inline void TF_TString_AppendN(TF_TString *dst, const char *src, size_t size); + +// Copy/Move/Assign semantics +// +// | src | dst | complexity +// Copy | * | SMALL/LARGE | fixed/O(size) +// Assign | SMALL | SMALL | fixed +// Assign | OFFSET | VIEW | fixed +// Assign | VIEW | VIEW | fixed +// Assign | LARGE | LARGE | O(size) +// Move | * | same as src | fixed + +// Copies `src' to `dst'. `dst' will be an owned type (SMALL/LARGE). `src' +// should not point to memory owned by `dst'. +inline void TF_TString_Copy(TF_TString *dst, const char *src, size_t size); +// Assigns a `src' tstring to `dst'. An OFFSET `src' type will yield a `VIEW' +// `dst'. 
LARGE `src' types will be copied to a new buffer; all other `src' +// types will incur a fixed cost. +inline void TF_TString_Assign(TF_TString *dst, const TF_TString *src); +// Moves a `src' tstring to `dst'. Moving a LARGE `src' to `dst' will result in +// a valid but unspecified `src'. This function incurs a fixed cost for all +// inputs. +inline void TF_TString_Move(TF_TString *dst, TF_TString *src); + +#endif // TENSORFLOW_CORE_PLATFORM_CTSTRING_H_ diff --git a/tensorflow/core/platform/ctstring_internal.h b/tensorflow/core/platform/ctstring_internal.h new file mode 100644 index 00000000000..0d199aed0c6 --- /dev/null +++ b/tensorflow/core/platform/ctstring_internal.h @@ -0,0 +1,449 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_CTSTRING_INTERNAL_H_ +#define TENSORFLOW_CORE_PLATFORM_CTSTRING_INTERNAL_H_ + +#include <limits.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \ + defined(_WIN32) +#define TF_TSTRING_LITTLE_ENDIAN 1 +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define TF_TSTRING_LITTLE_ENDIAN 0 +#else +#error "Unable to detect endianness." 
+#endif + +#if defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) +static inline uint32_t TF_swap32(uint32_t host_int) { + return __builtin_bswap32(host_int); +} + +#elif defined(_MSC_VER) +static inline uint32_t TF_swap32(uint32_t host_int) { + return _byteswap_ulong(host_int); +} + +#elif defined(__APPLE__) +static inline uint32_t TF_swap32(uint32_t host_int) { + return OSSwapInt32(host_int); +} + +#else +static inline uint32_t TF_swap32(uint32_t host_int) { +#if defined(__GLIBC__) + return bswap_32(host_int); +#else // defined(__GLIBC__) + return (((host_int & uint32_t{0xFF}) << 24) | + ((host_int & uint32_t{0xFF00}) << 8) | + ((host_int & uint32_t{0xFF0000}) >> 8) | + ((host_int & uint32_t{0xFF000000}) >> 24)); +#endif // defined(__GLIBC__) +} +#endif + +#if TF_TSTRING_LITTLE_ENDIAN +#define TF_le32toh(x) (x) // LE host: little-endian data is already in host order. +#else // TF_TSTRING_LITTLE_ENDIAN +#define TF_le32toh(x) TF_swap32(x) // BE host: byte-swap little-endian data into host order. +#endif // TF_TSTRING_LITTLE_ENDIAN + +static inline size_t TF_align16(size_t i) { return (i + 0xF) & ~0xF; } + +static inline size_t TF_max(size_t a, size_t b) { return a > b ? a : b; } +static inline size_t TF_min(size_t a, size_t b) { return a < b ? 
a : b; } + +typedef enum TF_TString_Type { // NOLINT + TF_TSTR_SMALL = 0x00, + TF_TSTR_LARGE = 0x01, + TF_TSTR_OFFSET = 0x02, + TF_TSTR_VIEW = 0x03, + TF_TSTR_TYPE_MASK = 0x03 +} TF_TString_Type; + +typedef struct TF_TString_Large { // NOLINT + size_t size; + size_t cap; + char *ptr; +} TF_TString_Large; + +typedef struct TF_TString_Offset { // NOLINT + uint32_t size; + uint32_t offset; + uint32_t count; +} TF_TString_Offset; + +typedef struct TF_TString_View { // NOLINT + size_t size; + const char *ptr; +} TF_TString_View; + +typedef struct TF_TString_Raw { // NOLINT + uint8_t raw[24]; +} TF_TString_Raw; + +typedef union TF_TString_Union { // NOLINT + TF_TString_Large large; + TF_TString_Offset offset; + TF_TString_View view; + TF_TString_Raw raw; +} TF_TString_Union; + +enum { + TF_TString_SmallCapacity = + (sizeof(TF_TString_Union) - sizeof(/* null delim */ char) - + sizeof(/* uint8_t size */ uint8_t)), +}; + +typedef struct TF_TString_Small { // NOLINT + uint8_t size; + char str[TF_TString_SmallCapacity + sizeof(/* null delim */ char)]; +} TF_TString_Small; + +typedef struct TF_TString { // NOLINT + union { + // small conflicts with '#define small char' in RpcNdr.h for MSVC, so we use + // smll instead. + TF_TString_Small smll; + TF_TString_Large large; + TF_TString_Offset offset; + TF_TString_View view; + TF_TString_Raw raw; + } u; +} TF_TString; + +// TODO(dero): Fix for OSS, and add C only build test. +// _Static_assert(CHAR_BIT == 8); +// _Static_assert(sizeof(TF_TString) == 24); + +extern inline TF_TString_Type TF_TString_GetType(const TF_TString *str) { + return (TF_TString_Type)(str->u.raw.raw[0] & TF_TSTR_TYPE_MASK); // NOLINT +} + +// XXX(dero): For the big-endian case, this function could potentially be more +// performant and readable by always storing the string size as little-endian +// and always byte-swapping on big endian, resulting in a simple 'bswap'+'shr' +// (for architectures that have a bswap op). 
+static inline size_t TF_TString_ToActualSizeT(size_t size) { +#if TF_TSTRING_LITTLE_ENDIAN // Always defined (0 or 1), so test the value, not definedness. + return size >> 2; +#else // TF_TSTRING_LITTLE_ENDIAN + // 0xFF000000 or 0xFF00000000000000 depending on platform + static const size_t mask = ~((~(size_t)0) >> 8); + + return (((mask << 2) & size) >> 2) | (~mask & size); +#endif // TF_TSTRING_LITTLE_ENDIAN +} + +static inline size_t TF_TString_ToInternalSizeT(size_t size, + TF_TString_Type type) { +#if TF_TSTRING_LITTLE_ENDIAN // Always defined (0 or 1), so test the value, not definedness. + return (size << 2) | type; +#else // TF_TSTRING_LITTLE_ENDIAN + // 0xFF000000 or 0xFF00000000000000 depending on platform + static const size_t mask = ~((~(size_t)0) >> 8); + + return (mask & (size << 2)) | (~mask & size) | + ((size_t)type << ((sizeof(size_t) - 1) * 8)); // NOLINT +#endif // TF_TSTRING_LITTLE_ENDIAN +} + +extern inline void TF_TString_Init(TF_TString *str) { + str->u.smll.size = 0; + str->u.smll.str[0] = '\0'; +} + +extern inline void TF_TString_Dealloc(TF_TString *str) { + if (TF_TString_GetType(str) == TF_TSTR_LARGE && + str->u.large.ptr != NULL) { // NOLINT + free(str->u.large.ptr); + TF_TString_Init(str); + } +} + +extern inline size_t TF_TString_GetSize(const TF_TString *str) { + switch (TF_TString_GetType(str)) { + case TF_TSTR_SMALL: + return str->u.smll.size >> 2; + case TF_TSTR_LARGE: + return TF_TString_ToActualSizeT(str->u.large.size); + case TF_TSTR_OFFSET: + return TF_le32toh(str->u.offset.size) >> 2; + case TF_TSTR_VIEW: + return TF_TString_ToActualSizeT(str->u.view.size); + default: + return 0; // Unreachable. 
+ } +} + +extern inline size_t TF_TString_GetCapacity(const TF_TString *str) { + switch (TF_TString_GetType(str)) { + case TF_TSTR_SMALL: + return TF_TString_SmallCapacity; + case TF_TSTR_LARGE: + return str->u.large.cap; + case TF_TSTR_OFFSET: + case TF_TSTR_VIEW: + default: + return 0; + } +} + +extern inline const char *TF_TString_GetDataPointer(const TF_TString *str) { + switch (TF_TString_GetType(str)) { + case TF_TSTR_SMALL: + return str->u.smll.str; + case TF_TSTR_LARGE: + return str->u.large.ptr; + case TF_TSTR_OFFSET: + return (const char *)str + str->u.offset.offset; // NOLINT + case TF_TSTR_VIEW: + return str->u.view.ptr; + default: + // Unreachable. + return NULL; // NOLINT + } +} + +extern inline char *TF_TString_ResizeUninitialized(TF_TString *str, + size_t new_size) { + size_t curr_size = TF_TString_GetSize(str); + size_t copy_size = TF_min(new_size, curr_size); + + TF_TString_Type curr_type = TF_TString_GetType(str); + const char *curr_ptr = TF_TString_GetDataPointer(str); + + // Case: SMALL/LARGE/VIEW/OFFSET -> SMALL + if (new_size <= TF_TString_SmallCapacity) { + str->u.smll.size = (uint8_t)((new_size << 2) | TF_TSTR_SMALL); // NOLINT + str->u.smll.str[new_size] = '\0'; + + if (curr_type != TF_TSTR_SMALL && copy_size) { + memcpy(str->u.smll.str, curr_ptr, copy_size); + } + + if (curr_type == TF_TSTR_LARGE) { + free((void *)curr_ptr); // NOLINT + } + + // We do not clear out the newly excluded region. + + return str->u.smll.str; + } + + // Case: SMALL/LARGE/VIEW/OFFSET -> LARGE + size_t new_cap; + size_t curr_cap = TF_TString_GetCapacity(str); + // We assume SIZE_MAX % 16 == 0. + size_t curr_cap_x2 = curr_cap >= SIZE_MAX / 2 ? SIZE_MAX - 1 : curr_cap * 2; + + if (new_size < curr_size && new_size < curr_cap / 2) { + // TODO(dero): Replace with shrink_to_fit flag. 
+ new_cap = TF_align16(curr_cap / 2 + 1) - 1; + } else if (new_size > curr_cap_x2) { + new_cap = TF_align16(new_size + 1) - 1; + } else if (new_size > curr_cap) { + new_cap = TF_align16(curr_cap_x2 + 1) - 1; + } else { + new_cap = curr_cap; + } + + char *new_ptr; + if (new_cap == curr_cap) { + new_ptr = str->u.large.ptr; + } else if (curr_type == TF_TSTR_LARGE) { + new_ptr = (char *)realloc(str->u.large.ptr, new_cap + 1); // NOLINT + } else { + new_ptr = (char *)malloc(new_cap + 1); // NOLINT + if (copy_size) { + memcpy(new_ptr, curr_ptr, copy_size); + } + } + + str->u.large.size = TF_TString_ToInternalSizeT(new_size, TF_TSTR_LARGE); + str->u.large.ptr = new_ptr; + str->u.large.ptr[new_size] = '\0'; + str->u.large.cap = new_cap; + + return str->u.large.ptr; +} + +extern inline char *TF_TString_GetMutableDataPointer(TF_TString *str) { + switch (TF_TString_GetType(str)) { + case TF_TSTR_SMALL: + return str->u.smll.str; + case TF_TSTR_OFFSET: + case TF_TSTR_VIEW: + // Convert OFFSET/VIEW to an owned SMALL/LARGE type. The result may be + // SMALL, so return the buffer the resize reports, not u.large.ptr. + return TF_TString_ResizeUninitialized(str, TF_TString_GetSize(str)); + case TF_TSTR_LARGE: + return str->u.large.ptr; + default: + // Unreachable. + return NULL; // NOLINT + } +} + +extern inline void TF_TString_Reserve(TF_TString *str, size_t new_cap) { + TF_TString_Type curr_type = TF_TString_GetType(str); + + if (new_cap <= TF_TString_SmallCapacity) { + // We do nothing, we let Resize/GetMutableDataPointer handle the + // conversion to SMALL from VIEW/OFFSET when the need arises. + // In the degenerate case, where new_cap <= TF_TString_SmallCapacity, + // curr_size > TF_TString_SmallCapacity, and the type is VIEW/OFFSET, we + // defer the malloc to Resize/GetMutableDataPointer. + return; + } + + if (curr_type == TF_TSTR_LARGE && new_cap <= str->u.large.cap) { + // We handle reduced cap in resize. 
+ return; + } + + // Case: VIEW/OFFSET -> LARGE or grow an existing LARGE type + size_t curr_size = TF_TString_GetSize(str); + const char *curr_ptr = TF_TString_GetDataPointer(str); + + // Since VIEW and OFFSET types are read-only, their capacity is effectively 0. + // So we make sure we have enough room in the VIEW and OFFSET cases. + new_cap = TF_align16(TF_max(new_cap, curr_size) + 1) - 1; + + if (curr_type == TF_TSTR_LARGE) { + str->u.large.ptr = + (char *)realloc(str->u.large.ptr, new_cap + 1); // NOLINT + } else { + // Convert to Large + char *new_ptr = (char *)malloc(new_cap + 1); // NOLINT + memcpy(new_ptr, curr_ptr, curr_size); + + str->u.large.size = TF_TString_ToInternalSizeT(curr_size, TF_TSTR_LARGE); + str->u.large.ptr = new_ptr; + str->u.large.ptr[curr_size] = '\0'; + } + + str->u.large.cap = new_cap; +} + +extern inline char *TF_TString_Resize(TF_TString *str, size_t new_size, + char c) { + size_t curr_size = TF_TString_GetSize(str); + char *cstr = TF_TString_ResizeUninitialized(str, new_size); + + if (new_size > curr_size) { + memset(cstr + curr_size, c, new_size - curr_size); + } + + return cstr; +} + +extern inline void TF_TString_AssignView(TF_TString *dst, const char *src, + size_t size) { + TF_TString_Dealloc(dst); + + dst->u.view.size = TF_TString_ToInternalSizeT(size, TF_TSTR_VIEW); + dst->u.view.ptr = src; +} + +extern inline void TF_TString_AppendN(TF_TString *dst, const char *src, + size_t src_size) { + if (!src_size) return; + + size_t dst_size = TF_TString_GetSize(dst); + + char *dst_c = TF_TString_ResizeUninitialized(dst, dst_size + src_size); + + memcpy(dst_c + dst_size, src, src_size); +} + +extern inline void TF_TString_Append(TF_TString *dst, const TF_TString *src) { + const char *src_c = TF_TString_GetDataPointer(src); + size_t size = TF_TString_GetSize(src); + + TF_TString_AppendN(dst, src_c, size); +} + +extern inline void TF_TString_Copy(TF_TString *dst, const char *src, + size_t size) { + char *dst_c = 
TF_TString_ResizeUninitialized(dst, size); + + if (size) memcpy(dst_c, src, size); +} + +extern inline void TF_TString_Assign(TF_TString *dst, const TF_TString *src) { + if (dst == src) return; + + TF_TString_Dealloc(dst); + + switch (TF_TString_GetType(src)) { + case TF_TSTR_SMALL: + case TF_TSTR_VIEW: + *dst = *src; + return; + case TF_TSTR_LARGE: { + const char *src_c = TF_TString_GetDataPointer(src); + size_t size = TF_TString_GetSize(src); + + TF_TString_Copy(dst, src_c, size); + } + return; + case TF_TSTR_OFFSET: { + const char *src_c = TF_TString_GetDataPointer(src); + size_t size = TF_TString_GetSize(src); + + TF_TString_AssignView(dst, src_c, size); + } + return; + default: + return; // Unreachable. + } +} + +extern inline void TF_TString_Move(TF_TString *dst, TF_TString *src) { + if (dst == src) return; + + TF_TString_Dealloc(dst); + + switch (TF_TString_GetType(src)) { + case TF_TSTR_SMALL: + case TF_TSTR_VIEW: + *dst = *src; + return; + case TF_TSTR_LARGE: + *dst = *src; + TF_TString_Init(src); + return; + case TF_TSTR_OFFSET: { + const char *src_c = TF_TString_GetDataPointer(src); + size_t size = TF_TString_GetSize(src); + + TF_TString_AssignView(dst, src_c, size); + } + return; + default: + return; // Unreachable. + } +} + +#endif // TENSORFLOW_CORE_PLATFORM_CTSTRING_INTERNAL_H_ diff --git a/tensorflow/core/platform/ctstring_test.cc b/tensorflow/core/platform/ctstring_test.cc new file mode 100644 index 00000000000..4d82bcd87c3 --- /dev/null +++ b/tensorflow/core/platform/ctstring_test.cc @@ -0,0 +1,331 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/ctstring.h" + +#include <memory> +#include <string> + +#include "tensorflow/core/platform/test.h" + +static const char kLongString[] = + "abcdefghij" + "klmnopqrst" + "uvwxyz0123" + "456789ABCD" + "EFGHIKLMNO"; +const size_t kLongStringLen = sizeof(kLongString) / sizeof(char) - sizeof(char); + +TEST(TF_CTStringTest, InitAssignMoveDealloc) { + EXPECT_GT(::strlen(kLongString), TF_TString_SmallCapacity); + + { + // Empty String + TF_TString s10, s11, s12; + TF_TString_Init(&s10); + TF_TString_Init(&s11); + TF_TString_Init(&s12); + + EXPECT_EQ(0, TF_TString_GetSize(&s10)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s10)); + EXPECT_STREQ("", TF_TString_GetDataPointer(&s10)); + EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s10)); + + TF_TString_Assign(&s11, &s10); + + EXPECT_EQ(0, TF_TString_GetSize(&s11)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s11)); + EXPECT_STREQ("", TF_TString_GetDataPointer(&s11)); + EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s11)); + + TF_TString_Move(&s12, &s11); + + EXPECT_EQ(0, TF_TString_GetSize(&s11)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s11)); + EXPECT_STREQ("", TF_TString_GetDataPointer(&s11)); + EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s11)); + + EXPECT_EQ(0, TF_TString_GetSize(&s12)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s12)); + EXPECT_STREQ("", TF_TString_GetDataPointer(&s12)); + EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s12)); + + TF_TString_Dealloc(&s10); + 
TF_TString_Dealloc(&s11); + TF_TString_Dealloc(&s12); + } + + { + // Small String + TF_TString s20, s21, s22; + TF_TString_Init(&s20); + TF_TString_Init(&s21); + TF_TString_Init(&s22); + + TF_TString_Copy(&s20, "a", 1); + + EXPECT_EQ(1, TF_TString_GetSize(&s20)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s20)); + EXPECT_STREQ("a", TF_TString_GetDataPointer(&s20)); + EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s20)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s20)); + + TF_TString_Assign(&s21, &s20); + + EXPECT_EQ(1, TF_TString_GetSize(&s21)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s21)); + EXPECT_STREQ("a", TF_TString_GetDataPointer(&s21)); + EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s21)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s21)); + + TF_TString_Move(&s22, &s21); + + EXPECT_EQ(1, TF_TString_GetSize(&s22)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s22)); + EXPECT_STREQ("a", TF_TString_GetDataPointer(&s22)); + EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s22)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s22)); + + TF_TString_Dealloc(&s20); + TF_TString_Dealloc(&s21); // Nothing to dealloc, since it was moved. 
+ TF_TString_Dealloc(&s22); + } + + { + // Small String -> Large String and View + TF_TString s30, s31; + TF_TString_Init(&s30); + TF_TString_Init(&s31); + + size_t s = TF_TString_SmallCapacity - 1; + + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30)); + + // Small String + TF_TString_Copy(&s30, kLongString, s); + + EXPECT_STREQ(std::string(kLongString, s).data(), + TF_TString_GetDataPointer(&s30)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s30)); + EXPECT_GT(TF_TString_SmallCapacity, TF_TString_GetSize(&s30)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30)); + + // Small String at capacity + TF_TString_AppendN(&s30, &kLongString[s++], 1); + + EXPECT_STREQ(std::string(kLongString, s).data(), + TF_TString_GetDataPointer(&s30)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s30)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetSize(&s30)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30)); + + // Large String + TF_TString_AppendN(&s30, &kLongString[s++], 1); + + EXPECT_STREQ(std::string(kLongString, s).data(), + TF_TString_GetDataPointer(&s30)); + EXPECT_STREQ(std::string(kLongString, s).data(), + TF_TString_GetMutableDataPointer(&s30)); + EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s30)); + EXPECT_EQ(s, TF_TString_GetSize(&s30)); + EXPECT_LT(TF_TString_SmallCapacity, TF_TString_GetSize(&s30)); + EXPECT_LT(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30)); + + // Large String Move + TF_TString_Move(&s31, &s30); + + EXPECT_STREQ("", TF_TString_GetDataPointer(&s30)); + EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s30)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s30)); + EXPECT_EQ(0, TF_TString_GetSize(&s30)); + + EXPECT_STREQ(std::string(kLongString, s).data(), + TF_TString_GetDataPointer(&s31)); + EXPECT_STREQ(std::string(kLongString, s).data(), + TF_TString_GetMutableDataPointer(&s31)); + EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s31)); + EXPECT_EQ(s, TF_TString_GetSize(&s31)); + 
EXPECT_LT(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s31)); + + TF_TString_Dealloc(&s30); + TF_TString_Dealloc(&s31); + } + + { + // Small String -> Large String -> Larger -> View + const char kStr[] = "abcdef"; + const char kStrLen = sizeof(kStr) / sizeof(char) - sizeof(char); + TF_TString s40, s41; + + TF_TString_Init(&s40); + TF_TString_Init(&s41); + + TF_TString_Copy(&s40, kLongString, kLongStringLen); + + EXPECT_EQ(kLongStringLen, TF_TString_GetSize(&s40)); + + TF_TString_Assign(&s41, &s40); + + EXPECT_STREQ(kLongString, TF_TString_GetDataPointer(&s40)); + EXPECT_STREQ(kLongString, TF_TString_GetMutableDataPointer(&s40)); + EXPECT_EQ(kLongStringLen, TF_TString_GetSize(&s41)); + + TF_TString_AppendN(&s40, kLongString, kLongStringLen); + TF_TString_Append(&s40, &s41); + + std::string longerString(kLongString); + longerString += kLongString; + longerString += kLongString; + EXPECT_STREQ(longerString.data(), TF_TString_GetDataPointer(&s40)); + EXPECT_STREQ(longerString.data(), TF_TString_GetMutableDataPointer(&s40)); + EXPECT_EQ(longerString.size(), TF_TString_GetSize(&s40)); + + TF_TString_AssignView(&s40, kStr, kStrLen); + + EXPECT_EQ(TF_TSTR_VIEW, TF_TString_GetType(&s40)); + EXPECT_EQ(kStr, TF_TString_GetDataPointer(&s40)); + EXPECT_EQ(6, TF_TString_GetSize(&s40)); + EXPECT_EQ(0, TF_TString_GetCapacity(&s40)); + + EXPECT_NE(kStr, TF_TString_GetMutableDataPointer(&s40)); + EXPECT_STREQ(kStr, TF_TString_GetMutableDataPointer(&s40)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s40)); + EXPECT_EQ(6, TF_TString_GetSize(&s40)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s40)); + + TF_TString_Dealloc(&s40); + TF_TString_Dealloc(&s41); + } + + { + // Small String -> Large String -> Smaller + TF_TString s50; + + TF_TString_Init(&s50); + + TF_TString_Copy(&s50, "a", 1); + + EXPECT_STREQ("a", TF_TString_GetDataPointer(&s50)); + EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s50)); + EXPECT_EQ(1, TF_TString_GetSize(&s50)); + + 
TF_TString_Copy(&s50, kLongString, kLongStringLen); + + EXPECT_STREQ(kLongString, TF_TString_GetDataPointer(&s50)); + EXPECT_STREQ(kLongString, TF_TString_GetMutableDataPointer(&s50)); + EXPECT_EQ(kLongStringLen, TF_TString_GetSize(&s50)); + + // align16(kLongStringLen) - 1 = 63 + size_t cap1 = TF_TString_GetCapacity(&s50); + + // Test reduced allocation with on large type. + TF_TString_Copy(&s50, kLongString, TF_TString_SmallCapacity + 1); + + // align16(TF_TString_SmallCapacity+1) - 1 = 31 + size_t cap2 = TF_TString_GetCapacity(&s50); + + EXPECT_STREQ(std::string(kLongString, TF_TString_SmallCapacity + 1).data(), + TF_TString_GetMutableDataPointer(&s50)); + EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s50)); + + EXPECT_GT(cap1, cap2); + + TF_TString_Copy(&s50, "c", 1); + + EXPECT_STREQ("c", TF_TString_GetDataPointer(&s50)); + EXPECT_STREQ("c", TF_TString_GetMutableDataPointer(&s50)); + EXPECT_EQ(1, TF_TString_GetSize(&s50)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s50)); + + TF_TString_Dealloc(&s50); + } +} + +TEST(TF_CTStringTest, ResizeReserve) { + { + // Resize + TF_TString s60; + + TF_TString_Init(&s60); + + TF_TString_Resize(&s60, 2, 'a'); + + EXPECT_EQ(0, ::memcmp("aa", TF_TString_GetDataPointer(&s60), 2)); + + TF_TString_Resize(&s60, 4, '\0'); + + EXPECT_EQ(0, ::memcmp("aa\0\0", TF_TString_GetDataPointer(&s60), 4)); + + TF_TString_Resize(&s60, 6, 'b'); + + EXPECT_EQ(0, ::memcmp("aa\0\0bb", TF_TString_GetDataPointer(&s60), 6)); + + TF_TString_Resize(&s60, 2, 'c'); + + EXPECT_EQ(0, ::memcmp("aa", TF_TString_GetDataPointer(&s60), 2)); + + TF_TString_Dealloc(&s60); + } + { + // Reserve + TF_TString s70; + + TF_TString_Init(&s70); + + TF_TString_Reserve(&s70, TF_TString_SmallCapacity - 1); + + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s70)); + EXPECT_EQ(0, TF_TString_GetSize(&s70)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s70)); + + TF_TString_Reserve(&s70, TF_TString_SmallCapacity); + + EXPECT_EQ(TF_TString_SmallCapacity, 
TF_TString_GetCapacity(&s70)); + EXPECT_EQ(0, TF_TString_GetSize(&s70)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s70)); + + TF_TString_Copy(&s70, "hello", 5); + + EXPECT_EQ(5, TF_TString_GetSize(&s70)); + EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s70)); + EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s70)); + + TF_TString_Reserve(&s70, 100); + + // Test 16 byte alignment (7*16 - 1 = 111) + EXPECT_EQ(111, TF_TString_GetCapacity(&s70)); + EXPECT_EQ(5, TF_TString_GetSize(&s70)); + EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s70)); + + TF_TString_AssignView(&s70, kLongString, kLongStringLen); + TF_TString_Reserve(&s70, 10); + + EXPECT_EQ(TF_TSTR_VIEW, TF_TString_GetType(&s70)); + EXPECT_EQ(0, TF_TString_GetCapacity(&s70)); + + TF_TString_Reserve(&s70, 100); + + // Converted to LARGE since it can no longer fit in SMALL. + EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s70)); + EXPECT_EQ(111, TF_TString_GetCapacity(&s70)); + + TF_TString_Reserve(&s70, 200); + + EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s70)); + EXPECT_EQ(207, TF_TString_GetCapacity(&s70)); + + TF_TString_Dealloc(&s70); + } +} diff --git a/tensorflow/core/platform/tstring.h b/tensorflow/core/platform/tstring.h index 867fbc8dea9..c8a85e984a0 100644 --- a/tensorflow/core/platform/tstring.h +++ b/tensorflow/core/platform/tstring.h @@ -16,24 +16,26 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PLATFORM_TSTRING_H_ #define TENSORFLOW_CORE_PLATFORM_TSTRING_H_ +#include <assert.h> + #include <ostream> #include <string> -// TODO(b/138799229): Used to toggle until global presubmits pass. +#include "tensorflow/core/platform/cord.h" +#include "tensorflow/core/platform/ctstring.h" + #define USE_TSTRING #ifdef USE_TSTRING +// TODO(dero): This include is temporary, and will be superfluous once +// absl::string_view is aliased to std::string_view. 
#include "absl/strings/string_view.h" - namespace absl { #ifdef ABSL_NAMESPACE_BEGIN ABSL_NAMESPACE_BEGIN #endif // ABSL_NAMESPACE_BEGIN class AlphaNum; -#ifdef PLATFORM_GOOGLE -class Cord; -#endif // PLATFORM_GOOGLE #ifdef ABSL_NAMESPACE_END ABSL_NAMESPACE_END #endif // ABSL_NAMESPACE_END @@ -43,243 +45,554 @@ namespace tensorflow { // tensorflow::tstring is the scalar type for DT_STRING tensors. // -// TODO(b/138799229): In order to ease migration from tensorflow::string to -// tensorflow::tstring, we define a simplified tstring class which wraps -// std::string. The API defined below is the expected subset of methods for -// tstring. +// tstrings are meant to be used when interfacing with string tensors, and +// should not be considered as a general replacement for std::string in +// tensorflow. The primary purpose of tstring is to provide a unified and +// stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating +// unnecessary conversions across language boundaries, and allowing for compiler +// agnostic interoperability across dynamically loaded modules. // -// The underlying implementation of tstring will be replaced with the one -// defined in [1] once the migration in tensorflow/ is complete. +// In addition to ABI stability, tstring features two string subtypes, VIEW and +// OFFSET. // -// [1] https://github.com/tensorflow/community/pull/91 +// VIEW tstrings are views into unowned character buffers; they can be used to +// pass around existing character strings without incurring a per object heap +// allocation. Note that, like std::string_view, it is the user's +// responsibility to ensure that the underlying buffer of a VIEW tstring +// outlives the associated tstring object. +// +// TODO(dero): Methods for creating OFFSET tensors are not currently +// implemented. 
+// +// OFFSET tstrings are platform independent offset defined strings which can be +// directly mmaped or copied into a tensor buffer without the need for +// deserialization or processing. For security reasons, it is imperative that +// OFFSET based string tensors are validated before use, or are from a trusted +// source. +// +// Underlying VIEW and OFFSET buffers are considered immutable, so l-value +// assignment, mutation, or non-const access to data() of tstrings will result +// in the conversion to an owned SMALL/LARGE type. +// +// The interface for tstring largely overlaps with std::string. Except where +// noted, expect equivalent semantics with synonymous std::string methods. class tstring { - std::string str_; - - template <typename T, typename = void> - struct ResizeUninitialized { - static void Resize(T& s, size_t new_size) { s.resize(new_size); } - }; - - template <typename T> - struct ResizeUninitialized< - T, decltype(std::declval<T>().__resize_default_init(0))> { - static void Resize(T& s, size_t new_size) { - s.__resize_default_init(new_size); - } - }; + TF_TString tstr_; public: - typedef char* iterator; + enum Type { + // See ctstring.h + SMALL = TF_TSTR_SMALL, + LARGE = TF_TSTR_LARGE, + OFFSET = TF_TSTR_OFFSET, + VIEW = TF_TSTR_VIEW, + }; + + // Assignment to a tstring object with a tstring::view type will create a VIEW + // type tstring. 
+ class view { + const char* data_; + size_t size_; + + public: + explicit view(const char* data, size_t size) : data_(data), size_(size) {} + explicit view(const char* data) : data_(data), size_(::strlen(data)) {} + + const char* data() const { return data_; } + + size_t size() const { return size_; } + + view() = delete; + view(const view&) = delete; + view& operator=(const view&) = delete; + }; + typedef const char* const_iterator; - tstring() = default; - - tstring(const tstring&) = default; - - tstring(const std::string& str) : str_(str) {} - - tstring(const char* str, size_t len) : str_(str, len) {} - - tstring(const char* str) : str_(str) {} - - tstring(size_t n, char c) : str_(n, c) {} - - explicit tstring(const absl::string_view& str) - : str_(str.data(), str.size()) {} - + // Ctor + tstring(); + tstring(const std::string& str); // NOLINT TODO(b/147740521): Make explicit. + tstring(const char* str, size_t len); + tstring(const char* str); // NOLINT TODO(b/147740521): Make explicit. 
+ tstring(size_t n, char c); + explicit tstring(const absl::string_view str); #ifdef PLATFORM_GOOGLE - template <typename T, - typename std::enable_if<std::is_same<T, absl::Cord>::value, - T>::type* = nullptr> - explicit tstring(const T& cord) : str_(string(cord)) {} + explicit tstring(const absl::Cord& cord); #endif // PLATFORM_GOOGLE - tstring(tstring&&) = default; + // Copy + tstring(const tstring& str); - ~tstring() = default; + // Move + tstring(tstring&& str) noexcept; - tstring& operator=(const tstring& str) = default; - - tstring& operator=(const std::string& str) { - str_ = str; - - return *this; - } - - tstring& operator=(const absl::string_view& str) { - str_.assign(str.data(), str.size()); - - return *this; - } + // Dtor + ~tstring(); + // Copy Assignment + tstring& operator=(const tstring& str); + tstring& operator=(const std::string& str); + tstring& operator=(const char* str); + tstring& operator=(char ch); + tstring& operator=(const absl::string_view str); #ifdef PLATFORM_GOOGLE - template <typename T, - typename std::enable_if<std::is_same<T, absl::Cord>::value, - T>::type* = nullptr> - tstring& operator=(const T& cord) { - str_ = string(cord); - - return *this; - } + tstring& operator=(const absl::Cord& cord); #endif // PLATFORM_GOOGLE - tstring& operator=(const char* str) { - str_ = str; + // View Assignment + tstring& operator=(const view& tsv); - return *this; - } + // Move Assignment + tstring& operator=(tstring&& str) noexcept; - tstring& operator=(char ch) { - str_ = ch; - - return *this; - } - - tstring& operator=(tstring&&) = default; - - bool operator<(const tstring& o) const { return str_ < o.str_; } - - bool operator>(const tstring& o) const { return str_ > o.str_; } - - bool operator==(const char* o) const { return str_ == o; } - - bool operator==(const tstring& o) const { return str_ == o.str_; } - - bool operator!=(const char* o) const { return str_ != o; } - - bool operator!=(const tstring& o) const { return str_ != o.str_; } - - operator 
std::string() const { return str_; } - - operator absl::string_view() const { - return absl::string_view(str_.data(), str_.size()); - } + // Comparison + int compare(const char* str, size_t len) const; + bool operator<(const tstring& o) const; + bool operator>(const tstring& o) const; + bool operator==(const char* str) const; + bool operator==(const tstring& o) const; + bool operator!=(const char* str) const; + bool operator!=(const tstring& o) const; + // Conversion Operators + // TODO(b/147740521): Make explicit. + operator std::string() const; // NOLINT + // TODO(b/147740521): Make explicit. + operator absl::string_view() const; // NOLINT #ifdef PLATFORM_GOOGLE template <typename T, typename std::enable_if<std::is_same<T, absl::AlphaNum>::value, T>::type* = nullptr> - operator T() const { - return T(str_); - } + operator T() const; // NOLINT TODO(b/147740521): Remove. #endif // PLATFORM_GOOGLE - bool empty() const { return str_.empty(); } + // Attributes + size_t size() const; + size_t length() const; + size_t capacity() const; + bool empty() const; + Type type() const; - size_t length() const { return str_.length(); } + // Allocation + void resize(size_t new_size, char c = 0); + // Similar to resize, but will leave the newly grown region uninitialized. + void resize_uninitialized(size_t new_size); + void clear() noexcept; + void reserve(size_t n); - size_t size() const { return str_.size(); } + // Iterators + const_iterator begin() const; + const_iterator end() const; - size_t capacity() const { return str_.capacity(); } + // Const Element Access + const char* c_str() const; + const char* data() const; + const char& operator[](size_t i) const; + const char& back() const; - const char* c_str() const { return str_.c_str(); } + // Mutable Element Access + // NOTE: For VIEW/OFFSET types, calling these methods will result in the + // conversion to a SMALL or heap allocated LARGE type. 
As a result, + // previously obtained pointers, references, or iterators to the underlying + // buffer will point to the original VIEW/OFFSET and not the new allocation. + char* mdata(); + char* data(); // DEPRECATED: Use mdata(). + char& operator[](size_t i); - const char* data() const { return str_.data(); } + // Assignment + tstring& assign(const char* str, size_t len); + tstring& assign(const char* str); - const_iterator begin() const { return data(); } - const_iterator end() const { return data() + size(); } + // View Assignment + tstring& assign_as_view(const tstring& str); + tstring& assign_as_view(const std::string& str); + tstring& assign_as_view(const absl::string_view str); + tstring& assign_as_view(const char* str, size_t len); + tstring& assign_as_view(const char* str); - char back() const { return str_.back(); } + // Modifiers + // NOTE: Invalid input will result in undefined behavior. + tstring& append(const tstring& str); + tstring& append(const char* str, size_t len); + tstring& append(const char* str); + tstring& append(size_t n, char c); - const char& operator[](size_t i) const { return str_[i]; } + tstring& erase(size_t pos, size_t len); - char* data() { return &str_[0]; } + tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen); + tstring& insert(size_t pos, size_t n, char c); + void swap(tstring& str); + void push_back(char ch); - iterator begin() { return data(); } - iterator end() { return data() + size(); } - - char& operator[](size_t i) { return str_[i]; } - - void clear() noexcept { str_.clear(); } - - void resize(size_t new_size) { str_.resize(new_size); } - - void resize(size_t new_size, char c) { str_.resize(new_size, c); } - - void resize_uninitialized(size_t new_size) { - ResizeUninitialized<decltype(str_)>::Resize(str_, new_size); - } - - void reserve(size_t n) { str_.reserve(n); } - - tstring& assign(const char* str, size_t len) { - str_.assign(str, len); - - return *this; - } - - tstring& assign(const char* 
str) { - str_.assign(str); - - return *this; - } - - tstring& append(const tstring& str) { - str_.append(str.str_); - - return *this; - } - - tstring& append(const char* str, size_t len) { - str_.append(str, len); - - return *this; - } - - tstring& append(const char* str) { - str_.append(str); - - return *this; - } - - tstring& append(size_t n, char c) { - str_.append(n, c); - - return *this; - } - - void swap(tstring& str) { str_.swap(str.str_); } - - tstring& insert(size_t pos, const tstring& str, size_t subpos, - size_t sublen) { - str_.insert(pos, str.str_, subpos, sublen); - - return *this; - } - - tstring& insert(size_t pos, size_t n, char c) { - str_.insert(pos, n, c); - - return *this; - } - - tstring& erase(size_t pos, size_t len) { - str_.erase(pos, len); - - return *this; - } - - void push_back(char ch) { str_.push_back(ch); } - - friend const tstring operator+(const tstring& a, const tstring& b); + // Friends friend bool operator==(const char* a, const tstring& b); friend bool operator==(const std::string& a, const tstring& b); + friend tstring operator+(const tstring& a, const tstring& b); friend std::ostream& operator<<(std::ostream& o, const tstring& str); friend std::hash<tstring>; }; -inline bool operator==(const char* a, const tstring& b) { return a == b.str_; } +// Non-member function overloads -inline bool operator==(const std::string& a, const tstring& b) { - return a == b.str_; +bool operator==(const char* a, const tstring& b); +bool operator==(const std::string& a, const tstring& b); +tstring operator+(const tstring& a, const tstring& b); +std::ostream& operator<<(std::ostream& o, const tstring& str); + +// Implementations + +// Ctor + +inline tstring::tstring() { TF_TString_Init(&tstr_); } + +inline tstring::tstring(const char* str, size_t len) { + TF_TString_Init(&tstr_); + TF_TString_Copy(&tstr_, str, len); } -inline const tstring operator+(const tstring& a, const tstring& b) { - return tstring(a.str_ + b.str_); +inline 
tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {} + +inline tstring::tstring(size_t n, char c) { + TF_TString_Init(&tstr_); + TF_TString_Resize(&tstr_, n, c); +} + +inline tstring::tstring(const std::string& str) + : tstring(str.data(), str.size()) {} + +inline tstring::tstring(const absl::string_view str) + : tstring(str.data(), str.size()) {} + +#ifdef PLATFORM_GOOGLE +inline tstring::tstring(const absl::Cord& cord) { + TF_TString_Init(&tstr_); + TF_TString_ResizeUninitialized(&tstr_, cord.size()); + + cord.CopyToArray(data()); +} +#endif // PLATFORM_GOOGLE + +// Copy + +inline tstring::tstring(const tstring& str) { + TF_TString_Init(&tstr_); + TF_TString_Assign(&tstr_, &str.tstr_); +} + +// Move + +inline tstring::tstring(tstring&& str) noexcept { + TF_TString_Init(&tstr_); + TF_TString_Move(&tstr_, &str.tstr_); +} + +// Dtor + +inline tstring::~tstring() { TF_TString_Dealloc(&tstr_); } + +// Copy Assignment + +inline tstring& tstring::operator=(const tstring& str) { + TF_TString_Assign(&tstr_, &str.tstr_); + + return *this; +} + +inline tstring& tstring::operator=(const std::string& str) { + TF_TString_Copy(&tstr_, str.data(), str.size()); + return *this; +} + +inline tstring& tstring::operator=(const char* str) { + TF_TString_Copy(&tstr_, str, ::strlen(str)); + + return *this; +} + +inline tstring& tstring::operator=(char c) { + resize_uninitialized(1); + (*this)[0] = c; + + return *this; +} + +inline tstring& tstring::operator=(const absl::string_view str) { + TF_TString_Copy(&tstr_, str.data(), str.size()); + + return *this; +} + +#ifdef PLATFORM_GOOGLE +inline tstring& tstring::operator=(const absl::Cord& cord) { + TF_TString_ResizeUninitialized(&tstr_, cord.size()); + + cord.CopyToArray(data()); + + return *this; +} +#endif // PLATFORM_GOOGLE + +// View Assignment + +inline tstring& tstring::operator=(const tstring::view& tsv) { + assign_as_view(tsv.data(), tsv.size()); + + return *this; +} + +// Move Assignment (noexcept, matching the move constructor) + +inline tstring& 
tstring::operator=(tstring&& str) noexcept { + TF_TString_Move(&tstr_, &str.tstr_); + + return *this; +} + +// Comparison + +inline int tstring::compare(const char* str, size_t len) const { + int ret = ::memcmp(data(), str, std::min(len, size())); + + if (ret < 0) return -1; + if (ret > 0) return +1; + + if (size() < len) return -1; + if (size() > len) return +1; + + return 0; +} + +inline bool tstring::operator<(const tstring& o) const { + return compare(o.data(), o.size()) < 0; +} + +inline bool tstring::operator>(const tstring& o) const { + return compare(o.data(), o.size()) > 0; +} + +inline bool tstring::operator==(const char* str) const { + return ::strlen(str) == size() && ::memcmp(data(), str, size()) == 0; +} + +inline bool tstring::operator==(const tstring& o) const { + return o.size() == size() && ::memcmp(data(), o.data(), size()) == 0; +} + +inline bool tstring::operator!=(const char* str) const { + return !(*this == str); +} + +inline bool tstring::operator!=(const tstring& o) const { + return !(*this == o); +} + +// Conversion Operators + +inline tstring::operator std::string() const { + return std::string(data(), size()); +} + +inline tstring::operator absl::string_view() const { + return absl::string_view(data(), size()); +} + +#ifdef PLATFORM_GOOGLE +template <typename T, typename std::enable_if< + std::is_same<T, absl::AlphaNum>::value, T>::type*> +inline tstring::operator T() const { + return T(absl::string_view(*this)); +} +#endif // PLATFORM_GOOGLE + +// Attributes + +inline size_t tstring::size() const { return TF_TString_GetSize(&tstr_); } + +inline size_t tstring::length() const { return size(); } + +inline size_t tstring::capacity() const { + return TF_TString_GetCapacity(&tstr_); +} + +inline bool tstring::empty() const { return size() == 0; } + +inline tstring::Type tstring::type() const { + return static_cast<tstring::Type>(TF_TString_GetType(&tstr_)); +} + +// Allocation + +inline void tstring::resize(size_t new_size, char c) { + 
TF_TString_Resize(&tstr_, new_size, c); +} + +inline void tstring::resize_uninitialized(size_t new_size) { + TF_TString_ResizeUninitialized(&tstr_, new_size); +} + +inline void tstring::clear() noexcept { + TF_TString_ResizeUninitialized(&tstr_, 0); +} + +inline void tstring::reserve(size_t n) { TF_TString_Reserve(&tstr_, n); } + +// Iterators + +inline tstring::const_iterator tstring::begin() const { return &(*this)[0]; } +inline tstring::const_iterator tstring::end() const { return &(*this)[size()]; } + +// Element Access + +inline const char* tstring::c_str() const { return data(); } + +inline const char* tstring::data() const { + return TF_TString_GetDataPointer(&tstr_); +} + +inline const char& tstring::operator[](size_t i) const { return data()[i]; } + +inline const char& tstring::back() const { return (*this)[size() - 1]; } + +inline char* tstring::mdata() { + return TF_TString_GetMutableDataPointer(&tstr_); +} + +inline char* tstring::data() { + // Deprecated + return mdata(); +} + +inline char& tstring::operator[](size_t i) { return mdata()[i]; } + +// Assignment + +inline tstring& tstring::assign(const char* str, size_t len) { + TF_TString_Copy(&tstr_, str, len); + + return *this; +} + +inline tstring& tstring::assign(const char* str) { + assign(str, ::strlen(str)); + + return *this; +} + +// View Assignment + +inline tstring& tstring::assign_as_view(const tstring& str) { + assign_as_view(str.data(), str.size()); + + return *this; +} + +inline tstring& tstring::assign_as_view(const std::string& str) { + assign_as_view(str.data(), str.size()); + + return *this; +} + +inline tstring& tstring::assign_as_view(const absl::string_view str) { + assign_as_view(str.data(), str.size()); + + return *this; +} + +inline tstring& tstring::assign_as_view(const char* str, size_t len) { + TF_TString_AssignView(&tstr_, str, len); + + return *this; +} + +inline tstring& tstring::assign_as_view(const char* str) { + assign_as_view(str, ::strlen(str)); + + return *this; +} + 
+// Modifiers + +inline tstring& tstring::append(const tstring& str) { + TF_TString_Append(&tstr_, &str.tstr_); + + return *this; +} + +inline tstring& tstring::append(const char* str, size_t len) { + TF_TString_AppendN(&tstr_, str, len); + + return *this; +} + +inline tstring& tstring::append(const char* str) { + append(str, ::strlen(str)); + + return *this; +} + +inline tstring& tstring::append(size_t n, char c) { + resize(size() + n, c); + + return *this; +} + +inline tstring& tstring::erase(size_t pos, size_t len) { + // Like std::string::erase, clamp len so it may run past the end. + if (len > size() - pos) len = size() - pos; + memmove(mdata() + pos, data() + pos + len, size() - len - pos); + resize(size() - len); + return *this; +} + +inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos, + size_t sublen) { + size_t orig_size = size(); + TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen); + + memmove(mdata() + pos + sublen, data() + pos, orig_size - pos); + memmove(mdata() + pos, str.data() + subpos, sublen); + + return *this; +} + +inline tstring& tstring::insert(size_t pos, size_t n, char c) { + size_t orig_size = size(); + TF_TString_ResizeUninitialized(&tstr_, orig_size + n); + + memmove(mdata() + pos + n, data() + pos, orig_size - pos); + memset(mdata() + pos, c, n); + + return *this; +} + +inline void tstring::swap(tstring& str) { + // TODO(dero): Invalid for OFFSET (unimplemented). 
+ std::swap(tstr_, str.tstr_); +} + +inline void tstring::push_back(char ch) { append(1, ch); } + +// Friends + +inline bool operator==(const char* a, const tstring& b) { + return ::strlen(a) == b.size() && ::memcmp(a, b.data(), b.size()) == 0; +} + +inline bool operator==(const std::string& a, const tstring& b) { + return a.size() == b.size() && ::memcmp(a.data(), b.data(), b.size()) == 0; +} + +inline tstring operator+(const tstring& a, const tstring& b) { + tstring r; + r.reserve(a.size() + b.size()); + r.append(a); + r.append(b); + + return r; } inline std::ostream& operator<<(std::ostream& o, const tstring& str) { - return o << str.str_; + return o.write(str.data(), str.size()); } } // namespace tensorflow diff --git a/tensorflow/core/platform/tstring_test.cc b/tensorflow/core/platform/tstring_test.cc new file mode 100644 index 00000000000..ced5dc97d52 --- /dev/null +++ b/tensorflow/core/platform/tstring_test.cc @@ -0,0 +1,409 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> +#include <sstream> +#include <string> +#include <utility> + +#include "tensorflow/core/platform/cord.h" +#include "tensorflow/core/platform/test.h" + +// TODO(dero): fix ordering issue. 
+#include "tensorflow/core/platform/tstring.h" // NOLINT + +using tensorflow::tstring; + +static const char kLongString[] = + "abcdefghij" + "klmnopqrst" + "uvwxyz0123" + "456789ABCD" + "EFGHIKLMNO"; +const size_t kLongStringLen = sizeof(kLongString) / sizeof(char) - sizeof(char); + +TEST(TF_TStringTest, Construction) { + tstring s10; + tstring s11("a\0a", 3); + tstring s12(kLongString); + tstring s13(3, 'b'); + tstring s14(absl::string_view("hi")); + tstring s15(std::string("bye")); + + EXPECT_EQ("", s10); + EXPECT_TRUE(s10.empty()); + EXPECT_EQ(tstring::Type::SMALL, s10.type()); + EXPECT_EQ(0, s10.size()); + EXPECT_EQ(0, s10.length()); + EXPECT_EQ(TF_TString_SmallCapacity, s10.capacity()); + + EXPECT_EQ(std::string("a\0a", 3), s11); + EXPECT_FALSE(s11.empty()); + EXPECT_EQ(3, s11.size()); + EXPECT_EQ(3, s11.length()); + EXPECT_EQ(kLongString, s12); + EXPECT_EQ(kLongStringLen, s12.size()); + EXPECT_EQ(tstring::Type::LARGE, s12.type()); + EXPECT_LT(TF_TString_SmallCapacity, s12.capacity()); + EXPECT_EQ("bbb", s13); + EXPECT_EQ("hi", s14); + EXPECT_EQ(tstring::Type::SMALL, s14.type()); + EXPECT_EQ("bye", s15); +} + +TEST(TF_TStringTest, CopyMove) { + tstring s20(kLongString); + tstring s21(s20); + tstring s22; + + EXPECT_EQ(s20, s21); + + s22 = std::move(s21); + + EXPECT_EQ(s20, s22); + EXPECT_EQ("", s21); // NOLINT + EXPECT_EQ(tstring::Type::SMALL, s21.type()); +} + +TEST(TF_TStringTest, Assignment) { + tstring s30("123456789012345678901234567890"); + tstring s31; + tstring s32; + + s31 = s30; + + EXPECT_EQ(s30, s31); + EXPECT_EQ(tstring::Type::LARGE, s31.type()); + EXPECT_EQ(s30.size(), s31.size()); + + s32 = std::move(s30); + + EXPECT_EQ(s31, s32); + EXPECT_EQ("", s30); // NOLINT + EXPECT_EQ(tstring::Type::SMALL, s30.type()); + EXPECT_EQ(tstring::Type::LARGE, s32.type()); + + s32 = tstring::view(kLongString); + + EXPECT_EQ(kLongString, s32); + EXPECT_EQ(tstring::Type::VIEW, s32.type()); + EXPECT_EQ(kLongStringLen, s32.size()); + EXPECT_EQ(0, s32.capacity()); + + 
tstring s33(std::move(s32)); + + EXPECT_EQ(kLongString, s33); + EXPECT_EQ(tstring::Type::VIEW, s33.type()); + EXPECT_EQ(kLongStringLen, s33.size()); + + s32 = std::string(kLongString); + + EXPECT_EQ(kLongString, s32); + EXPECT_EQ(tstring::Type::LARGE, s32.type()); + EXPECT_EQ(kLongStringLen, s32.size()); + + // LARGE -> SMALL + s32 = "hello"; + + EXPECT_EQ("hello", s32); + EXPECT_EQ(tstring::Type::SMALL, s32.type()); + EXPECT_EQ(5, s32.size()); + + s33 = 'a'; + + EXPECT_EQ("a", s33); + EXPECT_EQ(tstring::Type::SMALL, s33.type()); + EXPECT_EQ(1, s33.size()); + + s32 = absl::string_view(kLongString); + + EXPECT_EQ(kLongString, s32); + EXPECT_EQ(tstring::Type::LARGE, s32.type()); + EXPECT_EQ(kLongStringLen, s32.size()); + + // LARGE -> SMALL but still LARGE + s32.resize(TF_TString_SmallCapacity * 2); + + EXPECT_EQ(absl::string_view(kLongString, TF_TString_SmallCapacity * 2), s32); + EXPECT_EQ(tstring::Type::LARGE, s32.type()); + EXPECT_EQ(TF_TString_SmallCapacity * 2, s32.size()); + + s32 = tstring::view(kLongString, kLongStringLen); + + EXPECT_EQ(kLongString, s32); + EXPECT_EQ(tstring::Type::VIEW, s32.type()); + EXPECT_EQ(kLongStringLen, s32.size()); + + s32.assign("hello1"); + + EXPECT_EQ("hello1", s32); + + s32.assign("hello2", 5); + + EXPECT_EQ("hello", s32); + + s30.assign_as_view(kLongString); + + EXPECT_EQ(tstring::Type::VIEW, s30.type()); + + s31.assign_as_view(s30); + + EXPECT_EQ(tstring::Type::VIEW, s31.type()); + + EXPECT_EQ(kLongString, s30.c_str()); + EXPECT_EQ(kLongString, s31.c_str()); + + std::string tmp(kLongString); + s32.assign_as_view(tmp); + + EXPECT_EQ(tstring::Type::VIEW, s32.type()); + EXPECT_STREQ(kLongString, s32.c_str()); + + s33.assign_as_view(kLongString, 2); + + EXPECT_EQ(2, s33.size()); + + s32.assign_as_view(absl::string_view(kLongString)); + + EXPECT_EQ(tstring::Type::VIEW, s32.type()); + EXPECT_EQ(kLongString, s32.c_str()); + +#ifdef PLATFORM_GOOGLE + s33 = absl::Cord(kLongString); + + EXPECT_EQ(kLongString, s33); + 
EXPECT_EQ(tstring::Type::LARGE, s33.type()); + EXPECT_EQ(kLongStringLen, s33.size()); + + tstring s34((absl::Cord(kLongString))); + + EXPECT_EQ(kLongString, s34); + EXPECT_EQ(tstring::Type::LARGE, s34.type()); + EXPECT_EQ(kLongStringLen, s34.size()); +#endif // PLATFORM_GOOGLE +} + +TEST(TF_TStringTest, Comparison) { + tstring empty(""); + tstring a("a"); + tstring aa("aa"); + tstring a_("a"); + tstring b("b"); + const char c[] = "c"; + tstring nulla("\0a", 2); + tstring nullb("\0b", 2); + tstring nullaa("\0aa", 3); + + EXPECT_TRUE(a < b); + EXPECT_TRUE(a != b); + EXPECT_FALSE(a > b); + EXPECT_FALSE(a == b); + + EXPECT_TRUE(a < aa); + EXPECT_TRUE(a != aa); + EXPECT_FALSE(a > aa); + EXPECT_FALSE(a == aa); + + EXPECT_TRUE(b > a); + EXPECT_TRUE(b != a); + EXPECT_FALSE(b < a); + EXPECT_FALSE(b == a); + EXPECT_FALSE(a == b); + + EXPECT_FALSE(b == c); + EXPECT_TRUE(b != c); + + EXPECT_TRUE(empty < a); + EXPECT_TRUE(empty != a); + EXPECT_FALSE(empty > a); + EXPECT_FALSE(empty == a); + + EXPECT_TRUE(a > empty); + EXPECT_TRUE(a != empty); + EXPECT_FALSE(a < empty); + EXPECT_FALSE(a == empty); + + EXPECT_FALSE(a < a_); + EXPECT_FALSE(a != a_); + EXPECT_FALSE(a > a_); + EXPECT_TRUE(a == a_); + + EXPECT_TRUE(nulla < nullaa); + EXPECT_TRUE(nulla != nullaa); + EXPECT_FALSE(nulla > nullaa); + EXPECT_FALSE(nulla == nullaa); + + EXPECT_TRUE(nulla < nullb); + + EXPECT_TRUE(nullaa > nulla); + EXPECT_TRUE(nullaa != nulla); + EXPECT_FALSE(nullaa < nulla); + EXPECT_FALSE(nullaa == nulla); +} + +TEST(TF_TStringTest, Conversion) { + tstring s50(kLongString); + std::string s51(s50); + absl::string_view s52(s50); + EXPECT_EQ(kLongString, s51); + EXPECT_EQ(kLongStringLen, s51.size()); + EXPECT_EQ(kLongString, s52); + EXPECT_EQ(kLongStringLen, s52.size()); + +#ifdef PLATFORM_GOOGLE + absl::AlphaNum s53(s50); + + EXPECT_STREQ(kLongString, s53.data()); + EXPECT_EQ(kLongStringLen, s53.size()); +#endif // PLATFORM_GOOGLE +} + +TEST(TF_TStringTest, Allocation) { + tstring s60; + + s60.resize(2); + 
+ EXPECT_EQ(std::string("\0\0", 2), s60); + EXPECT_EQ(2, s60.size()); + EXPECT_EQ(2, s60.length()); + + s60.resize(6, 'a'); + + EXPECT_EQ(std::string("\0\0aaaa", 6), s60); + EXPECT_EQ(6, s60.size()); + EXPECT_EQ(6, s60.length()); + + s60.resize(3, 'b'); + + EXPECT_EQ(std::string("\0\0a", 3), s60); + EXPECT_EQ(3, s60.size()); + EXPECT_EQ(3, s60.length()); + + s60.clear(); + EXPECT_EQ("", s60); + EXPECT_TRUE(s60.empty()); + EXPECT_EQ(0, s60.size()); + EXPECT_EQ(0, s60.length()); + + s60.reserve(100); + // 16-byte alignment 7*16-1 = 111 + EXPECT_EQ(111, s60.capacity()); + s60.reserve(100); +} + +TEST(TF_TStringTest, ElementAccess) { + tstring s70(kLongString); + + EXPECT_STREQ(kLongString, s70.data()); + EXPECT_EQ(s70.data(), s70.c_str()); + + for (size_t i = 0; i < s70.size(); i++) { + EXPECT_EQ(kLongString[i], s70.data()[i]); + } + + tstring::const_iterator i = s70.begin(); + const char* j = kLongString; + for (; *j != '\0'; i++, j++) { + EXPECT_EQ(*j, *i); + } + EXPECT_EQ('\0', *s70.end()); + EXPECT_EQ(*i, *s70.end()); + EXPECT_EQ(*(i - 1), s70.back()); +} + +TEST(TF_TStringTest, Modifiers) { + // Modifiers + tstring s80("ba"); + tstring s81; + tstring s82(kLongString); + + s81.append(s80); + + EXPECT_EQ("ba", s81); + + s81.append(s80); + + EXPECT_EQ("baba", s81); + + s81.append("\0c", 2); + + EXPECT_EQ(std::string("baba\0c", 6), s81); + + s81.append("dd"); + + EXPECT_EQ(std::string("baba\0cdd", 8), s81); + + s81.append(3, 'z'); + + EXPECT_EQ(tstring("baba\0cddzzz", 11), s81); + + s81.append(0, 'z'); + s81.append("dd", 0); + s81.append(""); + s81.append(tstring()); + + EXPECT_EQ(std::string("baba\0cddzzz", 11), s81); + + s81.erase(0, 1); + + EXPECT_EQ(std::string("aba\0cddzzz", 10), s81); + + s81.erase(4, 6); + + EXPECT_EQ(std::string("aba\0", 4), s81); + + s81.insert(1, tstring("\0moo\0", 5), 1, 4); + + EXPECT_EQ(std::string("amoo\0ba\0", 8), s81); + + s81.insert(0, 2, '\0'); + s81.insert(s81.size() - 1, 1, 'q'); + + EXPECT_EQ(std::string("\0\0amoo\0baq\0", 11), 
s81); + + s81.erase(0, s81.size()); + + EXPECT_EQ(tstring(), s81); + + s80.swap(s82); + + EXPECT_EQ(kLongString, s80); + EXPECT_EQ("ba", s82); + + s82.push_back('\0'); + s82.push_back('q'); + + EXPECT_EQ(std::string("ba\0q", 4), s82); +} + +TEST(TF_TStringTest, Friends) { + tstring s90("b"); + tstring s91("\0a\0", 3); + tstring s92; + + EXPECT_EQ("b", s90 + s92); + EXPECT_EQ("b", s92 + s90); + + EXPECT_EQ(std::string("\0a\0", 3), s92 + s91); + EXPECT_EQ(std::string("\0a\0", 3), s91 + s92); + + EXPECT_EQ(std::string("b\0a\0", 4), s90 + s91); + EXPECT_EQ(std::string("\0a\0b", 4), s91 + s90); + + std::stringstream ss; + ss << s91; + + EXPECT_EQ(std::string("\0a\0", 3), ss.str()); +}