ABI stable tensorflow::tstring.

See: https://github.com/tensorflow/community/pull/91
PiperOrigin-RevId: 291049893
Change-Id: I79d809bd3e581da27fc87e8b53921fadce3b7a93
This commit is contained in:
Dero Gharibian 2020-01-22 15:55:42 -08:00 committed by TensorFlower Gardener
parent bce01458eb
commit b9b042cfd9
6 changed files with 1858 additions and 196 deletions

View File

@ -716,8 +716,13 @@ cc_library(
cc_library(
name = "tstring",
hdrs = ["tstring.h"],
hdrs = [
"ctstring.h",
"ctstring_internal.h",
"tstring.h",
],
deps = [
":cord",
"@com_google_absl//absl/strings",
],
)
@ -894,6 +899,29 @@ tf_cc_test(
],
)
# Small unit test built from ctstring_test.cc; it links against the :tstring
# library target together with the TensorFlow test support targets.
tf_cc_test(
name = "ctstring_test",
size = "small",
srcs = ["ctstring_test.cc"],
deps = [
":tstring",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
)
# Small unit test built from tstring_test.cc; in addition to :tstring it
# depends on :cord.
tf_cc_test(
name = "tstring_test",
size = "small",
srcs = ["tstring_test.cc"],
deps = [
":cord",
":tstring",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
)
tf_cc_test(
name = "platform_strings_test",
size = "small",
@ -1076,6 +1104,8 @@ filegroup(
srcs = [
"byte_order.h",
"cord.h",
"ctstring.h",
"ctstring_internal.h",
"env_time.h",
"logging.h",
"macros.h",
@ -1138,6 +1168,8 @@ filegroup(
filegroup(
name = "lib_proto_parsing_hdrs",
srcs = [
"ctstring.h",
"ctstring_internal.h",
"init_main.h",
"logging.h",
"macros.h",
@ -1172,6 +1204,8 @@ filegroup(
srcs = [
"byte_order.h",
"cpu_info.h",
"ctstring.h",
"ctstring_internal.h",
"dynamic_annotations.h",
"macros.h",
"mutex.h",
@ -1234,6 +1268,8 @@ filegroup(
filegroup(
name = "tflite_portable_logging_hdrs",
srcs = [
"ctstring.h",
"ctstring_internal.h",
"logging.h",
"macros.h",
"platform.h",
@ -1246,6 +1282,8 @@ filegroup(
filegroup(
name = "jpeg_internal_hdrs",
srcs = [
"ctstring.h",
"ctstring_internal.h",
"dynamic_annotations.h",
"logging.h",
"macros.h",
@ -1261,6 +1299,8 @@ filegroup(
filegroup(
name = "gif_internal_hdrs",
srcs = [
"ctstring.h",
"ctstring_internal.h",
"dynamic_annotations.h",
"logging.h",
"macros.h",
@ -1286,6 +1326,8 @@ filegroup(
"cord.h",
"cpu_info.cc",
"cpu_info.h",
"ctstring.h",
"ctstring_internal.h",
"demangle.h",
"denormal.cc",
"denormal.h",

View File

@ -0,0 +1,120 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PLATFORM_CTSTRING_H_
#define TENSORFLOW_CORE_PLATFORM_CTSTRING_H_
#include <stdint.h>
#include <stdlib.h>
#include "tensorflow/core/platform/ctstring_internal.h"
// Initialize a new tstring. This must be called before using any function
// below.
inline void TF_TString_Init(TF_TString *str);
// Deallocate a tstring.
inline void TF_TString_Dealloc(TF_TString *str);
// Resizes `str' to `new_size'. This function will appropriately grow or shrink
// the string buffer to fit a `new_size' string. Grown regions of the string
// will be initialized with `c'.
inline char *TF_TString_Resize(TF_TString *str, size_t new_size, char c);
// Similar to TF_TString_Resize, except the newly allocated regions will remain
// uninitialized. This is useful if you plan on overwriting the newly grown
// regions immediately after allocation; doing so will elide a superfluous
// initialization of the new buffer.
inline char *TF_TString_ResizeUninitialized(TF_TString *str, size_t new_size);
// Reserves a string buffer with a capacity of at least `new_cap'.
// Reserve will not change the size, or the contents of the existing
// string. This is useful if you have a rough idea of `str's upperbound in
// size, and want to avoid allocations as you append to `str'. It should not be
// considered safe to write in the region between size and capacity; explicitly
// resize before doing so.
inline void TF_TString_Reserve(TF_TString *str, size_t new_cap);
// Returns the size of the string.
inline size_t TF_TString_GetSize(const TF_TString *str);
// Returns the capacity of the string buffer. It should not be considered safe
// to write in the region between size and capacity---call Resize or
// ResizeUninitialized before doing so.
inline size_t TF_TString_GetCapacity(const TF_TString *str);
// Returns the underlying type of the tstring:
// TF_TSTR_SMALL:
// Small string optimization; the contents of strings
// of at most 22 bytes are stored in the TF_TString struct. This avoids any
// heap allocations.
// TF_TSTR_LARGE:
// Heap allocated string.
// TF_TSTR_OFFSET: (currently unused)
// An offset defined string. The string buffer begins at an internally
// defined little-endian offset from `str'; i.e. GetDataPointer() = str +
// offset. This type is useful for memory mapping or reading string tensors
// directly from file, without the need to deserialize the data. For
// security reasons, it is imperative that OFFSET based string tensors are
// validated before use, or are from a trusted source.
// TF_TSTR_VIEW:
// A view into an unowned character string.
//
// NOTE:
// VIEW and OFFSET types are immutable, so any modification via Append,
// AppendN, or GetMutableDataPointer of a VIEW/OFFSET based tstring will
// result in a conversion to an owned type (SMALL/LARGE).
inline TF_TString_Type TF_TString_GetType(const TF_TString *str);
// Returns a const char pointer to the start of the underlying string. The
// underlying character buffer may not be null-terminated.
inline const char *TF_TString_GetDataPointer(const TF_TString *str);
// Returns a char pointer to a mutable representation of the underlying string.
// In the case of VIEW and OFFSET types, `str' is converted to an owned type
// (SMALL/LARGE). The underlying character buffer may not be null-terminated.
inline char *TF_TString_GetMutableDataPointer(TF_TString *str);
// Sets `dst' as a VIEW type to `src'. `dst' will not take ownership of `src'.
// It is the user's responsibility to ensure that the lifetime of `src' exceeds
// `dst'. Any mutations to `dst' via Append, AppendN, or GetMutableDataPointer,
// will result in a copy into an owned SMALL or LARGE type, and will not modify
// `src'.
inline void TF_TString_AssignView(TF_TString *dst, const char *src,
size_t size);
// Appends `src' onto `dst'. If `dst' is a VIEW or OFFSET type, it will first
// be converted to an owned LARGE or SMALL type. `src' should not point to
// memory owned by `dst', since `dst' may be reallocated before `src' is read.
inline void TF_TString_Append(TF_TString *dst, const TF_TString *src);
inline void TF_TString_AppendN(TF_TString *dst, const char *src, size_t size);
// Copy/Move/Assign semantics
//
// | src | dst | complexity
// Copy | * | SMALL/LARGE | fixed/O(size)
// Assign | SMALL | SMALL | fixed
// Assign | OFFSET | VIEW | fixed
// Assign | VIEW | VIEW | fixed
// Assign | LARGE | LARGE | O(size)
// Move | * | same as src | fixed
// Copies `src' to `dst'. `dst' will be an owned type (SMALL/LARGE). `src'
// should not point to memory owned by `dst'.
inline void TF_TString_Copy(TF_TString *dst, const char *src, size_t size);
// Assigns a `src' tstring to `dst'. An OFFSET `src' type will yield a `VIEW'
// `dst'. LARGE `src' types will be copied to a new buffer; all other `src'
// types will incur a fixed cost.
inline void TF_TString_Assign(TF_TString *dst, const TF_TString *src);
// Moves a `src' tstring to `dst'. Moving a LARGE `src' to `dst' will result in
// a valid but unspecified `src'. This function incurs a fixed cost for all
// inputs.
inline void TF_TString_Move(TF_TString *dst, TF_TString *src);
#endif // TENSORFLOW_CORE_PLATFORM_CTSTRING_H_

View File

@ -0,0 +1,449 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PLATFORM_CTSTRING_INTERNAL_H_
#define TENSORFLOW_CORE_PLATFORM_CTSTRING_INTERNAL_H_
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// Host byte-order detection.  After this block TF_TSTRING_LITTLE_ENDIAN is
// always defined: 1 on little-endian hosts, 0 on big-endian hosts.  Because it
// is always defined, it must be tested with `#if', never with `#ifdef'.
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||                  \
    defined(_WIN32)
#define TF_TSTRING_LITTLE_ENDIAN 1
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define TF_TSTRING_LITTLE_ENDIAN 0
#else
#error "Unable to detect endianness."
#endif

// TF_swap32 reverses the byte order of a 32-bit integer, using a compiler or
// platform intrinsic where one is available.
#if defined(__clang__) || \
    (defined(__GNUC__) &&  \
     ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5))
static inline uint32_t TF_swap32(uint32_t host_int) {
  return __builtin_bswap32(host_int);
}

#elif defined(_MSC_VER)
static inline uint32_t TF_swap32(uint32_t host_int) {
  return _byteswap_ulong(host_int);
}

#elif defined(__APPLE__)
static inline uint32_t TF_swap32(uint32_t host_int) {
  return OSSwapInt32(host_int);
}

#else
static inline uint32_t TF_swap32(uint32_t host_int) {
#if defined(__GLIBC__)
  return bswap_32(host_int);
#else   // defined(__GLIBC__)
  // Portable fallback.  C-style casts keep this header valid C as well as
  // C++.
  return (((host_int & (uint32_t)0xFF) << 24) |
          ((host_int & (uint32_t)0xFF00) << 8) |
          ((host_int & (uint32_t)0xFF0000) >> 8) |
          ((host_int & (uint32_t)0xFF000000) >> 24));
#endif  // defined(__GLIBC__)
}
#endif

// TF_le32toh converts a 32-bit value stored in little-endian byte order to
// host byte order: the identity on little-endian hosts, a byte swap on
// big-endian hosts.
#if TF_TSTRING_LITTLE_ENDIAN
#define TF_le32toh(x) (x)
#else  // TF_TSTRING_LITTLE_ENDIAN
#define TF_le32toh(x) TF_swap32(x)
#endif  // TF_TSTRING_LITTLE_ENDIAN
// Rounds `i' up to the nearest multiple of 16.
static inline size_t TF_align16(size_t i) {
  const size_t low_bits = (size_t)15;
  return (i + low_bits) & ~low_bits;
}
// Returns the larger of `a' and `b'.
static inline size_t TF_max(size_t a, size_t b) {
  if (a > b) {
    return a;
  }
  return b;
}
// Returns the smaller of `a' and `b'.
static inline size_t TF_min(size_t a, size_t b) {
  if (a < b) {
    return a;
  }
  return b;
}
// Storage representation of a TF_TString.  The tag is read from the two low
// bits of the first byte of the struct (see TF_TString_GetType).
typedef enum TF_TString_Type { // NOLINT
// Contents are stored inline in the TF_TString struct.
TF_TSTR_SMALL = 0x00,
// Contents are stored in a heap buffer.
TF_TSTR_LARGE = 0x01,
// Contents are located at an offset relative to the TF_TString's own address.
TF_TSTR_OFFSET = 0x02,
// Contents are an unowned character buffer.
TF_TSTR_VIEW = 0x03,
// Bit mask that extracts the tag from the first byte.
TF_TSTR_TYPE_MASK = 0x03
} TF_TString_Type;
// Representation of a heap allocated (LARGE) string.
typedef struct TF_TString_Large { // NOLINT
// String length combined with the type tag; decode with
// TF_TString_ToActualSizeT.
size_t size;
// Usable capacity in characters; the buffer is allocated with `cap + 1' bytes
// so there is room for a trailing null character.
size_t cap;
// Heap buffer holding the characters.
char *ptr;
} TF_TString_Large;
// Representation of an OFFSET string.
typedef struct TF_TString_Offset { // NOLINT
// String length shifted left by two bits to make room for the type tag;
// TF_TString_GetSize decodes it with TF_le32toh followed by `>> 2'.
uint32_t size;
// Byte offset from the address of the TF_TString to the first character.
uint32_t offset;
// NOTE(review): `count' is not read by any function in this header; its
// purpose should be confirmed.
uint32_t count;
} TF_TString_Offset;
// Representation of a VIEW string: a length plus a pointer to characters that
// the TF_TString never frees.
typedef struct TF_TString_View { // NOLINT
// String length combined with the type tag; decode with
// TF_TString_ToActualSizeT.
size_t size;
// First character of the viewed buffer.
const char *ptr;
} TF_TString_View;
// Byte-wise view of the string struct; raw[0] carries the type tag.
typedef struct TF_TString_Raw { // NOLINT
uint8_t raw[24];
} TF_TString_Raw;
// Union of every representation except SMALL.  Its size is used to derive
// TF_TString_SmallCapacity.
typedef union TF_TString_Union { // NOLINT
TF_TString_Large large;
TF_TString_Offset offset;
TF_TString_View view;
TF_TString_Raw raw;
} TF_TString_Union;
// Maximum number of characters a SMALL string can hold: the size of the union
// minus one byte for the size/tag field and one byte for the null terminator.
enum {
TF_TString_SmallCapacity =
(sizeof(TF_TString_Union) - sizeof(/* null delim */ char) -
sizeof(/* uint8_t size */ uint8_t)),
};
// Representation of an inline (SMALL) string.
typedef struct TF_TString_Small { // NOLINT
// String length shifted left by two bits, combined with the type tag.
uint8_t size;
// Inline character storage, including room for the null terminator.
char str[TF_TString_SmallCapacity + sizeof(/* null delim */ char)];
} TF_TString_Small;
// The string object itself: a union of all representations.  The active
// member is identified by the type tag returned by TF_TString_GetType.
typedef struct TF_TString { // NOLINT
union {
// small conflicts with '#define small char' in RpcNdr.h for MSVC, so we use
// smll instead.
TF_TString_Small smll;
TF_TString_Large large;
TF_TString_Offset offset;
TF_TString_View view;
TF_TString_Raw raw;
} u;
} TF_TString;
// TODO(dero): Fix for OSS, and add C only build test.
// _Static_assert(CHAR_BIT == 8);
// _Static_assert(sizeof(TF_TString) == 24);
// Returns the representation tag of `str', decoded from the two low bits of
// the first byte of the struct.
extern inline TF_TString_Type TF_TString_GetType(const TF_TString *str) {
  const uint8_t tag_byte = str->u.raw.raw[0];
  return (TF_TString_Type)(tag_byte & TF_TSTR_TYPE_MASK);  // NOLINT
}
// Decodes a size word stored in a LARGE or VIEW string (see
// TF_TString_ToInternalSizeT) back into the actual string length.
//
// TF_TSTRING_LITTLE_ENDIAN is always defined (as 0 or 1), so the branch must
// be selected with `#if'; `#ifdef' would pick the little-endian path on every
// platform.
//
// XXX(dero): For the big-endian case, this function could potentially be more
// performant and readable by always storing the string size as little-endian
// and always byte-swapping on big endian, resulting in a simple 'bswap'+'shr'
// (for architectures that have a bswap op).
static inline size_t TF_TString_ToActualSizeT(size_t size) {
#if TF_TSTRING_LITTLE_ENDIAN
  // The tag lives in the two low bits; shift it out.
  return size >> 2;
#else   // TF_TSTRING_LITTLE_ENDIAN
  // 0xFF000000 or 0xFF00000000000000 depending on platform
  static const size_t mask = ~((~(size_t)0) >> 8);
  return (((mask << 2) & size) >> 2) | (~mask & size);
#endif  // TF_TSTRING_LITTLE_ENDIAN
}
// Encodes `size' together with the representation tag `type' into the size
// word stored in a LARGE or VIEW string, arranged so that the tag ends up in
// the first byte in memory (where TF_TString_GetType reads it).
//
// TF_TSTRING_LITTLE_ENDIAN is always defined (as 0 or 1), so the branch must
// be selected with `#if'; `#ifdef' would pick the little-endian path on every
// platform.
static inline size_t TF_TString_ToInternalSizeT(size_t size,
                                                TF_TString_Type type) {
#if TF_TSTRING_LITTLE_ENDIAN
  // The least significant byte is first in memory: tag goes in the low bits.
  return (size << 2) | type;
#else   // TF_TSTRING_LITTLE_ENDIAN
  // 0xFF000000 or 0xFF00000000000000 depending on platform
  static const size_t mask = ~((~(size_t)0) >> 8);
  return (mask & (size << 2)) | (~mask & size) |
         ((size_t)type << ((sizeof(size_t) - 1) * 8));  // NOLINT
#endif  // TF_TSTRING_LITTLE_ENDIAN
}
// Puts `str' into the empty SMALL state: zero length, null-terminated.
extern inline void TF_TString_Init(TF_TString *str) {
  TF_TString_Small *small_rep = &str->u.smll;
  small_rep->str[0] = '\0';
  small_rep->size = 0;
}
// Releases the heap buffer of a LARGE string and resets `str' to the empty
// SMALL state.  Every other representation is left untouched.
extern inline void TF_TString_Dealloc(TF_TString *str) {
  if (TF_TString_GetType(str) != TF_TSTR_LARGE) {
    return;
  }
  char *heap_buf = str->u.large.ptr;
  if (heap_buf == NULL) {  // NOLINT
    return;
  }
  free(heap_buf);
  TF_TString_Init(str);
}
extern inline size_t TF_TString_GetSize(const TF_TString *str) {
switch (TF_TString_GetType(str)) {
case TF_TSTR_SMALL:
return str->u.smll.size >> 2;
case TF_TSTR_LARGE:
return TF_TString_ToActualSizeT(str->u.large.size);
case TF_TSTR_OFFSET:
return TF_le32toh(str->u.offset.size) >> 2;
case TF_TSTR_VIEW:
return TF_TString_ToActualSizeT(str->u.view.size);
default:
return 0; // Unreachable.
}
}
extern inline size_t TF_TString_GetCapacity(const TF_TString *str) {
switch (TF_TString_GetType(str)) {
case TF_TSTR_SMALL:
return TF_TString_SmallCapacity;
case TF_TSTR_LARGE:
return str->u.large.cap;
case TF_TSTR_OFFSET:
case TF_TSTR_VIEW:
default:
return 0;
}
}
extern inline const char *TF_TString_GetDataPointer(const TF_TString *str) {
switch (TF_TString_GetType(str)) {
case TF_TSTR_SMALL:
return str->u.smll.str;
case TF_TSTR_LARGE:
return str->u.large.ptr;
case TF_TSTR_OFFSET:
return (const char *)str + str->u.offset.offset; // NOLINT
case TF_TSTR_VIEW:
return str->u.view.ptr;
default:
// Unreachable.
return NULL; // NOLINT
}
}
// Resizes `str' to hold `new_size' characters without initializing any newly
// exposed characters, and returns a pointer to the null-terminated buffer.
// A `new_size' of at most TF_TString_SmallCapacity produces a SMALL string;
// anything larger produces a LARGE (heap allocated) string.  The first
// min(new_size, current size) characters are preserved.
// NOTE(review): the results of malloc/realloc are not checked for NULL.
extern inline char *TF_TString_ResizeUninitialized(TF_TString *str,
size_t new_size) {
// Snapshot the current representation first: the writes below overwrite the
// union members these values are decoded from.
size_t curr_size = TF_TString_GetSize(str);
size_t copy_size = TF_min(new_size, curr_size);
TF_TString_Type curr_type = TF_TString_GetType(str);
const char *curr_ptr = TF_TString_GetDataPointer(str);
// Case: SMALL/LARGE/VIEW/OFFSET -> SMALL
if (new_size <= TF_TString_SmallCapacity) {
str->u.smll.size = (uint8_t)((new_size << 2) | TF_TSTR_SMALL); // NOLINT
str->u.smll.str[new_size] = '\0';
// A SMALL source already has its characters in place; any other source is
// copied in from the pointer captured above.
if (curr_type != TF_TSTR_SMALL && copy_size) {
memcpy(str->u.smll.str, curr_ptr, copy_size);
}
// The old heap buffer is no longer referenced once the data is inline.
if (curr_type == TF_TSTR_LARGE) {
free((void *)curr_ptr); // NOLINT
}
// We do not clear out the newly excluded region.
return str->u.smll.str;
}
// Case: SMALL/LARGE/VIEW/OFFSET -> LARGE
size_t new_cap;
size_t curr_cap = TF_TString_GetCapacity(str);
// We assume SIZE_MAX % 16 == 0.
size_t curr_cap_x2 = curr_cap >= SIZE_MAX / 2 ? SIZE_MAX - 1 : curr_cap * 2;
// Capacity policy: halve when shrinking below half the capacity, jump
// straight to the request when it exceeds twice the capacity, double when it
// merely exceeds the capacity, and otherwise keep the current capacity.
// Capacities are of the form 16 * k - 1 so that `cap + 1' is a multiple of 16.
if (new_size < curr_size && new_size < curr_cap / 2) {
// TODO(dero): Replace with shrink_to_fit flag.
new_cap = TF_align16(curr_cap / 2 + 1) - 1;
} else if (new_size > curr_cap_x2) {
new_cap = TF_align16(new_size + 1) - 1;
} else if (new_size > curr_cap) {
new_cap = TF_align16(curr_cap_x2 + 1) - 1;
} else {
new_cap = curr_cap;
}
char *new_ptr;
if (new_cap == curr_cap) {
new_ptr = str->u.large.ptr;
} else if (curr_type == TF_TSTR_LARGE) {
new_ptr = (char *)realloc(str->u.large.ptr, new_cap + 1); // NOLINT
} else {
// SMALL/VIEW/OFFSET source: allocate a fresh buffer and copy the preserved
// prefix before the union is overwritten below.
new_ptr = (char *)malloc(new_cap + 1); // NOLINT
if (copy_size) {
memcpy(new_ptr, curr_ptr, copy_size);
}
}
str->u.large.size = TF_TString_ToInternalSizeT(new_size, TF_TSTR_LARGE);
str->u.large.ptr = new_ptr;
str->u.large.ptr[new_size] = '\0';
str->u.large.cap = new_cap;
return str->u.large.ptr;
}
// Returns a writable pointer to the characters of `str'.  OFFSET and VIEW
// strings are first converted to an owned representation, which is SMALL when
// the contents fit inline and LARGE otherwise.
extern inline char *TF_TString_GetMutableDataPointer(TF_TString *str) {
  switch (TF_TString_GetType(str)) {
    case TF_TSTR_SMALL:
      return str->u.smll.str;
    case TF_TSTR_OFFSET:
    case TF_TSTR_VIEW:
      // Convert OFFSET/VIEW to SMALL/LARGE.  Resizing to the current size
      // yields a SMALL string whenever the contents fit inline, so
      // `u.large.ptr' is not necessarily valid afterwards; return the buffer
      // pointer reported by the resize itself.
      return TF_TString_ResizeUninitialized(str, TF_TString_GetSize(str));
    case TF_TSTR_LARGE:
      return str->u.large.ptr;
    default:
      // Unreachable.
      return NULL;  // NOLINT
  }
}
// Ensures `str' can hold at least `new_cap' characters without changing its
// size or contents.  Requests that fit within the inline capacity are
// ignored; larger requests grow an existing LARGE buffer or convert
// SMALL/VIEW/OFFSET strings to LARGE.
extern inline void TF_TString_Reserve(TF_TString *str, size_t new_cap) {
  // Nothing to do for requests within the inline capacity.  Conversion of
  // VIEW/OFFSET strings to SMALL (including the degenerate case where the
  // current size exceeds the inline capacity and a malloc will be needed) is
  // deferred to Resize/GetMutableDataPointer.
  if (new_cap <= TF_TString_SmallCapacity) {
    return;
  }

  const int is_large = TF_TString_GetType(str) == TF_TSTR_LARGE;
  // Shrinking the capacity is handled by resize, not here.
  if (is_large && str->u.large.cap >= new_cap) {
    return;
  }

  // VIEW and OFFSET strings are read-only, so their capacity is effectively 0;
  // make sure the new buffer is at least as large as the current contents.
  const size_t len = TF_TString_GetSize(str);
  const size_t rounded_cap = TF_align16(TF_max(new_cap, len) + 1) - 1;

  if (is_large) {
    // Grow the existing heap buffer in place.
    str->u.large.ptr =
        (char *)realloc(str->u.large.ptr, rounded_cap + 1);  // NOLINT
  } else {
    // Convert to LARGE.  The old characters must be copied out before the
    // union is overwritten, since a SMALL string stores them inside `str'.
    const char *old_data = TF_TString_GetDataPointer(str);
    char *buf = (char *)malloc(rounded_cap + 1);  // NOLINT
    memcpy(buf, old_data, len);
    buf[len] = '\0';
    str->u.large.size = TF_TString_ToInternalSizeT(len, TF_TSTR_LARGE);
    str->u.large.ptr = buf;
  }
  str->u.large.cap = rounded_cap;
}
// Resizes `str' to `new_size' characters and fills any newly added characters
// with `c'.  Returns a pointer to the resized buffer.
extern inline char *TF_TString_Resize(TF_TString *str, size_t new_size,
                                      char c) {
  const size_t old_size = TF_TString_GetSize(str);
  char *data = TF_TString_ResizeUninitialized(str, new_size);
  if (old_size < new_size) {
    const size_t grown = new_size - old_size;
    memset(data + old_size, c, grown);
  }
  return data;
}
// Makes `dst' a VIEW of the `size' characters starting at `src', after
// releasing any heap buffer `dst' owned.  The characters are not copied.
extern inline void TF_TString_AssignView(TF_TString *dst, const char *src,
                                         size_t size) {
  TF_TString_Dealloc(dst);
  dst->u.view.ptr = src;
  dst->u.view.size = TF_TString_ToInternalSizeT(size, TF_TSTR_VIEW);
}
// Appends the `src_size' characters starting at `src' to the end of `dst'.
// Appending zero characters leaves `dst' untouched.
extern inline void TF_TString_AppendN(TF_TString *dst, const char *src,
                                      size_t src_size) {
  if (src_size == 0) {
    return;
  }
  const size_t old_size = TF_TString_GetSize(dst);
  // Grow first, then write the new characters directly after the old ones.
  char *tail =
      TF_TString_ResizeUninitialized(dst, old_size + src_size) + old_size;
  memcpy(tail, src, src_size);
}
// Appends the contents of the tstring `src' to the end of `dst'.
extern inline void TF_TString_Append(TF_TString *dst, const TF_TString *src) {
  TF_TString_AppendN(dst, TF_TString_GetDataPointer(src),
                     TF_TString_GetSize(src));
}
// Replaces the contents of `dst' with a copy of the `size' characters starting
// at `src'.
extern inline void TF_TString_Copy(TF_TString *dst, const char *src,
                                   size_t size) {
  char *out = TF_TString_ResizeUninitialized(dst, size);
  if (size == 0) {
    return;
  }
  memcpy(out, src, size);
}
// Assigns `src' to `dst'.  SMALL and VIEW sources are copied struct-wise, a
// LARGE source has its characters copied, and an OFFSET source produces a VIEW
// of the source's characters.  Self-assignment is a no-op.
extern inline void TF_TString_Assign(TF_TString *dst, const TF_TString *src) {
  if (dst == src) {
    return;
  }
  TF_TString_Dealloc(dst);

  const TF_TString_Type src_type = TF_TString_GetType(src);
  if (src_type == TF_TSTR_SMALL || src_type == TF_TSTR_VIEW) {
    *dst = *src;
    return;
  }

  const char *src_c = TF_TString_GetDataPointer(src);
  const size_t size = TF_TString_GetSize(src);
  if (src_type == TF_TSTR_LARGE) {
    TF_TString_Copy(dst, src_c, size);
  } else if (src_type == TF_TSTR_OFFSET) {
    TF_TString_AssignView(dst, src_c, size);
  }
}
// Moves `src' into `dst'.  SMALL and VIEW sources are copied struct-wise, a
// LARGE source hands its buffer to `dst' and is reset to the empty state, and
// an OFFSET source produces a VIEW of the source's characters.  Moving a
// string onto itself is a no-op.
extern inline void TF_TString_Move(TF_TString *dst, TF_TString *src) {
  if (dst == src) {
    return;
  }
  TF_TString_Dealloc(dst);

  const TF_TString_Type src_type = TF_TString_GetType(src);
  if (src_type == TF_TSTR_OFFSET) {
    TF_TString_AssignView(dst, TF_TString_GetDataPointer(src),
                          TF_TString_GetSize(src));
    return;
  }

  // SMALL, VIEW and LARGE all transfer by copying the struct.
  *dst = *src;
  if (src_type == TF_TSTR_LARGE) {
    // `dst' now owns the heap buffer; detach it from `src'.
    TF_TString_Init(src);
  }
}
#endif // TENSORFLOW_CORE_PLATFORM_CTSTRING_INTERNAL_H_

View File

@ -0,0 +1,331 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/ctstring.h"
#include <memory>
#include <string>
#include "tensorflow/core/platform/test.h"
// Test string that is longer than TF_TString_SmallCapacity (checked at the
// start of InitAssignMoveDealloc), used to force the LARGE representation.
static const char kLongString[] =
"abcdefghij"
"klmnopqrst"
"uvwxyz0123"
"456789ABCD"
"EFGHIKLMNO";
// Length of kLongString excluding the terminating null character.
const size_t kLongStringLen = sizeof(kLongString) / sizeof(char) - sizeof(char);
// Exercises Init/Copy/Assign/Move/Append/AssignView/Dealloc across the SMALL,
// LARGE and VIEW representations, including the transitions between them.
TEST(TF_CTStringTest, InitAssignMoveDealloc) {
  EXPECT_GT(::strlen(kLongString), TF_TString_SmallCapacity);

  {
    // Empty String
    TF_TString s10, s11, s12;
    TF_TString_Init(&s10);
    TF_TString_Init(&s11);
    TF_TString_Init(&s12);

    EXPECT_EQ(0, TF_TString_GetSize(&s10));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s10));
    EXPECT_STREQ("", TF_TString_GetDataPointer(&s10));
    EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s10));

    TF_TString_Assign(&s11, &s10);

    // Check the assignment target, not the source.
    EXPECT_EQ(0, TF_TString_GetSize(&s11));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s11));
    EXPECT_STREQ("", TF_TString_GetDataPointer(&s11));
    EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s11));

    TF_TString_Move(&s12, &s11);

    // Both the moved-from and the moved-to strings are empty SMALL strings.
    EXPECT_EQ(0, TF_TString_GetSize(&s11));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s11));
    EXPECT_STREQ("", TF_TString_GetDataPointer(&s11));
    EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s11));

    EXPECT_EQ(0, TF_TString_GetSize(&s12));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s12));
    EXPECT_STREQ("", TF_TString_GetDataPointer(&s12));
    EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s12));

    TF_TString_Dealloc(&s10);
    TF_TString_Dealloc(&s11);
    TF_TString_Dealloc(&s12);
  }

  {
    // Small String
    TF_TString s20, s21, s22;
    TF_TString_Init(&s20);
    TF_TString_Init(&s21);
    TF_TString_Init(&s22);

    TF_TString_Copy(&s20, "a", 1);

    EXPECT_EQ(1, TF_TString_GetSize(&s20));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s20));
    EXPECT_STREQ("a", TF_TString_GetDataPointer(&s20));
    EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s20));
    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s20));

    TF_TString_Assign(&s21, &s20);

    EXPECT_EQ(1, TF_TString_GetSize(&s21));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s21));
    EXPECT_STREQ("a", TF_TString_GetDataPointer(&s21));
    EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s21));
    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s21));

    TF_TString_Move(&s22, &s21);

    EXPECT_EQ(1, TF_TString_GetSize(&s22));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s22));
    EXPECT_STREQ("a", TF_TString_GetDataPointer(&s22));
    EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s22));
    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s22));

    TF_TString_Dealloc(&s20);
    TF_TString_Dealloc(&s21);  // Nothing to free: SMALL strings own no heap.
    TF_TString_Dealloc(&s22);
  }

  {
    // Small String -> Large String and View
    TF_TString s30, s31;
    TF_TString_Init(&s30);
    TF_TString_Init(&s31);

    size_t s = TF_TString_SmallCapacity - 1;

    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30));

    // Small String
    TF_TString_Copy(&s30, kLongString, s);

    EXPECT_STREQ(std::string(kLongString, s).data(),
                 TF_TString_GetDataPointer(&s30));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s30));
    EXPECT_GT(TF_TString_SmallCapacity, TF_TString_GetSize(&s30));
    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30));

    // Small String at capacity
    TF_TString_AppendN(&s30, &kLongString[s++], 1);

    EXPECT_STREQ(std::string(kLongString, s).data(),
                 TF_TString_GetDataPointer(&s30));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s30));
    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetSize(&s30));
    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30));

    // Large String
    TF_TString_AppendN(&s30, &kLongString[s++], 1);

    EXPECT_STREQ(std::string(kLongString, s).data(),
                 TF_TString_GetDataPointer(&s30));
    EXPECT_STREQ(std::string(kLongString, s).data(),
                 TF_TString_GetMutableDataPointer(&s30));
    EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s30));
    EXPECT_EQ(s, TF_TString_GetSize(&s30));
    EXPECT_LT(TF_TString_SmallCapacity, TF_TString_GetSize(&s30));
    EXPECT_LT(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s30));

    // Large String Move
    TF_TString_Move(&s31, &s30);

    EXPECT_STREQ("", TF_TString_GetDataPointer(&s30));
    EXPECT_STREQ("", TF_TString_GetMutableDataPointer(&s30));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s30));
    EXPECT_EQ(0, TF_TString_GetSize(&s30));

    EXPECT_STREQ(std::string(kLongString, s).data(),
                 TF_TString_GetDataPointer(&s31));
    EXPECT_STREQ(std::string(kLongString, s).data(),
                 TF_TString_GetMutableDataPointer(&s31));
    EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s31));
    EXPECT_EQ(s, TF_TString_GetSize(&s31));
    EXPECT_LT(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s31));

    TF_TString_Dealloc(&s30);
    TF_TString_Dealloc(&s31);
  }

  {
    // Small String -> Large String -> Larger -> View
    const char kStr[] = "abcdef";
    // A length, so use size_t rather than char.
    const size_t kStrLen = sizeof(kStr) / sizeof(char) - sizeof(char);
    TF_TString s40, s41;

    TF_TString_Init(&s40);
    TF_TString_Init(&s41);

    TF_TString_Copy(&s40, kLongString, kLongStringLen);

    EXPECT_EQ(kLongStringLen, TF_TString_GetSize(&s40));

    TF_TString_Assign(&s41, &s40);

    EXPECT_STREQ(kLongString, TF_TString_GetDataPointer(&s40));
    EXPECT_STREQ(kLongString, TF_TString_GetMutableDataPointer(&s40));
    EXPECT_EQ(kLongStringLen, TF_TString_GetSize(&s41));

    TF_TString_AppendN(&s40, kLongString, kLongStringLen);
    TF_TString_Append(&s40, &s41);

    std::string longerString(kLongString);
    longerString += kLongString;
    longerString += kLongString;

    EXPECT_STREQ(longerString.data(), TF_TString_GetDataPointer(&s40));
    EXPECT_STREQ(longerString.data(), TF_TString_GetMutableDataPointer(&s40));
    EXPECT_EQ(longerString.size(), TF_TString_GetSize(&s40));

    TF_TString_AssignView(&s40, kStr, kStrLen);

    EXPECT_EQ(TF_TSTR_VIEW, TF_TString_GetType(&s40));
    EXPECT_EQ(kStr, TF_TString_GetDataPointer(&s40));
    EXPECT_EQ(6, TF_TString_GetSize(&s40));
    EXPECT_EQ(0, TF_TString_GetCapacity(&s40));

    // Requesting a mutable pointer converts the VIEW into an owned string.
    EXPECT_NE(kStr, TF_TString_GetMutableDataPointer(&s40));
    EXPECT_STREQ(kStr, TF_TString_GetMutableDataPointer(&s40));

    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s40));
    EXPECT_EQ(6, TF_TString_GetSize(&s40));
    EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s40));

    TF_TString_Dealloc(&s40);
    TF_TString_Dealloc(&s41);
  }

  {
    // Small String -> Large String -> Smaller
    TF_TString s50;

    TF_TString_Init(&s50);

    TF_TString_Copy(&s50, "a", 1);

    EXPECT_STREQ("a", TF_TString_GetDataPointer(&s50));
    EXPECT_STREQ("a", TF_TString_GetMutableDataPointer(&s50));
    EXPECT_EQ(1, TF_TString_GetSize(&s50));

    TF_TString_Copy(&s50, kLongString, kLongStringLen);

    EXPECT_STREQ(kLongString, TF_TString_GetDataPointer(&s50));
    EXPECT_STREQ(kLongString, TF_TString_GetMutableDataPointer(&s50));
    EXPECT_EQ(kLongStringLen, TF_TString_GetSize(&s50));

    // align16(kLongStringLen) - 1 = 63
    size_t cap1 = TF_TString_GetCapacity(&s50);

    // Test reduced allocation with on large type.
    TF_TString_Copy(&s50, kLongString, TF_TString_SmallCapacity + 1);

    // align16(TF_TString_SmallCapacity+1) - 1 = 31
    size_t cap2 = TF_TString_GetCapacity(&s50);

    EXPECT_STREQ(std::string(kLongString, TF_TString_SmallCapacity + 1).data(),
                 TF_TString_GetMutableDataPointer(&s50));
    EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s50));

    EXPECT_GT(cap1, cap2);

    TF_TString_Copy(&s50, "c", 1);

    EXPECT_STREQ("c", TF_TString_GetDataPointer(&s50));
    EXPECT_STREQ("c", TF_TString_GetMutableDataPointer(&s50));
    EXPECT_EQ(1, TF_TString_GetSize(&s50));
    EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s50));

    TF_TString_Dealloc(&s50);
  }
}
// Exercises TF_TString_Resize (fill character, growth and shrinkage) and
// TF_TString_Reserve (no-op within the small capacity, 16-byte rounding,
// conversion of SMALL/VIEW strings to LARGE).
TEST(TF_CTStringTest, ResizeReserve) {
{
// Resize
TF_TString s60;
TF_TString_Init(&s60);
TF_TString_Resize(&s60, 2, 'a');
EXPECT_EQ(0, ::memcmp("aa", TF_TString_GetDataPointer(&s60), 2));
// Growing fills only the newly added characters with the given character.
TF_TString_Resize(&s60, 4, '\0');
EXPECT_EQ(0, ::memcmp("aa\0\0", TF_TString_GetDataPointer(&s60), 4));
TF_TString_Resize(&s60, 6, 'b');
EXPECT_EQ(0, ::memcmp("aa\0\0bb", TF_TString_GetDataPointer(&s60), 6));
// Shrinking ignores the fill character.
TF_TString_Resize(&s60, 2, 'c');
EXPECT_EQ(0, ::memcmp("aa", TF_TString_GetDataPointer(&s60), 2));
TF_TString_Dealloc(&s60);
}
{
// Reserve
TF_TString s70;
TF_TString_Init(&s70);
// Requests within the small capacity leave the string unchanged.
TF_TString_Reserve(&s70, TF_TString_SmallCapacity - 1);
EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s70));
EXPECT_EQ(0, TF_TString_GetSize(&s70));
EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s70));
TF_TString_Reserve(&s70, TF_TString_SmallCapacity);
EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s70));
EXPECT_EQ(0, TF_TString_GetSize(&s70));
EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s70));
TF_TString_Copy(&s70, "hello", 5);
EXPECT_EQ(5, TF_TString_GetSize(&s70));
EXPECT_EQ(TF_TString_SmallCapacity, TF_TString_GetCapacity(&s70));
EXPECT_EQ(TF_TSTR_SMALL, TF_TString_GetType(&s70));
TF_TString_Reserve(&s70, 100);
// Test 16 byte alignment (7*16 - 1 = 111)
EXPECT_EQ(111, TF_TString_GetCapacity(&s70));
EXPECT_EQ(5, TF_TString_GetSize(&s70));
EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s70));
TF_TString_AssignView(&s70, kLongString, kLongStringLen);
// A request within the small capacity is a no-op, so the string stays a VIEW.
TF_TString_Reserve(&s70, 10);
EXPECT_EQ(TF_TSTR_VIEW, TF_TString_GetType(&s70));
EXPECT_EQ(0, TF_TString_GetCapacity(&s70));
TF_TString_Reserve(&s70, 100);
// Converted to LARGE since it can no longer fit in SMALL.
EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s70));
EXPECT_EQ(111, TF_TString_GetCapacity(&s70));
TF_TString_Reserve(&s70, 200);
EXPECT_EQ(TF_TSTR_LARGE, TF_TString_GetType(&s70));
// align16(200 + 1) - 1 = 207
EXPECT_EQ(207, TF_TString_GetCapacity(&s70));
TF_TString_Dealloc(&s70);
}
}

View File

@ -16,24 +16,26 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_PLATFORM_TSTRING_H_
#define TENSORFLOW_CORE_PLATFORM_TSTRING_H_
#include <assert.h>
#include <ostream>
#include <string>
// TODO(b/138799229): Used to toggle until global presubmits pass.
#include "tensorflow/core/platform/cord.h"
#include "tensorflow/core/platform/ctstring.h"
#define USE_TSTRING
#ifdef USE_TSTRING
// TODO(dero): This include is temporary, and will be superfluous once
// absl::string_view is aliased to std::string_view.
#include "absl/strings/string_view.h"
namespace absl {
#ifdef ABSL_NAMESPACE_BEGIN
ABSL_NAMESPACE_BEGIN
#endif // ABSL_NAMESPACE_BEGIN
class AlphaNum;
#ifdef PLATFORM_GOOGLE
class Cord;
#endif // PLATFORM_GOOGLE
#ifdef ABSL_NAMESPACE_END
ABSL_NAMESPACE_END
#endif // ABSL_NAMESPACE_END
@ -43,243 +45,554 @@ namespace tensorflow {
// tensorflow::tstring is the scalar type for DT_STRING tensors.
//
// TODO(b/138799229): In order to ease migration from tensorflow::string to
// tensorflow::tstring, we define a simplified tstring class which wraps
// std::string. The API defined below is the expected subset of methods for
// tstring.
// tstrings are meant to be used when interfacing with string tensors, and
// should not be considered as a general replacement for std::string in
// tensorflow. The primary purpose of tstring is to provide a unified and
// stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating
// unnecessary conversions across language boundaries, and allowing for compiler
// agnostic interoperability across dynamically loaded modules.
//
// The underlying implementation of tstring will be replaced with the one
// defined in [1] once the migration in tensorflow/ is complete.
// In addition to ABI stability, tstrings feature two string subtypes, VIEW and
// OFFSET.
//
// [1] https://github.com/tensorflow/community/pull/91
// VIEW tstrings are views into unowned character buffers; they can be used to
// pass around existing character strings without incurring a per object heap
// allocation. Note that, like std::string_view, it is the user's
// responsibility to ensure that the underlying buffer of a VIEW tstring exceeds
// the lifetime of the associated tstring object.
//
// TODO(dero): Methods for creating OFFSET tensors are not currently
// implemented.
//
// OFFSET tstrings are platform independent offset defined strings which can be
// directly mmaped or copied into a tensor buffer without the need for
// deserialization or processing. For security reasons, it is imperative that
// OFFSET based string tensors are validated before use, or are from a trusted
// source.
//
// Underlying VIEW and OFFSET buffers are considered immutable, so l-value
// assignment, mutation, or non-const access to data() of tstrings will result
// in the conversion to an owned SMALL/LARGE type.
//
// The interface for tstring largely overlaps with std::string. Except where
// noted, expect equivalent semantics with synonymous std::string methods.
class tstring {
std::string str_;
template <typename T, typename = void>
struct ResizeUninitialized {
static void Resize(T& s, size_t new_size) { s.resize(new_size); }
};
template <typename T>
struct ResizeUninitialized<
T, decltype(std::declval<T>().__resize_default_init(0))> {
static void Resize(T& s, size_t new_size) {
s.__resize_default_init(new_size);
}
};
TF_TString tstr_;
public:
typedef char* iterator;
enum Type {
// See ctstring.h
SMALL = TF_TSTR_SMALL,
LARGE = TF_TSTR_LARGE,
OFFSET = TF_TSTR_OFFSET,
VIEW = TF_TSTR_VIEW,
};
// Assignment to a tstring object with a tstring::view type will create a VIEW
// type tstring.
class view {
const char* data_;
size_t size_;
public:
explicit view(const char* data, size_t size) : data_(data), size_(size) {}
explicit view(const char* data) : data_(data), size_(::strlen(data)) {}
const char* data() const { return data_; }
size_t size() const { return size_; }
view() = delete;
view(const view&) = delete;
view& operator=(const view&) = delete;
};
typedef const char* const_iterator;
tstring() = default;
tstring(const tstring&) = default;
tstring(const std::string& str) : str_(str) {}
tstring(const char* str, size_t len) : str_(str, len) {}
tstring(const char* str) : str_(str) {}
tstring(size_t n, char c) : str_(n, c) {}
explicit tstring(const absl::string_view& str)
: str_(str.data(), str.size()) {}
// Ctor
tstring();
tstring(const std::string& str); // NOLINT TODO(b/147740521): Make explicit.
tstring(const char* str, size_t len);
tstring(const char* str); // NOLINT TODO(b/147740521): Make explicit.
tstring(size_t n, char c);
explicit tstring(const absl::string_view str);
#ifdef PLATFORM_GOOGLE
template <typename T,
typename std::enable_if<std::is_same<T, absl::Cord>::value,
T>::type* = nullptr>
explicit tstring(const T& cord) : str_(string(cord)) {}
explicit tstring(const absl::Cord& cord);
#endif // PLATFORM_GOOGLE
tstring(tstring&&) = default;
// Copy
tstring(const tstring& str);
~tstring() = default;
// Move
tstring(tstring&& str) noexcept;
tstring& operator=(const tstring& str) = default;
tstring& operator=(const std::string& str) {
str_ = str;
return *this;
}
tstring& operator=(const absl::string_view& str) {
str_.assign(str.data(), str.size());
return *this;
}
// Dtor
~tstring();
// Copy Assignment
tstring& operator=(const tstring& str);
tstring& operator=(const std::string& str);
tstring& operator=(const char* str);
tstring& operator=(char ch);
tstring& operator=(const absl::string_view str);
#ifdef PLATFORM_GOOGLE
template <typename T,
typename std::enable_if<std::is_same<T, absl::Cord>::value,
T>::type* = nullptr>
tstring& operator=(const T& cord) {
str_ = string(cord);
return *this;
}
tstring& operator=(const absl::Cord& cord);
#endif // PLATFORM_GOOGLE
tstring& operator=(const char* str) {
str_ = str;
// View Assignment
tstring& operator=(const view& tsv);
return *this;
}
// Move Assignment
tstring& operator=(tstring&& str);
tstring& operator=(char ch) {
str_ = ch;
return *this;
}
tstring& operator=(tstring&&) = default;
bool operator<(const tstring& o) const { return str_ < o.str_; }
bool operator>(const tstring& o) const { return str_ > o.str_; }
bool operator==(const char* o) const { return str_ == o; }
bool operator==(const tstring& o) const { return str_ == o.str_; }
bool operator!=(const char* o) const { return str_ != o; }
bool operator!=(const tstring& o) const { return str_ != o.str_; }
operator std::string() const { return str_; }
operator absl::string_view() const {
return absl::string_view(str_.data(), str_.size());
}
// Comparison
int compare(const char* str, size_t len) const;
bool operator<(const tstring& o) const;
bool operator>(const tstring& o) const;
bool operator==(const char* str) const;
bool operator==(const tstring& o) const;
bool operator!=(const char* str) const;
bool operator!=(const tstring& o) const;
// Conversion Operators
// TODO(b/147740521): Make explicit.
operator std::string() const; // NOLINT
// TODO(b/147740521): Make explicit.
operator absl::string_view() const; // NOLINT
#ifdef PLATFORM_GOOGLE
template <typename T,
typename std::enable_if<std::is_same<T, absl::AlphaNum>::value,
T>::type* = nullptr>
operator T() const {
return T(str_);
}
operator T() const; // NOLINT TODO(b/147740521): Remove.
#endif // PLATFORM_GOOGLE
bool empty() const { return str_.empty(); }
// Attributes
size_t size() const;
size_t length() const;
size_t capacity() const;
bool empty() const;
Type type() const;
size_t length() const { return str_.length(); }
// Allocation
void resize(size_t new_size, char c = 0);
// Similar to resize, but will leave the newly grown region uninitialized.
void resize_uninitialized(size_t new_size);
void clear() noexcept;
void reserve(size_t n);
size_t size() const { return str_.size(); }
// Iterators
const_iterator begin() const;
const_iterator end() const;
size_t capacity() const { return str_.capacity(); }
// Const Element Access
const char* c_str() const;
const char* data() const;
const char& operator[](size_t i) const;
const char& back() const;
const char* c_str() const { return str_.c_str(); }
// Mutable Element Access
// NOTE: For VIEW/OFFSET types, calling these methods will result in the
// conversion to a SMALL or heap allocated LARGE type. As a result,
// previously obtained pointers, references, or iterators to the underlying
// buffer will point to the original VIEW/OFFSET and not the new allocation.
char* mdata();
char* data(); // DEPRECATED: Use mdata().
char& operator[](size_t i);
const char* data() const { return str_.data(); }
// Assignment
tstring& assign(const char* str, size_t len);
tstring& assign(const char* str);
const_iterator begin() const { return data(); }
const_iterator end() const { return data() + size(); }
// View Assignment
tstring& assign_as_view(const tstring& str);
tstring& assign_as_view(const std::string& str);
tstring& assign_as_view(const absl::string_view str);
tstring& assign_as_view(const char* str, size_t len);
tstring& assign_as_view(const char* str);
char back() const { return str_.back(); }
// Modifiers
// NOTE: Invalid input will result in undefined behavior.
tstring& append(const tstring& str);
tstring& append(const char* str, size_t len);
tstring& append(const char* str);
tstring& append(size_t n, char c);
const char& operator[](size_t i) const { return str_[i]; }
tstring& erase(size_t pos, size_t len);
char* data() { return &str_[0]; }
tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen);
tstring& insert(size_t pos, size_t n, char c);
void swap(tstring& str);
void push_back(char ch);
iterator begin() { return data(); }
iterator end() { return data() + size(); }
char& operator[](size_t i) { return str_[i]; }
void clear() noexcept { str_.clear(); }
void resize(size_t new_size) { str_.resize(new_size); }
void resize(size_t new_size, char c) { str_.resize(new_size, c); }
void resize_uninitialized(size_t new_size) {
ResizeUninitialized<decltype(str_)>::Resize(str_, new_size);
}
void reserve(size_t n) { str_.reserve(n); }
tstring& assign(const char* str, size_t len) {
str_.assign(str, len);
return *this;
}
tstring& assign(const char* str) {
str_.assign(str);
return *this;
}
tstring& append(const tstring& str) {
str_.append(str.str_);
return *this;
}
tstring& append(const char* str, size_t len) {
str_.append(str, len);
return *this;
}
tstring& append(const char* str) {
str_.append(str);
return *this;
}
tstring& append(size_t n, char c) {
str_.append(n, c);
return *this;
}
void swap(tstring& str) { str_.swap(str.str_); }
tstring& insert(size_t pos, const tstring& str, size_t subpos,
size_t sublen) {
str_.insert(pos, str.str_, subpos, sublen);
return *this;
}
tstring& insert(size_t pos, size_t n, char c) {
str_.insert(pos, n, c);
return *this;
}
tstring& erase(size_t pos, size_t len) {
str_.erase(pos, len);
return *this;
}
void push_back(char ch) { str_.push_back(ch); }
friend const tstring operator+(const tstring& a, const tstring& b);
// Friends
friend bool operator==(const char* a, const tstring& b);
friend bool operator==(const std::string& a, const tstring& b);
friend tstring operator+(const tstring& a, const tstring& b);
friend std::ostream& operator<<(std::ostream& o, const tstring& str);
friend std::hash<tstring>;
};
inline bool operator==(const char* a, const tstring& b) { return a == b.str_; }
// Non-member function overloads
inline bool operator==(const std::string& a, const tstring& b) {
return a == b.str_;
bool operator==(const char* a, const tstring& b);
bool operator==(const std::string& a, const tstring& b);
tstring operator+(const tstring& a, const tstring& b);
std::ostream& operator<<(std::ostream& o, const tstring& str);
// Implementations
// Ctor
// Default constructor: initializes the wrapped TF_TString and nothing else.
inline tstring::tstring() {
  TF_TString_Init(&tstr_);
}

// Constructs from exactly `len` bytes starting at `str`. The bytes are handed
// to TF_TString_Copy together with the explicit length, so no NUL terminator
// is searched for.
inline tstring::tstring(const char* str, size_t len) {
  TF_TString_Init(&tstr_);
  TF_TString_Copy(&tstr_, str, len);
}
inline const tstring operator+(const tstring& a, const tstring& b) {
return tstring(a.str_ + b.str_);
// Constructs from a NUL-terminated C string; the length comes from strlen and
// the work is delegated to the (pointer, length) constructor.
inline tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {}

// Fill constructor: initializes the string and resizes it to `n` characters
// using `c` as the fill character.
inline tstring::tstring(size_t n, char c) {
  TF_TString_Init(&tstr_);
  TF_TString_Resize(&tstr_, n, c);
}

// Constructs from the bytes of a std::string (length taken from the string,
// not from a terminator).
inline tstring::tstring(const std::string& str)
    : tstring(str.data(), str.length()) {}

// Constructs from the bytes referenced by an absl::string_view.
inline tstring::tstring(const absl::string_view str)
    : tstring(str.data(), str.length()) {}
#ifdef PLATFORM_GOOGLE
// Constructs from an absl::Cord: the buffer is sized to cord.size() without
// being initialized, and the Cord then copies its bytes into that buffer.
inline tstring::tstring(const absl::Cord& cord) {
  TF_TString_Init(&tstr_);
  TF_TString_ResizeUninitialized(&tstr_, cord.size());
  // Use mdata(): the non-const data() overload is declared deprecated in
  // favour of mdata(), which it simply forwards to.
  cord.CopyToArray(mdata());
}
#endif // PLATFORM_GOOGLE
// Copy
// Initializes this string, then lets TF_TString_Assign take over the contents
// of `str`.
inline tstring::tstring(const tstring& str) {
  TF_TString_Init(&tstr_);
  TF_TString_Assign(&tstr_, &str.tstr_);
}

// Move
// Initializes this string, then transfers the contents of `str` into it with
// TF_TString_Move.
inline tstring::tstring(tstring&& str) noexcept {
  TF_TString_Init(&tstr_);
  TF_TString_Move(&tstr_, &str.tstr_);
}

// Dtor
// Releases whatever the wrapped TF_TString holds via TF_TString_Dealloc.
inline tstring::~tstring() {
  TF_TString_Dealloc(&tstr_);
}
// Copy Assignment
// Replaces the contents of this string with those of `str`.
inline tstring& tstring::operator=(const tstring& str) {
  // Self-assignment is a no-op. The guard also keeps TF_TString_Assign from
  // being called with the same object as both destination and source.
  if (this != &str) {
    TF_TString_Assign(&tstr_, &str.tstr_);
  }
  return *this;
}
// Assigns the bytes of a std::string (copied, length taken from the string).
inline tstring& tstring::operator=(const std::string& str) {
  return assign(str.data(), str.size());
}

// Assigns a NUL-terminated C string; the length is determined with strlen.
inline tstring& tstring::operator=(const char* str) {
  return assign(str, ::strlen(str));
}

// Assigns a single character: the string becomes one character long and that
// character is set to `c`.
inline tstring& tstring::operator=(char c) {
  resize_uninitialized(1);
  mdata()[0] = c;
  return *this;
}

// Assigns the bytes referenced by an absl::string_view (copied).
inline tstring& tstring::operator=(const absl::string_view str) {
  return assign(str.data(), str.size());
}
#ifdef PLATFORM_GOOGLE
// Assigns from an absl::Cord: resizes (uninitialized) to cord.size() and lets
// the Cord copy its bytes into the buffer.
inline tstring& tstring::operator=(const absl::Cord& cord) {
  TF_TString_ResizeUninitialized(&tstr_, cord.size());
  // Use mdata(): the non-const data() overload is declared deprecated in
  // favour of mdata(), which it simply forwards to.
  cord.CopyToArray(mdata());
  return *this;
}
#endif // PLATFORM_GOOGLE
// View Assignment
// Assigning a tstring::view makes this string a view over the buffer the
// view object refers to; see assign_as_view(const char*, size_t).
inline tstring& tstring::operator=(const tstring::view& tsv) {
  return assign_as_view(tsv.data(), tsv.size());
}
// Move Assignment
// Transfers the contents of `str` into this string via TF_TString_Move.
inline tstring& tstring::operator=(tstring&& str) {
  // Moving a string onto itself leaves it untouched. The guard also keeps
  // TF_TString_Move from being called with destination == source.
  if (this != &str) {
    TF_TString_Move(&tstr_, &str.tstr_);
  }
  return *this;
}
// Comparison
// Three-way comparison against the `len` bytes at `str`.
// Returns -1, 0 or +1. The common prefix is compared bytewise with memcmp;
// if the prefixes are equal, the shorter operand orders first.
inline int tstring::compare(const char* str, size_t len) const {
  const size_t own_len = size();
  const int prefix_cmp = ::memcmp(data(), str, std::min(len, own_len));
  if (prefix_cmp != 0) {
    return prefix_cmp < 0 ? -1 : +1;
  }
  if (own_len == len) {
    return 0;
  }
  return own_len < len ? -1 : +1;
}
// True when this string orders before `o` according to compare().
inline bool tstring::operator<(const tstring& o) const {
  const int order = compare(o.data(), o.size());
  return order < 0;
}

// True when this string orders after `o` according to compare().
inline bool tstring::operator>(const tstring& o) const {
  const int order = compare(o.data(), o.size());
  return order > 0;
}

// Equality with a NUL-terminated C string: lengths must match, then the
// bytes are compared with memcmp.
inline bool tstring::operator==(const char* str) const {
  if (::strlen(str) != size()) {
    return false;
  }
  return ::memcmp(data(), str, size()) == 0;
}

// Equality with another tstring: same length and identical bytes.
inline bool tstring::operator==(const tstring& o) const {
  if (o.size() != size()) {
    return false;
  }
  return ::memcmp(data(), o.data(), size()) == 0;
}

// Negation of operator==(const char*).
inline bool tstring::operator!=(const char* str) const {
  return !operator==(str);
}

// Negation of operator==(const tstring&).
inline bool tstring::operator!=(const tstring& o) const {
  return !operator==(o);
}
// Conversion Operators
// Copies the size() bytes at data() into a new std::string.
inline tstring::operator std::string() const {
  const char* const bytes = data();
  return std::string(bytes, size());
}

// Returns a non-owning absl::string_view over data()/size().
inline tstring::operator absl::string_view() const {
  const char* const bytes = data();
  return absl::string_view(bytes, size());
}

#ifdef PLATFORM_GOOGLE
// Conversion to absl::AlphaNum (T is constrained to exactly that type); the
// AlphaNum is built from a string_view of this string.
template <typename T, typename std::enable_if<
                          std::is_same<T, absl::AlphaNum>::value, T>::type*>
inline tstring::operator T() const {
  return T(absl::string_view(*this));
}
#endif // PLATFORM_GOOGLE
// Attributes
// Number of characters, as reported by TF_TString_GetSize.
inline size_t tstring::size() const {
  return TF_TString_GetSize(&tstr_);
}

// Synonym for size().
inline size_t tstring::length() const {
  return TF_TString_GetSize(&tstr_);
}

// Capacity as reported by TF_TString_GetCapacity.
inline size_t tstring::capacity() const {
  return TF_TString_GetCapacity(&tstr_);
}

// True when size() is zero.
inline bool tstring::empty() const {
  return !size();
}

// Storage type of the string (SMALL, LARGE, OFFSET or VIEW), converted from
// the value TF_TString_GetType returns.
inline tstring::Type tstring::type() const {
  const auto raw_type = TF_TString_GetType(&tstr_);
  return static_cast<tstring::Type>(raw_type);
}
// Allocation
// Resizes to `new_size` characters, passing `c` to TF_TString_Resize as the
// fill character.
inline void tstring::resize(size_t new_size, char c) {
  TF_TString_Resize(&tstr_, new_size, c);
}

// Resizes to `new_size` characters via TF_TString_ResizeUninitialized.
inline void tstring::resize_uninitialized(size_t new_size) {
  TF_TString_ResizeUninitialized(&tstr_, new_size);
}

// Empties the string by resizing it to zero characters.
inline void tstring::clear() noexcept {
  const size_t kEmpty = 0;
  TF_TString_ResizeUninitialized(&tstr_, kEmpty);
}

// Forwards the requested capacity `n` to TF_TString_Reserve.
inline void tstring::reserve(size_t n) {
  TF_TString_Reserve(&tstr_, n);
}
// Iterators
// Pointer to the first character.
inline tstring::const_iterator tstring::begin() const {
  return data();
}

// Pointer one past the last character (data() + size()).
inline tstring::const_iterator tstring::end() const {
  return data() + size();
}
// Element Access
// Same pointer as data().
inline const char* tstring::c_str() const {
  return TF_TString_GetDataPointer(&tstr_);
}

// Read-only pointer to the characters.
inline const char* tstring::data() const {
  return TF_TString_GetDataPointer(&tstr_);
}

// Read-only access to character `i`; no bounds check is performed.
inline const char& tstring::operator[](size_t i) const {
  return *(data() + i);
}

// Last character, i.e. the one at index size() - 1; no emptiness check is
// performed.
inline const char& tstring::back() const {
  return data()[size() - 1];
}

// Mutable pointer to the characters, obtained from
// TF_TString_GetMutableDataPointer.
inline char* tstring::mdata() {
  return TF_TString_GetMutableDataPointer(&tstr_);
}

// Deprecated spelling of mdata().
inline char* tstring::data() {
  return TF_TString_GetMutableDataPointer(&tstr_);
}

// Mutable access to character `i`; goes through mdata(), no bounds check.
inline char& tstring::operator[](size_t i) {
  return *(mdata() + i);
}
// Assignment
// Replaces the contents with a copy of the `len` bytes at `str`.
inline tstring& tstring::assign(const char* str, size_t len) {
  TF_TString_Copy(&tstr_, str, len);
  return *this;
}

// Replaces the contents with a copy of a NUL-terminated C string.
inline tstring& tstring::assign(const char* str) {
  return assign(str, ::strlen(str));
}
// View Assignment
// Each overload forwards a (pointer, length) pair to the
// (const char*, size_t) overload, which calls TF_TString_AssignView.

// Views the characters of another tstring.
inline tstring& tstring::assign_as_view(const tstring& str) {
  return assign_as_view(str.data(), str.size());
}

// Views the characters of a std::string.
inline tstring& tstring::assign_as_view(const std::string& str) {
  return assign_as_view(str.data(), str.size());
}

// Views the characters referenced by an absl::string_view.
inline tstring& tstring::assign_as_view(const absl::string_view str) {
  return assign_as_view(str.data(), str.size());
}

// Views the `len` bytes at `str`.
inline tstring& tstring::assign_as_view(const char* str, size_t len) {
  TF_TString_AssignView(&tstr_, str, len);
  return *this;
}

// Views a NUL-terminated C string; the length is taken with strlen.
inline tstring& tstring::assign_as_view(const char* str) {
  return assign_as_view(str, ::strlen(str));
}
// Modifiers
// Appends the contents of another tstring.
inline tstring& tstring::append(const tstring& str) {
  TF_TString_Append(&tstr_, &str.tstr_);
  return *this;
}

// Appends the `len` bytes at `str`.
inline tstring& tstring::append(const char* str, size_t len) {
  TF_TString_AppendN(&tstr_, str, len);
  return *this;
}

// Appends a NUL-terminated C string; the length is taken with strlen.
inline tstring& tstring::append(const char* str) {
  return append(str, ::strlen(str));
}

// Appends `n` characters by growing the string with `c` as the fill
// character.
inline tstring& tstring::append(size_t n, char c) {
  const size_t grown_size = size() + n;
  resize(grown_size, c);
  return *this;
}
// Removes up to `len` characters starting at `pos` and shifts the remaining
// tail down to close the gap.
//
// As with std::string::erase, `len` is clamped to the number of characters
// that follow `pos`, so an over-long count (e.g. std::string::npos) erases to
// the end instead of underflowing the length passed to memmove.
// `pos` greater than size() is still invalid input; it is asserted in debug
// builds.
inline tstring& tstring::erase(size_t pos, size_t len) {
  const size_t old_size = size();
  assert(pos <= old_size);
  const size_t tail = old_size - pos;
  if (len > tail) {
    len = tail;
  }
  // Fetch the mutable pointer once via mdata() (the non-const data() is
  // deprecated). For VIEW/OFFSET strings this is the call that converts the
  // string to an owned SMALL/LARGE buffer.
  char* const base = mdata();
  ::memmove(base + pos, base + pos + len, tail - len);
  resize(old_size - len);
  return *this;
}
// Inserts up to `sublen` characters of `str`, starting at `subpos`, in front
// of the character at `pos`.
//
// As with std::string::insert, `sublen` is clamped to the characters that
// are available in `str` after `subpos`. `pos` > size() and
// `subpos` > str.size() are still invalid input; both are asserted in debug
// builds.
inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos,
                                size_t sublen) {
  if (&str == this) {
    // Inserting a string into itself: the tail shift below would move the
    // source bytes before they are read, so work from a separate copy.
    const tstring snapshot(str);
    return insert(pos, snapshot, subpos, sublen);
  }
  const size_t orig_size = size();
  assert(pos <= orig_size);
  assert(subpos <= str.size());
  const size_t available = str.size() - subpos;
  if (sublen > available) {
    sublen = available;
  }
  TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen);
  // Fetch the mutable pointer after the resize (the buffer may have changed)
  // and through mdata(), since the non-const data() is deprecated.
  char* const base = mdata();
  // Open a gap of `sublen` bytes at `pos`, then fill it from `str`.
  ::memmove(base + pos + sublen, base + pos, orig_size - pos);
  ::memmove(base + pos, str.data() + subpos, sublen);
  return *this;
}
// Inserts `n` copies of `c` in front of the character at `pos`.
// `pos` > size() is invalid input; it is asserted in debug builds.
inline tstring& tstring::insert(size_t pos, size_t n, char c) {
  const size_t orig_size = size();
  assert(pos <= orig_size);
  TF_TString_ResizeUninitialized(&tstr_, orig_size + n);
  // Fetch the mutable pointer after the resize (the buffer may have changed)
  // and through mdata(), since the non-const data() is deprecated.
  char* const base = mdata();
  // Open a gap of `n` bytes at `pos`, then fill it with `c`.
  ::memmove(base + pos + n, base + pos, orig_size - pos);
  ::memset(base + pos, c, n);
  return *this;
}
inline void tstring::swap(tstring& str) {
// TODO(dero): Invalid for OFFSET (unimplemented).
std::swap(tstr_, str.tstr_);
}
inline void tstring::push_back(char ch) { append(1, ch); }
// Friends
// C string on the left: equal when strlen(a) matches b.size() and the bytes
// compare equal.
inline bool operator==(const char* a, const tstring& b) {
  if (::strlen(a) != b.size()) {
    return false;
  }
  return ::memcmp(a, b.data(), b.size()) == 0;
}

// std::string on the left: equal when the sizes match and the bytes compare
// equal.
inline bool operator==(const std::string& a, const tstring& b) {
  if (a.size() != b.size()) {
    return false;
  }
  return ::memcmp(a.data(), b.data(), b.size()) == 0;
}
// Concatenation: returns a new tstring holding `a` followed by `b`. Capacity
// for both operands is reserved up front.
inline tstring operator+(const tstring& a, const tstring& b) {
  tstring joined;
  joined.reserve(a.size() + b.size());
  joined.append(a).append(b);
  return joined;
}
// Streams exactly str.size() bytes starting at str.data() to `o` using
// ostream::write, so the output length does not depend on a NUL terminator.
// (The earlier `o << str.str_` form referred to the std::string member that
// tstring no longer uses.)
inline std::ostream& operator<<(std::ostream& o, const tstring& str) {
  return o.write(str.data(), str.size());
}
} // namespace tensorflow

View File

@ -0,0 +1,407 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include "tensorflow/core/platform/cord.h"
#include "tensorflow/core/platform/test.h"
// TODO(dero): fix ordering issue.
#include "tensorflow/core/platform/tstring.h" // NOLINT
using tensorflow::tstring;
// Fifty-character fixture string. The tests below expect a tstring that owns
// a copy of it to report the LARGE storage type.
static const char kLongString[] =
    "abcdefghij"
    "klmnopqrst"
    "uvwxyz0123"
    "456789ABCD"
    "EFGHIKLMNO";
// Length of kLongString without its terminating NUL.
const size_t kLongStringLen = sizeof(kLongString) - 1;
// Builds one tstring per constructor overload (default, pointer+length,
// C string, fill, string_view, std::string) and checks contents, size and
// storage type of the results.
TEST(TF_TStringTest, Construction) {
tstring s10;
tstring s11("a\0a", 3);
tstring s12(kLongString);
tstring s13(3, 'b');
tstring s14(absl::string_view("hi"));
tstring s15(std::string("bye"));
// Default-constructed: empty, SMALL, capacity equal to the small capacity.
EXPECT_EQ("", s10);
EXPECT_TRUE(s10.empty());
EXPECT_EQ(tstring::Type::SMALL, s10.type());
EXPECT_EQ(0, s10.size());
EXPECT_EQ(0, s10.length());
EXPECT_EQ(TF_TString_SmallCapacity, s10.capacity());
// Pointer+length construction keeps the embedded NUL.
EXPECT_EQ(std::string("a\0a", 3), s11);
EXPECT_FALSE(s11.empty());
EXPECT_EQ(3, s11.size());
EXPECT_EQ(3, s11.length());
// The long fixture is expected to be stored as LARGE with a capacity above
// the small capacity.
EXPECT_EQ(kLongString, s12);
EXPECT_EQ(kLongStringLen, s12.size());
EXPECT_EQ(tstring::Type::LARGE, s12.type());
EXPECT_LT(TF_TString_SmallCapacity, s12.capacity());
EXPECT_EQ("bbb", s13);
EXPECT_EQ("hi", s14);
EXPECT_EQ(tstring::Type::SMALL, s14.type());
EXPECT_EQ("bye", s15);
}
// Copy-constructs a LARGE string, then move-assigns the copy into a third
// string and checks the state of both the target and the moved-from source.
TEST(TF_TStringTest, CopyMove) {
tstring s20(kLongString);
tstring s21(s20);
tstring s22;
EXPECT_EQ(s20, s21);
s22 = std::move(s21);
EXPECT_EQ(s20, s22);
// The moved-from string is expected to be empty and of SMALL type.
EXPECT_EQ("", s21); // NOLINT
EXPECT_EQ(tstring::Type::SMALL, s21.type());
}
// Walks s30..s34 through the assignment overloads (copy, move, view,
// std::string, C string, char, string_view, assign, assign_as_view, Cord) and
// checks contents, size and storage type after each step. The steps build on
// each other, so their order matters.
TEST(TF_TStringTest, Assignment) {
tstring s30("123456789012345678901234567890");
tstring s31;
tstring s32;
// Copy assignment of a LARGE string.
s31 = s30;
EXPECT_EQ(s30, s31);
EXPECT_EQ(tstring::Type::LARGE, s31.type());
EXPECT_EQ(s30.size(), s31.size());
// Move assignment: the source is expected to end up empty and SMALL.
s32 = std::move(s30);
EXPECT_EQ(s31, s32);
EXPECT_EQ("", s30); // NOLINT
EXPECT_EQ(tstring::Type::SMALL, s30.type());
EXPECT_EQ(tstring::Type::LARGE, s32.type());
// Assigning a tstring::view yields a VIEW string with zero capacity.
s32 = tstring::view(kLongString);
EXPECT_EQ(kLongString, s32);
EXPECT_EQ(tstring::Type::VIEW, s32.type());
EXPECT_EQ(kLongStringLen, s32.size());
EXPECT_EQ(0, s32.capacity());
// Move-constructing from a VIEW string keeps the VIEW type.
tstring s33(std::move(s32));
EXPECT_EQ(kLongString, s33);
EXPECT_EQ(tstring::Type::VIEW, s33.type());
EXPECT_EQ(kLongStringLen, s33.size());
// Assignment from std::string copies the bytes (LARGE for the long fixture).
s32 = std::string(kLongString);
EXPECT_EQ(kLongString, s32);
EXPECT_EQ(tstring::Type::LARGE, s32.type());
EXPECT_EQ(kLongStringLen, s32.size());
// LARGE -> SMALL
s32 = "hello";
EXPECT_EQ("hello", s32);
EXPECT_EQ(tstring::Type::SMALL, s32.type());
EXPECT_EQ(5, s32.size());
// Single-character assignment onto a string that was a VIEW.
s33 = 'a';
EXPECT_EQ("a", s33);
EXPECT_EQ(tstring::Type::SMALL, s33.type());
EXPECT_EQ(1, s33.size());
s32 = absl::string_view(kLongString);
EXPECT_EQ(kLongString, s32);
EXPECT_EQ(tstring::Type::LARGE, s32.type());
EXPECT_EQ(kLongStringLen, s32.size());
// LARGE -> SMALL but still LARGE
s32.resize(TF_TString_SmallCapacity * 2);
EXPECT_EQ(absl::string_view(kLongString, TF_TString_SmallCapacity * 2), s32);
EXPECT_EQ(tstring::Type::LARGE, s32.type());
EXPECT_EQ(TF_TString_SmallCapacity * 2, s32.size());
// View assignment with an explicit length.
s32 = tstring::view(kLongString, kLongStringLen);
EXPECT_EQ(kLongString, s32);
EXPECT_EQ(tstring::Type::VIEW, s32.type());
EXPECT_EQ(kLongStringLen, s32.size());
// assign() overloads; the (pointer, length) form takes only the first 5
// characters of "hello2".
s32.assign("hello1");
EXPECT_EQ("hello1", s32);
s32.assign("hello2", 5);
EXPECT_EQ("hello", s32);
// assign_as_view() overloads.
s30.assign_as_view(kLongString);
EXPECT_EQ(tstring::Type::VIEW, s30.type());
s31.assign_as_view(s30);
EXPECT_EQ(tstring::Type::VIEW, s31.type());
// NOTE(review): EXPECT_EQ is given two char pointers here, which presumably
// checks that the views alias kLongString itself (pointer identity), in
// contrast to the EXPECT_STREQ content check used for `tmp` below — confirm.
EXPECT_EQ(kLongString, s30.c_str());
EXPECT_EQ(kLongString, s31.c_str());
std::string tmp(kLongString);
s32.assign_as_view(tmp);
EXPECT_EQ(tstring::Type::VIEW, s32.type());
EXPECT_STREQ(kLongString, s32.c_str());
s33.assign_as_view(kLongString, 2);
EXPECT_EQ(2, s33.size());
s32.assign_as_view(absl::string_view(kLongString));
EXPECT_EQ(tstring::Type::VIEW, s32.type());
EXPECT_EQ(kLongString, s32.c_str());
#ifdef PLATFORM_GOOGLE
// Cord assignment and construction copy the bytes into a LARGE string.
s33 = absl::Cord(kLongString);
EXPECT_EQ(kLongString, s33);
EXPECT_EQ(tstring::Type::LARGE, s33.type());
EXPECT_EQ(kLongStringLen, s33.size());
tstring s34((absl::Cord(kLongString)));
EXPECT_EQ(kLongString, s34);
EXPECT_EQ(tstring::Type::LARGE, s34.type());
EXPECT_EQ(kLongStringLen, s34.size());
#endif // PLATFORM_GOOGLE
}
// Checks <, >, == and != for pairs that differ in content, differ only in
// length, are equal, are empty, or start with an embedded NUL.
TEST(TF_TStringTest, Comparison) {
tstring empty("");
tstring a("a");
tstring aa("aa");
tstring a_("a");
tstring b("b");
const char c[] = "c";
tstring nulla("\0a", 2);
tstring nullb("\0b", 2);
tstring nullaa("\0aa", 3);
// Different single characters.
EXPECT_TRUE(a < b);
EXPECT_TRUE(a != b);
EXPECT_FALSE(a > b);
EXPECT_FALSE(a == b);
// Same prefix, different length: the shorter string orders first.
EXPECT_TRUE(a < aa);
EXPECT_TRUE(a != aa);
EXPECT_FALSE(a > aa);
EXPECT_FALSE(a == aa);
EXPECT_TRUE(b > a);
EXPECT_TRUE(b != a);
EXPECT_FALSE(b < a);
EXPECT_FALSE(b == a);
// NOTE(review): the next line repeats the a == b check from above; it sits
// next to the comparisons against the C string `c`, so it was possibly meant
// to involve `c` — confirm.
EXPECT_FALSE(a == b);
// Comparisons against a plain C string.
EXPECT_FALSE(b == c);
EXPECT_TRUE(b != c);
// Empty versus non-empty.
EXPECT_TRUE(empty < a);
EXPECT_TRUE(empty != a);
EXPECT_FALSE(empty > a);
EXPECT_FALSE(empty == a);
EXPECT_TRUE(a > empty);
EXPECT_TRUE(a != empty);
EXPECT_FALSE(a < empty);
EXPECT_FALSE(a == empty);
// Equal contents in distinct objects.
EXPECT_FALSE(a < a_);
EXPECT_FALSE(a != a_);
EXPECT_FALSE(a > a_);
EXPECT_TRUE(a == a_);
// Strings with a leading NUL: comparison must look past the NUL.
EXPECT_TRUE(nulla < nullaa);
EXPECT_TRUE(nulla != nullaa);
EXPECT_FALSE(nulla > nullaa);
EXPECT_FALSE(nulla == nullaa);
EXPECT_TRUE(nulla < nullb);
EXPECT_TRUE(nullaa > nulla);
EXPECT_TRUE(nullaa != nulla);
EXPECT_FALSE(nullaa < nulla);
EXPECT_FALSE(nullaa == nulla);
}
// Converts a tstring to std::string, absl::string_view and (on
// PLATFORM_GOOGLE) absl::AlphaNum, checking contents and size of each result.
TEST(TF_TStringTest, Conversion) {
tstring s50(kLongString);
std::string s51(s50);
absl::string_view s52(s50);
EXPECT_EQ(kLongString, s51);
EXPECT_EQ(kLongStringLen, s51.size());
EXPECT_EQ(kLongString, s52);
EXPECT_EQ(kLongStringLen, s52.size());
#ifdef PLATFORM_GOOGLE
absl::AlphaNum s53(s50);
EXPECT_STREQ(kLongString, s53.data());
EXPECT_EQ(kLongStringLen, s53.size());
#endif // PLATFORM_GOOGLE
}
// Exercises resize (growing with the default and an explicit fill character,
// then shrinking), clear and reserve on a single string.
TEST(TF_TStringTest, Allocation) {
tstring s60;
// Growing with the default fill character produces NUL bytes.
s60.resize(2);
EXPECT_EQ(std::string("\0\0", 2), s60);
EXPECT_EQ(2, s60.size());
EXPECT_EQ(2, s60.length());
// Growing with an explicit fill character only fills the new region.
s60.resize(6, 'a');
EXPECT_EQ(std::string("\0\0aaaa", 6), s60);
EXPECT_EQ(6, s60.size());
EXPECT_EQ(6, s60.length());
// Shrinking keeps the existing prefix; the fill character is not used.
s60.resize(3, 'b');
EXPECT_EQ(std::string("\0\0a", 3), s60);
EXPECT_EQ(3, s60.size());
EXPECT_EQ(3, s60.length());
s60.clear();
EXPECT_EQ("", s60);
EXPECT_TRUE(s60.empty());
EXPECT_EQ(0, s60.size());
EXPECT_EQ(0, s60.length());
s60.reserve(100);
// 16-byte alignment 7*16-1 = 111
EXPECT_EQ(111, s60.capacity());
// NOTE(review): second reserve with the same size has no expectation after
// it; presumably it only checks that repeating the call is safe — confirm.
s60.reserve(100);
}
// Checks data(), c_str(), indexed access, const iteration, end() and back()
// against the fixture string.
TEST(TF_TStringTest, ElementAccess) {
tstring s70(kLongString);
EXPECT_STREQ(kLongString, s70.data());
// data() and c_str() are expected to return the same pointer.
EXPECT_EQ(s70.data(), s70.c_str());
for (size_t i = 0; i < s70.size(); i++) {
EXPECT_EQ(kLongString[i], s70.data()[i]);
}
// Walk the tstring iterator and the fixture in lock-step until the fixture's
// terminating NUL.
tstring::const_iterator i = s70.begin();
const char* j = kLongString;
for (; *j != '\0'; i++, j++) {
EXPECT_EQ(*j, *i);
}
// The test dereferences end() and expects a NUL there, i.e. it relies on the
// buffer being NUL-terminated.
EXPECT_EQ('\0', *s70.end());
EXPECT_EQ(*i, *s70.end());
EXPECT_EQ(*(i - 1), s70.back());
}
// Exercises append, erase, insert, swap and push_back. s81 is mutated step by
// step, so each expectation depends on all preceding operations.
TEST(TF_TStringTest, Modifiers) {
// Modifiers
tstring s80("ba");
tstring s81;
tstring s82(kLongString);
// append(): tstring, (pointer, length) with an embedded NUL, C string and
// (count, char) overloads.
s81.append(s80);
EXPECT_EQ("ba", s81);
s81.append(s80);
EXPECT_EQ("baba", s81);
s81.append("\0c", 2);
EXPECT_EQ(std::string("baba\0c", 6), s81);
s81.append("dd");
EXPECT_EQ(std::string("baba\0cdd", 8), s81);
s81.append(3, 'z');
EXPECT_EQ(tstring("baba\0cddzzz", 11), s81);
// Appending nothing, in every overload, must leave the string unchanged.
s81.append(0, 'z');
s81.append("dd", 0);
s81.append("");
s81.append(tstring());
EXPECT_EQ(std::string("baba\0cddzzz", 11), s81);
// erase(): from the front, then from the middle through the end.
s81.erase(0, 1);
EXPECT_EQ(std::string("aba\0cddzzz", 10), s81);
s81.erase(4, 6);
EXPECT_EQ(std::string("aba\0", 4), s81);
// insert(): a substring of another tstring, then repeated characters at the
// front and just before the last character.
s81.insert(1, tstring("\0moo\0", 5), 1, 4);
EXPECT_EQ(std::string("amoo\0ba\0", 8), s81);
s81.insert(0, 2, '\0');
s81.insert(s81.size() - 1, 1, 'q');
EXPECT_EQ(std::string("\0\0amoo\0baq\0", 11), s81);
// Erasing everything yields an empty string.
s81.erase(0, s81.size());
EXPECT_EQ(tstring(), s81);
// swap() between a short string and the long fixture.
s80.swap(s82);
EXPECT_EQ(kLongString, s80);
EXPECT_EQ("ba", s82);
// push_back() also accepts a NUL character.
s82.push_back('\0');
s82.push_back('q');
EXPECT_EQ(std::string("ba\0q", 4), s82);
}
// Checks the non-member operators: operator+ with empty operands and with
// embedded NULs, and operator<< into a stringstream.
TEST(TF_TStringTest, Friends) {
tstring s90("b");
tstring s91("\0a\0", 3);
tstring s92;
// Concatenation with an empty string on either side.
EXPECT_EQ("b", s90 + s92);
EXPECT_EQ("b", s92 + s90);
EXPECT_EQ(std::string("\0a\0", 3), s92 + s91);
EXPECT_EQ(std::string("\0a\0", 3), s91 + s92);
// Concatenation keeps embedded NULs from both operands.
EXPECT_EQ(std::string("b\0a\0", 4), s90 + s91);
EXPECT_EQ(std::string("\0a\0b", 4), s91 + s90);
// Streaming is expected to emit all three bytes, NULs included.
std::stringstream ss;
ss << s91;
EXPECT_EQ(std::string("\0a\0", 3), ss.str());
}