From 24f835217fd27f21141ff0254a2b93ea3cfd7b6c Mon Sep 17 00:00:00 2001
From: Dero Gharibian <dero@google.com>
Date: Mon, 29 Jun 2020 18:11:30 -0700
Subject: [PATCH] Eliminate unnecessary packing/unpacking of string tensors
 across the C-API and language bindings.

With the std::string -> tensorflow::tstring migration in tensorflow core complete, we now have an ABI stable string type with associated C accessors/modifiers. The packing/unpacking of string tensors across the C-API is now superfluous and can be removed.  This is an ABI breaking change; updates to language bindings in tensorflow/ are included in this CL.

PiperOrigin-RevId: 318933001
Change-Id: I3c99cf70834ba0b6cefc4ba39a35d6c168e880db
---
 RELEASE.md                                    |   5 +
 tensorflow/c/BUILD                            |   4 +
 tensorflow/c/c_api.h                          |   1 +
 tensorflow/c/c_api_test.cc                    |  27 ++-
 tensorflow/c/tf_tensor.cc                     | 137 +------------
 tensorflow/c/tf_tensor.h                      |  28 ---
 tensorflow/c/tf_tstring.h                     |  20 ++
 tensorflow/core/platform/BUILD                |   8 +
 tensorflow/go/tensor.go                       | 180 ++++--------------
 tensorflow/java/src/main/native/tensor_jni.cc | 113 +++--------
 .../org/tensorflow/op/core/ConstantTest.java  |  15 +-
 tensorflow/python/lib/core/ndarray_tensor.cc  |  59 ++----
 12 files changed, 145 insertions(+), 452 deletions(-)
 create mode 100644 tensorflow/c/tf_tstring.h

diff --git a/RELEASE.md b/RELEASE.md
index 33bc49da750..28d8bd7ca36 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -6,6 +6,11 @@
 
 * <DOCUMENT BREAKING CHANGES HERE>
 * <THIS SECTION SHOULD CONTAIN API, ABI AND BEHAVIORAL BREAKING CHANGES>
+* The byte layout for string tensors across the C-API has been updated to match
+  TF Core/C++; i.e., a contiguous array of `tensorflow::tstring`/`TF_TString`s.
+* C-API functions `TF_StringDecode`, `TF_StringEncode`, and
+  `TF_StringEncodedSize` are no longer relevant and have been removed; see
+  core/platform/ctstring.h for string access/modification in C.
 
 ## Known Caveats
 
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 92cda95e8e4..410fc22069f 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -29,6 +29,8 @@ filegroup(
         "tf_file_statistics.h",
         "tf_status.h",
         "tf_tensor.h",
+        "tf_tstring.h",
+        "//tensorflow/core/platform:ctstring",
     ],
     visibility = ["//tensorflow:__subpackages__"],
 )
@@ -48,6 +50,7 @@ filegroup(
             "*test*",
         ],
     ) + [
+        "//tensorflow/core/platform:ctstring",
         "//tensorflow/cc:srcs_no_runtime",
         "//tensorflow/core/distributed_runtime:server_lib.h",
     ],
@@ -78,6 +81,7 @@ tf_cuda_library(
         "c_api_internal.h",
         "tf_datatype.h",
         "tf_tensor.h",
+        "tf_tstring.h",
     ],
     visibility = [
         "//tensorflow:internal",
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index 0c413f6ebae..808bcf3bd80 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/c/tf_datatype.h"
 #include "tensorflow/c/tf_status.h"
 #include "tensorflow/c/tf_tensor.h"
+#include "tensorflow/c/tf_tstring.h"
 
 // --------------------------------------------------------------------------
 // C API for TensorFlow.
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 0fc07b87070..bbbbb8f7d56 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -2286,14 +2286,15 @@ TEST_F(CApiAttributesTest, Tensor) {
 
 TEST_F(CApiAttributesTest, StringTensor) {
   // Create the string-Tensor "attribute" value.
-  char encoded[] = {
-      0,   0, 0, 0, 0, 0, 0, 0,  // array[uint64] offsets
-      1,                         // varint encoded string length
-      'A',
-  };
+  const char test_string[] =
+      "borkborkborkborkborkborkborkbork";  // >24bytes to force heap alloc
+  TF_TString tstr[1];
+  TF_TString_Init(&tstr[0]);
+  TF_TString_Copy(&tstr[0], test_string, sizeof(test_string) - 1);
+
   auto deallocator = [](void* data, size_t len, void* arg) {};
-  unique_tensor_ptr t_in(TF_NewTensor(TF_STRING, nullptr, 0, &encoded[0],
-                                      sizeof(encoded), deallocator, nullptr),
+  unique_tensor_ptr t_in(TF_NewTensor(TF_STRING, nullptr, 0, &tstr[0],
+                                      sizeof(tstr), deallocator, nullptr),
                          TF_DeleteTensor);
 
   // Create a TF_Operation with the attribute t_in
@@ -2312,9 +2313,17 @@ TEST_F(CApiAttributesTest, StringTensor) {
   EXPECT_EQ(TF_STRING, TF_TensorType(t_out));
   EXPECT_EQ(0, TF_NumDims(t_out));
   ASSERT_EQ(TF_TensorByteSize(t_in.get()), TF_TensorByteSize(t_out));
-  EXPECT_EQ(0, memcmp(TF_TensorData(t_in.get()), TF_TensorData(t_out),
-                      TF_TensorByteSize(t_out)));
+  TF_TString* t_in_tstr = static_cast<TF_TString*>(TF_TensorData(t_in.get()));
+  TF_TString* t_out_tstr = static_cast<TF_TString*>(TF_TensorData(t_out));
+  EXPECT_EQ(absl::string_view(test_string),
+            absl::string_view(TF_TString_GetDataPointer(t_out_tstr),
+                              TF_TString_GetSize(t_out_tstr)));
+  EXPECT_EQ(absl::string_view(TF_TString_GetDataPointer(t_in_tstr),
+                              TF_TString_GetSize(t_in_tstr)),
+            absl::string_view(TF_TString_GetDataPointer(t_out_tstr),
+                              TF_TString_GetSize(t_out_tstr)));
   TF_DeleteTensor(t_out);
+  TF_TString_Dealloc(&tstr[0]);
 }
 
 TEST_F(CApiAttributesTest, TensorList) {
diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc
index 7e4d3bb4932..0feb986ce44 100644
--- a/tensorflow/c/tf_tensor.cc
+++ b/tensorflow/c/tf_tensor.cc
@@ -228,57 +228,6 @@ Status TensorInterface::BitcastFrom(const TensorInterface& from, DataType type,
 }  // namespace tensorflow
 
 // --------------------------------------------------------------------------
-void StringEncode(const char* src, size_t src_len, char* dst) {
-  dst = tensorflow::core::EncodeVarint64(dst, src_len);
-  memcpy(dst, src, src_len);
-}
-
-size_t TF_StringEncode(const char* src, size_t src_len, char* dst,
-                       size_t dst_len, TF_Status* status) {
-  const size_t sz = TF_StringEncodedSize(src_len);
-  if (sz < src_len) {
-    Set_TF_Status_from_Status(
-        status, InvalidArgument("src string is too large to encode"));
-    return 0;
-  }
-  if (dst_len < sz) {
-    Set_TF_Status_from_Status(
-        status,
-        InvalidArgument("dst_len (", dst_len, ") too small to encode a ",
-                        src_len, "-byte string"));
-    return 0;
-  }
-  StringEncode(src, src_len, dst);
-  return sz;
-}
-
-static Status TF_StringDecode_Impl(const char* src, size_t src_len,
-                                   const char** dst, size_t* dst_len) {
-  tensorflow::uint64 len64 = 0;
-  const char* p = tensorflow::core::GetVarint64Ptr(src, src + src_len, &len64);
-  if (p == nullptr) {
-    return InvalidArgument("invalid string encoding or truncated src buffer");
-  }
-  if (len64 > std::numeric_limits<size_t>::max()) {
-    return InvalidArgument("encoded string is ", len64,
-                           "-bytes, which is too large for this architecture");
-  }
-  *dst = p;
-  *dst_len = static_cast<size_t>(len64);
-  return Status::OK();
-}
-
-size_t TF_StringDecode(const char* src, size_t src_len, const char** dst,
-                       size_t* dst_len, TF_Status* status) {
-  Set_TF_Status_from_Status(status,
-                            TF_StringDecode_Impl(src, src_len, dst, dst_len));
-  if (TF_GetCode(status) != TF_OK) return 0;
-  return static_cast<size_t>(*dst - src) + *dst_len;
-}
-
-size_t TF_StringEncodedSize(size_t len) {
-  return static_cast<size_t>(tensorflow::core::VarintLength(len)) + len;
-}
 
 static void DeleteArray(void* data, size_t size, void* arg) {
   DCHECK_EQ(data, arg);
@@ -334,58 +283,12 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) {
     std::memcpy(TF_TensorData(t), str.c_str(), str.size());
     return t;
   }
-  if (src.dtype() != tensorflow::DT_STRING) {
-    Tensor tensor;
-    if (!tensor.CopyFrom(src, src.shape())) {
-      return nullptr;
-    }
-    return new TF_Tensor{new tensorflow::TensorInterface(tensor)};
-  }
-  // DT_STRING tensors require a copying since TF_Tensor.buffer expects a flatly
-  // encoded sequence of strings.
 
-  // Compute bytes needed for encoding.
-  size_t size = 0;
-  const auto& srcarray = src.flat<tstring>();
-  for (int i = 0; i < srcarray.size(); ++i) {
-    const string& s = srcarray(i);
-    // uint64 starting_offset, TF_StringEncode-d string.
-    size += sizeof(tensorflow::uint64) + TF_StringEncodedSize(s.size());
-  }
-
-  // Encode all strings.
-  char* base = new char[size];
-  char* data_start = base + sizeof(tensorflow::uint64) * srcarray.size();
-  char* dst = data_start;  // Where next string is encoded.
-  size_t dst_len = size - static_cast<size_t>(data_start - base);
-  tensorflow::uint64* offsets = reinterpret_cast<tensorflow::uint64*>(base);
-  for (int i = 0; i < srcarray.size(); ++i) {
-    *offsets = (dst - data_start);
-    offsets++;
-    const string& s = srcarray(i);
-    const size_t consumed = TF_StringEncodedSize(s.size());
-    StringEncode(s.data(), s.size(), dst);
-    dst += consumed;
-    dst_len -= consumed;
-  }
-  if (dst != base + size) {
-    *status = InvalidArgument(
-        "invalid string tensor encoding (decoded ", (dst - base),
-        " bytes, but the tensor is encoded in ", size, " bytes");
-    delete[] base;
+  Tensor tensor;
+  if (!tensor.CopyFrom(src, src.shape())) {
     return nullptr;
   }
-
-  auto dims = src.shape().dim_sizes();
-  std::vector<tensorflow::int64> dimvec(dims.size());
-  for (size_t i = 0; i < dims.size(); ++i) {
-    dimvec[i] = dims[i];
-  }
-  static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
-                "64-bit int types should match in size");
-  return TF_NewTensor(TF_STRING,
-                      reinterpret_cast<const int64_t*>(dimvec.data()),
-                      dimvec.size(), base, size, DeleteArray, base);
+  return new TF_Tensor{new tensorflow::TensorInterface(tensor)};
 }
 
 Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) {
@@ -409,39 +312,7 @@ Status TensorInterface::ToTensor(tensorflow::Tensor* dst) const {
     }
     return Status::OK();
   }
-  if (tensor_.dtype() != DT_STRING) {
-    *dst = tensor_;
-    return Status::OK();
-  }
-  // TF_STRING tensors require copying since Tensor class expects a sequence of
-  // string objects.
-  const tensorflow::int64 num_elements = tensor_.NumElements();
-  const char* input = reinterpret_cast<const char*>(Data());
-  const size_t src_size = ByteSize();
-  if (static_cast<tensorflow::int64>(src_size / sizeof(tensorflow::uint64)) <
-      num_elements) {
-    return InvalidArgument(
-        "Malformed TF_STRING tensor; too short to hold number of elements");
-  }
-  const char* data_start = input + sizeof(tensorflow::uint64) * num_elements;
-  const char* limit = input + src_size;
-
-  *dst = tensorflow::Tensor(tensor_.dtype(), tensor_.shape());
-  auto dstarray = dst->flat<tstring>();
-  for (tensorflow::int64 i = 0; i < num_elements; ++i) {
-    tensorflow::uint64 offset =
-        reinterpret_cast<const tensorflow::uint64*>(input)[i];
-    if (static_cast<ptrdiff_t>(offset) >= (limit - data_start)) {
-      return InvalidArgument("Malformed TF_STRING tensor; element ", i,
-                             " out of range");
-    }
-    size_t len;
-    const char* p;
-    const char* srcp = data_start + offset;
-    Status status = TF_StringDecode_Impl(srcp, limit - srcp, &p, &len);
-    if (!status.ok()) return status;
-    dstarray(i).assign(p, len);
-  }
+  *dst = tensor_;
   return Status::OK();
 }
 
diff --git a/tensorflow/c/tf_tensor.h b/tensorflow/c/tf_tensor.h
index 7ed4a9f754e..acdf053e63a 100644
--- a/tensorflow/c/tf_tensor.h
+++ b/tensorflow/c/tf_tensor.h
@@ -148,34 +148,6 @@ TF_CAPI_EXPORT extern void TF_TensorBitcastFrom(const TF_Tensor* from,
                                                 int num_new_dims,
                                                 TF_Status* status);
 
-// --------------------------------------------------------------------------
-// Encode the string `src` (`src_len` bytes long) into `dst` in the format
-// required by TF_STRING tensors. Does not write to memory more than `dst_len`
-// bytes beyond `*dst`. `dst_len` should be at least
-// TF_StringEncodedSize(src_len).
-//
-// On success returns the size in bytes of the encoded string.
-// Returns an error into `status` otherwise.
-TF_CAPI_EXPORT extern size_t TF_StringEncode(const char* src, size_t src_len,
-                                             char* dst, size_t dst_len,
-                                             TF_Status* status);
-
-// Decode a string encoded using TF_StringEncode.
-//
-// On success, sets `*dst` to the start of the decoded string and `*dst_len` to
-// its length. Returns the number of bytes starting at `src` consumed while
-// decoding. `*dst` points to memory within the encoded buffer.  On failure,
-// `*dst` and `*dst_len` are undefined and an error is set in `status`.
-//
-// Does not read memory more than `src_len` bytes beyond `src`.
-TF_CAPI_EXPORT extern size_t TF_StringDecode(const char* src, size_t src_len,
-                                             const char** dst, size_t* dst_len,
-                                             TF_Status* status);
-
-// Return the size in bytes required to encode a string `len` bytes long into a
-// TF_STRING tensor.
-TF_CAPI_EXPORT extern size_t TF_StringEncodedSize(size_t len);
-
 // Returns bool iff this tensor is aligned.
 TF_CAPI_EXPORT extern bool TF_TensorIsAligned(const TF_Tensor*);
 
diff --git a/tensorflow/c/tf_tstring.h b/tensorflow/c/tf_tstring.h
new file mode 100644
index 00000000000..8b576ff8197
--- /dev/null
+++ b/tensorflow/c/tf_tstring.h
@@ -0,0 +1,20 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_C_TF_TSTRING_H_
+#define TENSORFLOW_C_TF_TSTRING_H_
+
+#include "tensorflow/core/platform/ctstring.h"
+
+#endif  // THIRD_PARTY_TENSORFLOW_C_TF_TSTRING_H_
diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD
index 33a1e7cfe0a..305e6ccf2d4 100644
--- a/tensorflow/core/platform/BUILD
+++ b/tensorflow/core/platform/BUILD
@@ -762,6 +762,14 @@ cc_library(
     ],
 )
 
+filegroup(
+    name = "ctstring",
+    srcs = [
+        "ctstring.h",
+        "ctstring_internal.h",
+    ],
+)
+
 cc_library(
     name = "types",
     hdrs = ["types.h"],
diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go
index bd84254f698..9221d35274c 100644
--- a/tensorflow/go/tensor.go
+++ b/tensorflow/go/tensor.go
@@ -16,14 +16,20 @@ limitations under the License.
 
 package tensorflow
 
-// #include <stdlib.h>
-// #include <string.h>
-// #include "tensorflow/c/c_api.h"
+/*
+#include <stdlib.h>
+#include <string.h>
+#include "tensorflow/c/c_api.h"
+
+void toNewTString(_GoString_ gstr, TF_TString *tstr) {
+    TF_TString_Init(tstr);
+    TF_TString_Copy(tstr, _GoStringPtr(gstr), _GoStringLen(gstr));
+}
+*/
 import "C"
 
 import (
 	"bytes"
-	"encoding/binary"
 	"fmt"
 	"io"
 	"math/bits"
@@ -79,9 +85,7 @@ func NewTensor(value interface{}) (*Tensor, error) {
 	nflattened := numElements(shape)
 	nbytes := typeOf(dataType, nil).Size() * uintptr(nflattened)
 	if dataType == String {
-		// TF_STRING tensors are encoded as an array of 8-byte offsets
-		// followed by string data. See c_api.h.
-		nbytes = uintptr(nflattened*8 + int64(byteSizeOfEncodedStrings(val)))
+		nbytes = uintptr(nflattened) * C.sizeof_TF_TString
 	}
 	var shapePtr *C.int64_t
 	if len(shape) > 0 {
@@ -94,35 +98,26 @@ func NewTensor(value interface{}) (*Tensor, error) {
 	runtime.SetFinalizer(t, (*Tensor).finalize)
 	raw := tensorData(t.c)
 	buf := bytes.NewBuffer(raw[:0:len(raw)])
-	if dataType != String {
-		if isAllArray(val.Type()) {
-			// We have arrays all the way down, or just primitive types. We can
-			// just copy the memory in as it is all contiguous.
-			if err := copyPtr(buf, unpackEFace(value).data, int(val.Type().Size())); err != nil {
-				return nil, err
-			}
-		} else {
-			// When there are slices involved the memory for each leaf slice may
-			// not be contiguous with the others or in the order we might
-			// expect, so we need to work our way down to each slice of
-			// primitives and copy them individually
-			if err := encodeTensorWithSlices(buf, val, shape); err != nil {
-				return nil, err
-			}
-		}
 
-		if uintptr(buf.Len()) != nbytes {
-			return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
-		}
-	} else {
-		e := stringEncoder{offsets: buf, data: raw[nflattened*8:], status: newStatus()}
-		if err := e.encode(reflect.ValueOf(value), shape); err != nil {
+	if isAllArray(val.Type()) {
+		// We have arrays all the way down, or just primitive types. We can
+		// just copy the memory in as it is all contiguous.
+		if err := copyPtr(buf, unpackEFace(value).data, int(val.Type().Size())); err != nil {
 			return nil, err
 		}
-		if int64(buf.Len()) != nflattened*8 {
-			return nil, bug("invalid offset encoding for TF_STRING tensor with shape %v (got %v, want %v)", shape, buf.Len(), nflattened*8)
+	} else {
+		// When there are slices involved the memory for each leaf slice may
+		// not be contiguous with the others or in the order we might
+		// expect, so we need to work our way down to each slice of
+		// primitives and copy them individually
+		if err := encodeTensorWithSlices(buf, val, shape); err != nil {
+			return nil, err
 		}
 	}
+
+	if uintptr(buf.Len()) != nbytes {
+		return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
+	}
 	return t, nil
 }
 
@@ -131,6 +126,8 @@ func NewTensor(value interface{}) (*Tensor, error) {
 // contiguous in RAM.
 func isAllArray(typ reflect.Type) bool {
 	switch typ.Kind() {
+	case reflect.String:
+		return false
 	case reflect.Slice:
 		return false
 	case reflect.Array:
@@ -312,58 +309,16 @@ func setSliceInSlice(slice reflect.Value, index int, content sliceHeader) {
 
 // decodeOneDimString decodes a string tensor into a one-dimensional []string.
 func decodeOneDimString(raw []byte, nStrings int) ([]string, error) {
-	// Start by making an array of all the strings
 	strs := make([]string, nStrings)
-	// The first nStrings * 8 bytes of raw are offsets into the second half of
-	// the raw data. This second half is where the strings are encoded.
-	offsets := (*(*[]int64)(unsafe.Pointer(&raw)))[:nStrings]
+	tstrs := (*(*[]C.TF_TString)(unsafe.Pointer(&raw)))[:nStrings]
 
-	// Reset raw after the offsets. Now the offsets will work relative to raw
-	raw = raw[nStrings*8:]
-	// Next we work out the final length of the string data so we can copy the
-	// good data out of raw (which is owned by the C tensor and won't be safe
-	// to access if the tensor is freed)
-	r := bytes.NewReader(raw)
-	var totalLength int
-	for _, offset := range offsets {
-		// At each offset we should find a varint length of a string.
-		// Errors here should mean the tensor is corrupt.
-		if _, err := r.Seek(offset, io.SeekStart); err != nil {
-			return nil, err
-		}
-		l, err := binary.ReadUvarint(r)
-		if err != nil {
-			return nil, err
-		}
-		totalLength += int(l)
+	for i, tstr := range tstrs {
+		dst := C.TF_TString_GetDataPointer(&tstr)
+		dstLen := C.TF_TString_GetSize(&tstr)
+
+		strs[i] = C.GoStringN(dst, C.int(dstLen))
 	}
 
-	// Lets allocate a big buffer to carry our string data.
-	stringData := make([]byte, 0, totalLength)
-	// Now copy the string data across into our new buffer, keeping track of the
-	// location of each string in the strs slice.
-	var cursor int
-	for i, offset := range offsets {
-		// At each offset we should find a varint length. Read it
-		if _, err := r.Seek(offset, io.SeekStart); err != nil {
-			return nil, err
-		}
-		l, err := binary.ReadUvarint(r)
-		if err != nil {
-			return nil, err
-		}
-
-		// Then copy the actual string into our large buffer
-		target := stringData[cursor : cursor+int(l)]
-		if _, err := r.Read(target); err != nil {
-			return nil, err
-		}
-		// Track where this string data is.
-		strs[i] = *(*string)(unsafe.Pointer(&target))
-		cursor += int(l)
-	}
-
-	// So now we have a big slice of strings
 	return strs, nil
 }
 
@@ -469,26 +424,6 @@ func numElements(shape []int64) int64 {
 	return n
 }
 
-// byteSizeOfEncodedStrings returns the size of the encoded strings in val.
-// val MUST be a string, or a container (array/slice etc.) of strings.
-// Tensorflow encodes strings as the varint encoded length followed by the
-// string bytes. We could call into the C library to do this but cgo has a heavy
-// overhead. So we just do that calculation in Go
-func byteSizeOfEncodedStrings(val reflect.Value) int {
-	if val.Kind() == reflect.String {
-		return sizeVarUint(uint64(val.Len())) + val.Len()
-	}
-	if val.Kind() != reflect.Slice && val.Kind() != reflect.Array {
-		panic(fmt.Sprintf("unexpected type %s", val.Type()))
-	}
-	// Otherwise must be an array or slice.
-	var size int
-	for i := 0; i < val.Len(); i++ {
-		size += byteSizeOfEncodedStrings(val.Index(i))
-	}
-	return size
-}
-
 // sizeVarUint determines how many bytes it would take to encode the int v as
 // an unsigned varint
 func sizeVarUint(v uint64) int {
@@ -509,12 +444,18 @@ func encodeTensorWithSlices(w *bytes.Buffer, v reflect.Value, shape []int64) err
 		if v.Len() != expected {
 			return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
 		}
+	} else if v.Kind() == reflect.String {
+		s := v.Interface().(string)
+		var tstr C.TF_TString
+		C.toNewTString(s, &tstr)
+		ptr := unsafe.Pointer(&tstr)
+		return copyPtr(w, ptr, C.sizeof_TF_TString)
 	} else if v.Kind() != reflect.Array {
 		return fmt.Errorf("unsupported type %v", v.Type())
 	}
 
 	// Once we have just a single dimension we can just copy the data
-	if len(shape) == 1 && v.Len() > 0 {
+	if len(shape) == 1 && v.Len() > 0 && v.Index(0).Kind() != reflect.String {
 		elt := v.Index(0)
 		if !elt.CanAddr() {
 			panic("cannot take address")
@@ -556,45 +497,6 @@ func copyPtr(w *bytes.Buffer, ptr unsafe.Pointer, l int) error {
 	return err
 }
 
-type stringEncoder struct {
-	offsets *bytes.Buffer
-	data    []byte
-	offset  uint64
-	status  *status
-}
-
-func (e *stringEncoder) encode(v reflect.Value, shape []int64) error {
-	if v.Kind() == reflect.String {
-		if err := copyPtr(e.offsets, unsafe.Pointer(&e.offset), int(unsafe.Sizeof(e.offset))); err != nil {
-			return err
-		}
-		// A string is encoded as the varint length followed by the string bytes.
-		// We do this in Go to avoid the considerable overhead of a cgo call into
-		// the tensorflow library
-		s := v.String()
-		n := binary.PutUvarint(e.data[e.offset:], uint64(len(s)))
-		e.offset += uint64(n)
-		n = copy(e.data[e.offset:], s)
-		e.offset += uint64(n)
-		return nil
-	}
-
-	if v.Kind() == reflect.Slice {
-		expected := int(shape[0])
-		if v.Len() != expected {
-			return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
-		}
-	}
-
-	subShape := shape[1:]
-	for i := 0; i < v.Len(); i++ {
-		if err := e.encode(v.Index(i), subShape); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
 func bug(format string, args ...interface{}) error {
 	return fmt.Errorf("BUG: Please report at https://github.com/tensorflow/tensorflow/issues with the note: Go TensorFlow %v: %v", Version(), fmt.Sprintf(format, args...))
 }
diff --git a/tensorflow/java/src/main/native/tensor_jni.cc b/tensorflow/java/src/main/native/tensor_jni.cc
index 7e3cf4a88aa..5fa4e4b3e12 100644
--- a/tensorflow/java/src/main/native/tensor_jni.cc
+++ b/tensorflow/java/src/main/native/tensor_jni.cc
@@ -220,16 +220,13 @@ size_t readNDArray(JNIEnv* env, TF_DataType dtype, const char* src,
   }
 }
 
-jbyteArray TF_StringDecodeTojbyteArray(JNIEnv* env, const char* src,
-                                       size_t src_len, TF_Status* status) {
-  const char* dst = nullptr;
-  size_t dst_len = 0;
-  TF_StringDecode(src, src_len, &dst, &dst_len, status);
-  if (TF_GetCode(status) != TF_OK) {
-    return nullptr;
-  }
+jbyteArray TF_StringDecodeTojbyteArray(JNIEnv* env, const TF_TString* src) {
+  const char* dst = TF_TString_GetDataPointer(src);
+  size_t dst_len = TF_TString_GetSize(src);
+
   jbyteArray ret = env->NewByteArray(dst_len);
   jbyte* cpy = env->GetByteArrayElements(ret, nullptr);
+
   memcpy(cpy, dst, dst_len);
   env->ReleaseByteArrayElements(ret, cpy, 0);
   return ret;
@@ -238,69 +235,32 @@ jbyteArray TF_StringDecodeTojbyteArray(JNIEnv* env, const char* src,
 class StringTensorWriter {
  public:
   StringTensorWriter(TF_Tensor* t, int num_elements)
-      : offset_(0),
-        poffsets_(static_cast<char*>(TF_TensorData(t))),
-        pdata_(poffsets_ + 8 * num_elements),
-        plimit_(poffsets_ + TF_TensorByteSize(t)) {}
+      : index_(0), data_(static_cast<TF_TString*>(TF_TensorData(t))) {}
 
   void Add(const char* src, size_t len, TF_Status* status) {
     if (TF_GetCode(status) != TF_OK) return;
-    if (plimit_ - poffsets_ < sizeof(offset_)) {
-      TF_SetStatus(status, TF_OUT_OF_RANGE,
-                   "TF_STRING tensor encoding ran out of space for offsets, "
-                   "this is likely a bug, please file an issue at "
-                   "https://github.com/tensorflow/tensorflow/issues/new");
-      return;
-    }
-    memcpy(poffsets_, &offset_, sizeof(offset_));
-    size_t written =
-        TF_StringEncode(src, len, pdata_, (plimit_ - pdata_), status);
-    offset_ += written;
-    poffsets_ += 8;
-    pdata_ += written;
+    TF_TString_Init(&data_[index_]);
+    TF_TString_Copy(&data_[index_++], src, len);
   }
 
  private:
-  uint64_t offset_;
-  char* poffsets_;
-  char* pdata_;
-  const char* plimit_;
+  int index_;
+  TF_TString* data_;
 };
 
 class StringTensorReader {
  public:
   StringTensorReader(const TF_Tensor* t, int num_elements)
-      : index_(0),
-        offsets_(static_cast<const char*>(TF_TensorData(t))),
-        data_(offsets_ + 8 * num_elements),
-        limit_(offsets_ + TF_TensorByteSize(t)) {}
+      : index_(0), data_(static_cast<const TF_TString*>(TF_TensorData(t))) {}
 
   jbyteArray Next(JNIEnv* env, TF_Status* status) {
     if (TF_GetCode(status) != TF_OK) return nullptr;
-    uint64_t offset = 0;
-    const char* poffset = offsets_ + sizeof(offset) * index_;
-    if (poffset >= limit_) {
-      TF_SetStatus(
-          status, TF_INTERNAL,
-          "Invalid TF_STRING tensor, offsets table seems to be too small");
-      return nullptr;
-    }
-    memcpy(&offset, poffset, sizeof(offset));
-    const char* pdata = data_ + offset;
-    if (pdata >= limit_) {
-      TF_SetStatus(status, TF_INTERNAL,
-                   "Invalid TF_STRING tensor, invalid entry in offset table");
-      return nullptr;
-    }
-    ++index_;
-    return TF_StringDecodeTojbyteArray(env, pdata, (limit_ - pdata), status);
+    return TF_StringDecodeTojbyteArray(env, &data_[index_++]);
   }
 
  private:
   int index_;
-  const char* offsets_;
-  const char* data_;
-  const char* limit_;
+  const TF_TString* data_;
 };
 
 void readNDStringArray(JNIEnv* env, StringTensorReader* reader, int dims_left,
@@ -367,17 +327,16 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocateScalarBytes(
   // TF_STRING tensors are encoded with a table of 8-byte offsets followed by
   // TF_StringEncode-encoded bytes.
   size_t src_len = static_cast<int>(env->GetArrayLength(value));
-  size_t dst_len = TF_StringEncodedSize(src_len);
-  TF_Tensor* t = TF_AllocateTensor(TF_STRING, nullptr, 0, 8 + dst_len);
-  char* dst = static_cast<char*>(TF_TensorData(t));
-  memset(dst, 0, 8);  // The offset table
+  TF_Tensor* t = TF_AllocateTensor(TF_STRING, nullptr, 0, sizeof(TF_TString));
+  TF_TString* dst = static_cast<TF_TString*>(TF_TensorData(t));
 
   TF_Status* status = TF_NewStatus();
   jbyte* jsrc = env->GetByteArrayElements(value, nullptr);
   // jsrc is an unsigned byte*, TF_StringEncode requires a char*.
   // reinterpret_cast<> for this conversion should be safe.
-  TF_StringEncode(reinterpret_cast<const char*>(jsrc), src_len, dst + 8,
-                  dst_len, status);
+  TF_TString_Init(&dst[0]);
+  TF_TString_Copy(&dst[0], reinterpret_cast<const char*>(jsrc), src_len);
+
   env->ReleaseByteArrayElements(value, jsrc, JNI_ABORT);
   if (!throwExceptionIfNotOK(env, status)) {
     TF_DeleteStatus(status);
@@ -388,27 +347,18 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocateScalarBytes(
 }
 
 namespace {
-size_t nonScalarTF_STRINGTensorSize(JNIEnv* env, jarray value, int num_dims) {
-  if (num_dims == 0) {
-    // This is the last dimension, i.e., value should correspond to a jbyteArray
-    // encoding the string.
-    return TF_StringEncodedSize(
-        static_cast<size_t>(env->GetArrayLength(value)));
-  }
+void checkForNullEntries(JNIEnv* env, jarray value, int num_dims) {
   jsize len = env->GetArrayLength(value);
-  size_t ret = 0;
   for (jsize i = 0; i < len; ++i) {
     jarray elem = static_cast<jarray>(
         env->GetObjectArrayElement(static_cast<jobjectArray>(value), i));
     if (elem == nullptr) {
       throwException(env, kNullPointerException,
                      "null entries in provided array");
-      return ret;
+      return;
     }
-    ret += nonScalarTF_STRINGTensorSize(env, elem, num_dims - 1);
-    if (env->ExceptionCheck()) return ret;
+    if (env->ExceptionCheck()) return;
   }
-  return ret;
 }
 
 void fillNonScalarTF_STRINGTensorData(JNIEnv* env, jarray value, int num_dims,
@@ -448,11 +398,10 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocateNonScalarBytes(
     }
     env->ReleaseLongArrayElements(shape, jdims, JNI_ABORT);
   }
-  const size_t encoded_size =
-      nonScalarTF_STRINGTensorSize(env, value, num_dims);
+  checkForNullEntries(env, value, num_dims);
   if (env->ExceptionCheck()) return 0;
   TF_Tensor* t = TF_AllocateTensor(TF_STRING, dims, num_dims,
-                                   8 * num_elements + encoded_size);
+                                   sizeof(TF_TString) * num_elements);
   if (t == nullptr) {
     delete[] dims;
     throwException(env, kNullPointerException,
@@ -572,20 +521,8 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Tensor_scalarBytes(
                    "Tensor is not a string/bytes scalar");
     return nullptr;
   }
-  const char* data = static_cast<const char*>(TF_TensorData(t));
-  const char* src = data + 8;
-  size_t src_len = TF_TensorByteSize(t) - 8;
-  uint64_t offset = 0;
-  memcpy(&offset, data, sizeof(offset));
-  if (offset >= src_len) {
-    throwException(env, kIllegalArgumentException,
-                   "invalid tensor encoding: bad offsets");
-    return nullptr;
-  }
-  TF_Status* status = TF_NewStatus();
-  jbyteArray ret = TF_StringDecodeTojbyteArray(env, src, src_len, status);
-  throwExceptionIfNotOK(env, status);
-  TF_DeleteStatus(status);
+  const TF_TString* data = static_cast<const TF_TString*>(TF_TensorData(t));
+  jbyteArray ret = TF_StringDecodeTojbyteArray(env, &data[0]);
   return ret;
 }
 
diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java
index 7d3b26de8dc..cc2a7adcf35 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java
@@ -27,7 +27,6 @@ import java.nio.DoubleBuffer;
 import java.nio.FloatBuffer;
 import java.nio.IntBuffer;
 import java.nio.LongBuffer;
-
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
@@ -179,17 +178,13 @@ public class ConstantTest {
     byte[] data = {(byte) 1, (byte) 2, (byte) 3, (byte) 4};
     long[] shape = {};
 
-    // byte arrays (DataType.STRING in Tensorflow) are encoded as an offset in the data buffer,
-    // followed by a varint encoded size, followed by the data.
     ByteArrayOutputStream baout = new ByteArrayOutputStream();
     DataOutputStream out = new DataOutputStream(baout);
-    // Offset in array.
-    out.writeLong(0L);
-    // Varint encoded length of buffer.
-    // For any number < 0x80, the varint encoding is simply the number itself.
-    // https://developers.google.com/protocol-buffers/docs/encoding#varints
-    assertTrue(data.length < 0x80);
-    out.write(data.length);
+    // We construct a TF_TString_Small tstring, which has the capacity for a 22 byte string.
+    // The first 6 most significant bits of the first byte represent length; the remaining
+    // 2-bits are type indicators, and are left as 0b00 to denote a TF_TSTR_SMALL type.
+    assertTrue(data.length <= 22);
+    out.writeByte(data.length << 2);
     out.write(data);
     out.close();
     byte[] content = baout.toByteArray();
diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc
index 2afd2888e8f..e2fb3ec8dc9 100644
--- a/tensorflow/python/lib/core/ndarray_tensor.cc
+++ b/tensorflow/python/lib/core/ndarray_tensor.cc
@@ -245,29 +245,17 @@ Status PyBytesArrayMap(PyArrayObject* array, F f) {
 // the buffer. The caller takes ownership of the buffer.
 Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems,
                           size_t* size, void** buffer) {
-  // Compute bytes needed for encoding.
-  *size = 0;
-  TF_RETURN_IF_ERROR(
-      PyBytesArrayMap(array, [&size](const char* ptr, Py_ssize_t len) {
-        *size += sizeof(tensorflow::uint64) +
-                 tensorflow::core::VarintLength(len) + len;
-      }));
   // Encode all strings.
-  std::unique_ptr<char[]> base_ptr(new char[*size]);
-  char* base = base_ptr.get();
-  char* data_start = base + sizeof(tensorflow::uint64) * nelems;
-  char* dst = data_start;  // Where next string is encoded.
-  tensorflow::uint64* offsets = reinterpret_cast<tensorflow::uint64*>(base);
+  *size = nelems * sizeof(tensorflow::tstring);
+  std::unique_ptr<tensorflow::tstring[]> base_ptr(
+      new tensorflow::tstring[nelems]);
+  tensorflow::tstring* dst = base_ptr.get();
 
-  TF_RETURN_IF_ERROR(PyBytesArrayMap(
-      array, [&data_start, &dst, &offsets](const char* ptr, Py_ssize_t len) {
-        *offsets = (dst - data_start);
-        offsets++;
-        dst = tensorflow::core::EncodeVarint64(dst, len);
-        memcpy(dst, ptr, len);
-        dst += len;
+  TF_RETURN_IF_ERROR(
+      PyBytesArrayMap(array, [&dst](const char* ptr, Py_ssize_t len) {
+        dst->assign(ptr, len);
+        dst++;
       }));
-  CHECK_EQ(dst, base + *size);
   *buffer = base_ptr.release();
   return Status::OK();
 }
@@ -275,37 +263,18 @@ Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems,
 Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems,
                                      PyArrayObject* dst) {
   const void* tensor_data = TF_TensorData(src);
-  const size_t tensor_size = TF_TensorByteSize(src);
-  const char* limit = static_cast<const char*>(tensor_data) + tensor_size;
   DCHECK(tensor_data != nullptr);
   DCHECK_EQ(TF_STRING, TF_TensorType(src));
 
-  const uint64* offsets = static_cast<const uint64*>(tensor_data);
-  const size_t offsets_size = sizeof(uint64) * nelems;
-  const char* data = static_cast<const char*>(tensor_data) + offsets_size;
+  const tstring* tstr = static_cast<const tstring*>(tensor_data);
 
-  const size_t expected_tensor_size =
-      (limit - static_cast<const char*>(tensor_data));
-  if (expected_tensor_size - tensor_size) {
-    return errors::InvalidArgument(
-        "Invalid/corrupt TF_STRING tensor: expected ", expected_tensor_size,
-        " bytes of encoded strings for the tensor containing ", nelems,
-        " strings, but the tensor is encoded in ", tensor_size, " bytes");
-  }
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
   auto iter = make_safe(PyArray_IterNew(reinterpret_cast<PyObject*>(dst)));
   for (int64 i = 0; i < nelems; ++i) {
-    const char* start = data + offsets[i];
-    const char* ptr = nullptr;
-    size_t len = 0;
-
-    TF_StringDecode(start, limit - start, &ptr, &len, status.get());
-    if (TF_GetCode(status.get()) != TF_OK) {
-      return errors::InvalidArgument(TF_Message(status.get()));
-    }
-
-    auto py_string = make_safe(PyBytes_FromStringAndSize(ptr, len));
+    const tstring& tstr_i = tstr[i];
+    auto py_string =
+        make_safe(PyBytes_FromStringAndSize(tstr_i.data(), tstr_i.size()));
     if (py_string == nullptr) {
       return errors::Internal(
           "failed to create a python byte array when converting element #", i,
@@ -551,14 +520,14 @@ Status NdarrayToTensor(TFE_Context* ctx, PyObject* ndarray,
           static_cast<tensorflow::DataType>(dtype), dims.data(), dims.size(),
           encoded, size, convert_string,
           [](void* data, size_t len, void* arg) {
-            delete[] reinterpret_cast<char*>(data);
+            delete[] reinterpret_cast<tensorflow::tstring*>(data);
           },
           nullptr)});
     } else {
       *ret = make_safe(TF_NewTensor(
           dtype, dims.data(), dims.size(), encoded, size,
           [](void* data, size_t len, void* arg) {
-            delete[] reinterpret_cast<char*>(data);
+            delete[] reinterpret_cast<tensorflow::tstring*>(data);
           },
           nullptr));
     }