TensorCord Implementation.

Benchmarks (when using the new inline Variant implementation):

The approximate break-even runtime (1x-1.25x) between copying a string tensor and creating N TensorCord objects inside a Variant is reached (with the Variant inlining changes here) at 16-32 bytes per string entry.

There is also a significant reduction in memory usage, since the string data is no longer copied.

(Settings: --runs 25 --benchtime 1s --perflab --copt "-mavx" //third_party/tensorflow/core/kernels:tensor_cord_test)

name                                                          time/op
BM_TensorCopyFromTensor_NumElem_1_StringSize_16                79.0ns ? 1%
BM_TensorCordFromTensor_NumElem_1_StringSize_16                95.1ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_16               122ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_32                 104ns ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_32                 106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_32               123ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_128                106ns ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_128                106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_128              123ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_1024               160ns ? 1%
BM_TensorCordFromTensor_NumElem_1_StringSize_1024               106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_1024             122ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_4096               332ns ? 1%
BM_TensorCordFromTensor_NumElem_1_StringSize_4096               106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_4096             124ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_16                263ns ? 1%
BM_TensorCordFromTensor_NumElem_16_StringSize_16                543ns ? 1%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_16              983ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_32                583ns ? 1%
BM_TensorCordFromTensor_NumElem_16_StringSize_32                748ns ? 2%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_32              986ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_128               635ns ? 3%
BM_TensorCordFromTensor_NumElem_16_StringSize_128               750ns ? 2%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_128             983ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_1024            1.62?s ? 2%
BM_TensorCordFromTensor_NumElem_16_StringSize_1024              749ns ? 2%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_1024            983ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_4096            5.22?s ? 6%
BM_TensorCordFromTensor_NumElem_16_StringSize_4096              748ns ? 1%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_4096          1.01?s ? 2%
BM_TensorCopyFromTensor_NumElem_32_StringSize_16                454ns ? 1%
BM_TensorCordFromTensor_NumElem_32_StringSize_16              1.00?s ? 2%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_16            1.91?s ? 1%
BM_TensorCopyFromTensor_NumElem_32_StringSize_32              1.10?s ? 2%
BM_TensorCordFromTensor_NumElem_32_StringSize_32              1.44?s ? 1%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_32            1.91?s ? 2%
BM_TensorCopyFromTensor_NumElem_32_StringSize_128             1.22?s ? 2%
BM_TensorCordFromTensor_NumElem_32_StringSize_128             1.44?s ? 1%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_128           1.91?s ? 1%
BM_TensorCopyFromTensor_NumElem_32_StringSize_1024            3.27?s ? 1%
BM_TensorCordFromTensor_NumElem_32_StringSize_1024            1.44?s ? 2%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_1024          1.92?s ? 2%
BM_TensorCopyFromTensor_NumElem_32_StringSize_4096            12.2?s ? 4%
BM_TensorCordFromTensor_NumElem_32_StringSize_4096            1.44?s ? 1%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_4096          1.97?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_16                851ns ? 1%
BM_TensorCordFromTensor_NumElem_64_StringSize_16              1.91?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_16            3.75?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_32              2.14?s ? 4%
BM_TensorCordFromTensor_NumElem_64_StringSize_32              2.81?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_32            3.76?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_128             2.36?s ? 4%
BM_TensorCordFromTensor_NumElem_64_StringSize_128             2.81?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_128           3.77?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_1024            6.67?s ? 4%
BM_TensorCordFromTensor_NumElem_64_StringSize_1024            2.81?s ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_1024          3.89?s ? 2%
BM_TensorCopyFromTensor_NumElem_64_StringSize_4096            28.7?s ? 5%
BM_TensorCordFromTensor_NumElem_64_StringSize_4096            2.81?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_4096          4.20?s ? 4%
BM_TensorCopyFromTensor_NumElem_128_StringSize_16             1.60?s ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_16             3.77?s ? 2%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_16           7.56?s ? 2%
BM_TensorCopyFromTensor_NumElem_128_StringSize_32             4.12?s ? 1%
BM_TensorCordFromTensor_NumElem_128_StringSize_32             5.59?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_32           7.67?s ? 2%
BM_TensorCopyFromTensor_NumElem_128_StringSize_128            4.86?s ? 4%
BM_TensorCordFromTensor_NumElem_128_StringSize_128            5.59?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_128          7.86?s ? 2%
BM_TensorCopyFromTensor_NumElem_128_StringSize_1024           15.5?s ? 4%
BM_TensorCordFromTensor_NumElem_128_StringSize_1024           5.58?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_1024         8.44?s ? 1%
BM_TensorCopyFromTensor_NumElem_128_StringSize_4096           58.8?s ? 4%
BM_TensorCordFromTensor_NumElem_128_StringSize_4096           5.58?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_4096         8.78?s ? 3%

name                                                          allocs/op
BM_TensorCopyFromTensor_NumElem_1_StringSize_16                  2.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_16                  3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_16                4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_32                  4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_32                  3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_32                4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_128                 4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_128                 3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_128               4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_1024                4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_1024                3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_1024              4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_4096                4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_4096                3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_4096              4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_16                 2.00 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_16                 18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_16               34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_32                 34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_32                 18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_32               34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_128                34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_128                18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_128              34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_1024               34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_1024               18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_1024             34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_4096               34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_4096               18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_4096             34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_16                 2.00 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_16                 34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_16               66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_32                 66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_32                 34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_32               66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_128                66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_128                34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_128              66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_1024               66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_1024               34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_1024             66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_4096               66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_4096               34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_4096             66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_16                 2.00 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_16                 66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_16                130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_32                  130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_32                 66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_32                130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_128                 130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_128                66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_128               130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_1024                130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_1024               66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_1024              130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_4096                130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_4096               66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_4096              130 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_16                2.00 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_16                 130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_16               258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_32                 258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_32                 130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_32               258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_128                258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_128                130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_128              258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_1024               258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_1024               130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_1024             258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_4096               258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_4096               130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_4096             258 ? 0%

name                                                          peak-mem(Bytes)/op
BM_TensorCopyFromTensor_NumElem_1_StringSize_16                  72.0 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_16                   168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_16                 184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_32                   168 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_32                   168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_32                 184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_128                  360 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_128                  168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_128                184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_1024               2.15k ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_1024                 168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_1024               184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_4096               8.30k ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_4096                 168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_4096               184 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_16                  432 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_16                1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_16              2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_32                1.25k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_32                1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_32              2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_128               2.88k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_128               1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_128             2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_1024              18.1k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_1024              1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_1024            2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_4096              70.3k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_4096              1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_4096            2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_16                  816 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_16                3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_16              4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_32                2.40k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_32                3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_32              4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_128               5.57k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_128               3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_128             4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_1024              35.1k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_1024              3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_1024            4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_4096               137k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_4096              3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_4096            4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_16                1.58k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_16                7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_16              8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_32                4.70k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_32                7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_32              8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_128               10.9k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_128               7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_128             8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_1024              69.2k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_1024              7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_1024            8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_4096               269k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_4096              7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_4096            8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_16               3.12k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_16               15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_16             17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_32               9.31k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_32               15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_32             17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_128              21.7k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_128              15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_128            17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_1024              137k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_1024             15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_1024           17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_4096              534k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_4096             15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_4096           17.5k ? 0%

PiperOrigin-RevId: 276768889
Change-Id: Ic3024dda576307d786564ec24bf22e320712ccf5
This commit is contained in:
Eugene Brevdo 2019-10-25 15:07:20 -07:00 committed by TensorFlower Gardener
parent 49feca4ec7
commit a13e3ebc70
5 changed files with 781 additions and 0 deletions

View File

@ -653,6 +653,7 @@ class Tensor {
friend class DMAHelper; // For access to buf_.
friend class TensorCApi; // For access to buf_.
friend class TensorCord; // For access to buf_.
friend class TensorReference; // For access to buf_.
friend class VariableOp; // For access to set_shape.
friend class AutoReloadVariableOp; // For access to set_shape.

View File

@ -2587,6 +2587,30 @@ tf_kernel_library(
deps = DYNAMIC_DEPS,
)
# TensorCord: reference-counted views into tensor-backed (or externally
# released) string data, used to avoid copying string tensors.
cc_library(
    name = "tensor_cord",
    srcs = ["tensor_cord.cc"],
    hdrs = ["tensor_cord.h"],
    deps = [
        "//tensorflow/core:framework",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)
# Unit tests and benchmarks for :tensor_cord (see BM_TensorCord* benchmarks).
tf_cc_test(
    name = "tensor_cord_test",
    srcs = ["tensor_cord_test.cc"],
    deps = [
        ":tensor_cord",
        "//tensorflow/core:framework",
        "//tensorflow/core:framework_internal",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/platform:cord",
    ],
)
LOOKUP_DEPS = [
":bounds_check",
":initializable_lookup_table",

View File

@ -0,0 +1,107 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/kernels/tensor_cord.h"
#include <cstring>
#include "tensorflow/core/framework/variant.h"
namespace tensorflow {
// TensorCord must be small enough to fit in Variant's inline storage;
// otherwise every Variant holding a TensorCord would heap-allocate,
// defeating the point of this class (see the benchmark notes in the CL).
static_assert(Variant::CanInlineType<TensorCord>(),
              "TensorCord should be inlined into Variants");
// Invokes the user-supplied releaser, if any, for externally-backed chunks.
// Inline chunks own no outside memory and need no cleanup.
TensorCord::CordRep::~CordRep() {
  if (is_inline_) return;
  const ExternalRep& ext = rep_.external;
  if (ext.releaser) {
    ext.releaser(ext.arg);
  }
}
// Drops one reference on every chunk (releasers fire when counts hit zero).
TensorCord::~TensorCord() { Cleanup(); }
// Serializes the cord by flattening every chunk into the Variant metadata
// string (this copies the data once).
void TensorCord::Encode(VariantTensorData* data) const {
  string* out = &data->metadata_string();
  out->clear();
  for (absl::string_view chunk : Chunks()) {
    out->append(chunk.data(), chunk.size());
  }
}
// Deserializes from Variant metadata. Ownership note: the serialized bytes
// are moved into a heap-allocated string that the new chunk owns; it is
// freed via StringReleaser when the last reference to this cord is dropped.
bool TensorCord::Decode(VariantTensorData data) {
  auto* str = new string(std::move(data.metadata_string()));
  // Release any chunks this cord previously held before adopting the new one.
  Cleanup();
  chunks_.push_back(new CordRep(absl::string_view(*str), &StringReleaser, str));
  return true;
}
// Returns `tensor`'s buffer with one extra reference, which the caller owns
// (paired with TensorBufReleaser below).
TensorBuffer* TensorCord::TensorBufWithRef(Tensor* tensor) {
  TensorBuffer* buffer = tensor->buf_;
  buffer->Ref();
  return buffer;
}
// Releaser callback: drops the reference taken by TensorBufWithRef().
void TensorCord::TensorBufReleaser(void* tensor_buffer) {
  auto* buffer = static_cast<TensorBuffer*>(tensor_buffer);
  buffer->Unref();
}
void TensorCord::StringReleaser(void* str_ptr) {
delete static_cast<string*>(str_ptr);
}
namespace {

// Helpers for STLStringResizeUninitialized.
//
// HasMember is true_type or false_type, depending on whether or not
// T has a __resize_default_init member. Resize will call the
// __resize_default_init member if it exists, and will call the resize
// member otherwise.
//
// Primary template: fall back to ordinary resize(), which zero-fills the
// new bytes (the bytes are immediately overwritten by the caller anyway).
template <typename string_type, typename = void>
struct ResizeUninitializedTraits {
  using HasMember = std::false_type;
  static void Resize(string_type* s, size_t new_size) { s->resize(new_size); }
};

// __resize_default_init is provided by libc++ >= 8.0.
// SFINAE: this partial specialization is selected only when the expression
// `s.__resize_default_init(237)` is well-formed (237 is an arbitrary int
// used solely to form the unevaluated call expression).
template <typename string_type>
struct ResizeUninitializedTraits<
    string_type, absl::void_t<decltype(std::declval<string_type&>()
                                           .__resize_default_init(237))> > {
  using HasMember = std::true_type;
  static void Resize(string_type* s, size_t new_size) {
    s->__resize_default_init(new_size);
  }
};

// Resize string `s` to `new_size`, leaving the data uninitialized.
// Used by TensorCord::operator string() to avoid zero-filling a buffer that
// is about to be fully overwritten by memcpy.
static inline void STLStringResizeUninitialized(string* s, size_t new_size) {
  ResizeUninitializedTraits<string>::Resize(s, new_size);
}

}  // namespace
// Flattens the cord into a single string. This copies all underlying data.
TensorCord::operator string() const {
  string result;
  // Size the destination once, without zero-filling, then memcpy each chunk.
  STLStringResizeUninitialized(&result, size());
  char* dest = const_cast<char*>(result.data());
  for (CordRep* chunk : chunks_) {
    const absl::string_view piece = chunk->view();
    memcpy(dest, piece.data(), piece.size());
    dest += piece.size();
  }
  DCHECK_EQ(dest - result.data(), size());
  return result;
}
} // namespace tensorflow

View File

@ -0,0 +1,358 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_KERNELS_TENSOR_CORD_H_
#define TENSORFLOW_CORE_KERNELS_TENSOR_CORD_H_
#include <array>
#include <numeric>
#include "absl/container/inlined_vector.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "tensorflow/core/framework/variant_tensor_data.h"
namespace tensorflow {
// Signature of the callback invoked to release a chunk's backing memory once
// the last TensorCord referencing that chunk is destroyed.
typedef void (*CordRepReleaser)(void*);
class TensorCord {
  // A TensorCord keeps a view into some data, and a cleanup method to clean up
  // that data when the TensorCord destructor is called.  Copying a TensorCord
  // increments a reference count to the cleanup method, and so the cleanup
  // method is only called when all copies of the original TensorCord are
  // cleared.
  //
  // Example:
  //
  // const string& s = t.scalar<string>()();
  // TensorCord tc(s, &t);
  // ASSERT_EQ(s, tc.view());
  // TensorCord copy(tc);
  // tc = TensorCord();  // cleanup not called; the reference is held by `copy`.
  // copy = TensorCord();  // cleanup happens now, the reference is destroyed.
  //
  // Another example:
  //
  // void TensorProtoDeleter(void* ptr) {
  //   delete static_cast<TensorProto*>(ptr);
  // }
  //
  // auto p = absl::MakeUnique<TensorProto>(...);
  // absl::string_view content(p->tensor_content());
  // TensorCord tc(content, TensorProtoDeleter, p.release());
  //
 public:
  // Type name under which TensorCord is registered with the Variant framework.
  static constexpr const char kTypeName[] = "tensorflow::TensorCord";

  // Creates an empty cord: size() == 0, no chunks, no cleanup to run.
  TensorCord() : chunks_() {}

  ~TensorCord();

  // Args:
  //   `view`: should point to a location in memory that is guaranteed to
  //     remain valid until `releaser` is called.
  //   `releaser`: A callback that will be executed when there are no
  //     references left on `view`.  It will be called via `releaser(memory)`.
  //   `memory`: The argument passed to `releaser` when it is called.
  //
  // You are STRONGLY advised to provide a non-null `releaser`, and a pointer
  // to the underlying data (while ensuring that the data will not be deleted
  // until `releaser(memory)` is called).  Otherwise the TensorCord may
  // outlive the data backing `view`.
  TensorCord(absl::string_view view, CordRepReleaser releaser,
             void* memory = nullptr)
      : chunks_({new CordRep(view, releaser, memory)}) {}

  // Args:
  //   `view`: should point to a location in memory backed by `tensor`,
  //     e.g., `view` is a string_view on a tstring which is an element
  //     of `tensor`.  Furthermore, the associated tstring is not expected
  //     to be modified in such a way that the underlying memory will
  //     be changed after this TensorCord is created.
  TensorCord(absl::string_view view, Tensor* tensor)
      : chunks_({NewCordRepFromTensor(view, tensor)}) {}

  // Disallow construction with empty callback or empty tensor.
  TensorCord(absl::string_view view, std::nullptr_t, void* memory) = delete;
  TensorCord(absl::string_view view, std::nullptr_t) = delete;

  // Copying shares the underlying chunks (reference counts are incremented);
  // moving transfers them and leaves the source empty.
  TensorCord(const TensorCord& other);
  TensorCord(TensorCord&& other) noexcept;
  TensorCord& operator=(const TensorCord& other);
  TensorCord& operator=(TensorCord&& other) noexcept;

  // Appends `other`'s chunks to this cord, sharing (not copying) them.
  void Append(const TensorCord& other);

  // Appends a chunk backed by externally-released memory; same contract as
  // the (view, releaser, memory) constructor.
  void Append(absl::string_view view, CordRepReleaser releaser,
              void* memory = nullptr);

  // Appends a chunk backed by `tensor`; same contract as the (view, tensor)
  // constructor.
  void Append(absl::string_view view, Tensor* tensor);

  // Disallow Appends with empty callbacks or empty tensors.
  void Append(absl::string_view view, std::nullptr_t, void* memory) = delete;
  void Append(absl::string_view view, std::nullptr_t) = delete;

  // Total number of bytes across all chunks.  O(#chunks).
  size_t size() const;
  bool empty() const { return size() == 0; }

  // NOTE: This performs an expensive copy of the underlying data.
  explicit operator string() const;

  // Input iterator over the cord's chunks; each chunk is a string_view.
  class ChunkIterator {
   public:
    using iterator_category = std::input_iterator_tag;
    using value_type = absl::string_view;
    using difference_type = ptrdiff_t;
    using pointer = const value_type*;
    using reference = value_type;

    ChunkIterator& operator++();

    ChunkIterator operator++(int) {
      ChunkIterator tmp(*this);
      operator++();
      return tmp;
    }

    bool operator==(const ChunkIterator& other) const {
      return (cord_ == other.cord_ && chunk_index_ == other.chunk_index_);
    }

    bool operator!=(const ChunkIterator& other) const {
      return !(*this == other);
    }

    reference operator*() const {
      assert(cord_ != nullptr);
      return view_;
    }

    pointer operator->() const {
      assert(cord_ != nullptr);
      return &view_;
    }

    friend class TensorCord;

   private:
    // Constructs a `begin()` iterator from `cord`.
    explicit ChunkIterator(const TensorCord* cord, int chunk_index);
    const TensorCord* const cord_;
    int chunk_index_;
    absl::string_view view_;
  };

  // Range adaptor so `for (absl::string_view chunk : cord.Chunks())` works.
  class ChunkRange {
   public:
    explicit ChunkRange(const TensorCord* cord) : cord_(cord) {}

    ChunkIterator begin() const { return ChunkIterator(cord_, 0); }

    ChunkIterator end() const {
      return ChunkIterator(cord_, cord_->chunks_.size());
    }

   private:
    const TensorCord* cord_;
  };

  // Note that the ordinary caveats of temporary lifetime extension apply:
  //
  //   void Process() {
  //     for (absl::string_view chunk : CordFactory().Chunks()) {
  //       // The temporary Cord returned by CordFactory has been destroyed!
  //     }
  //   }
  ChunkRange Chunks() const { return ChunkRange(this); }

  ChunkIterator chunk_begin() const { return ChunkIterator(this, 0); }

  ChunkIterator chunk_end() const {
    return ChunkIterator(this, chunks_.size());
  }

  static string TypeName() { return kTypeName; }

  string DebugString() const {
    return absl::StrCat("<TensorCord size=", size(), ">");
  }

  void Encode(VariantTensorData* data) const;

  bool Decode(VariantTensorData data);

 private:
  // Unrefs all chunks and empties the chunk list.
  void Cleanup();

  // A single reference-counted chunk: either a small string copied inline,
  // or a view into external memory plus the releaser that frees it.
  class CordRep : public core::RefCounted {
   public:
    CordRep(absl::string_view view, CordRepReleaser releaser,
            void* arg = nullptr)
        : is_inline_(false), rep_{.external = {view, releaser, arg}} {}

    // **WARNING** Only use this constructor if
    // view.size() <= CordRep::kMaxInlineSize.
    explicit CordRep(absl::string_view view)
        : is_inline_(true), rep_{.internal = InlineFromView(view)} {}

    ~CordRep() override;

    absl::string_view view() const {
      if (is_inline_) {
        return absl::string_view(
            rep_.internal.data() + 1,
            *reinterpret_cast<const uint8*>(rep_.internal.data()));
      } else {
        return rep_.external.view;
      }
    }

   private:
    friend class TensorCord;

    struct ExternalRep {
      absl::string_view view;
      CordRepReleaser releaser;
      void* arg;
    };

    // We save the size in the first byte, so subtract 1.
    static constexpr int kMaxInlineSize = sizeof(ExternalRep) - 1;
    static_assert(kMaxInlineSize < 255,
                  "Cannot store size of InlineRep in a single byte.");

    // The first byte stores the size as a uint8.  The rest of the bytes are
    // the string itself.
    using InlineRep = std::array<char, sizeof(ExternalRep)>;

    static InlineRep InlineFromView(absl::string_view view) {
      // The buffer holds one size byte plus kMaxInlineSize data bytes, so a
      // view of exactly kMaxInlineSize bytes still fits.  DCHECK_LE (not LT)
      // matches the `view.size() <= kMaxInlineSize` test in
      // NewCordRepFromTensor(), which would otherwise trip this check in
      // debug builds for views of exactly kMaxInlineSize bytes.
      DCHECK_LE(view.size(), kMaxInlineSize);
      InlineRep rep;
      *reinterpret_cast<uint8*>(rep.data()) = view.size();
      std::memcpy(static_cast<char*>(rep.data() + 1), view.data(), view.size());
      return rep;
    }

    // Member variables.
    const bool is_inline_;
    const union {
      InlineRep internal;
      ExternalRep external;
    } rep_;
  };

  static TensorBuffer* TensorBufWithRef(Tensor* tensor);
  static void TensorBufReleaser(void* tensor_buffer);
  static void StringReleaser(void* str_ptr);
  static CordRep* NewCordRepFromTensor(absl::string_view view, Tensor* tensor);

  // Most cords hold one or two chunks; keep them inline in the vector.
  absl::InlinedVector<CordRep*, 2> chunks_;
};
// Copy constructor: shares (rather than copies) the underlying chunks by
// taking an additional reference on each one.
inline TensorCord::TensorCord(const TensorCord& other)
    : chunks_(other.chunks_) {
  for (CordRep* chunk : chunks_) {
    chunk->Ref();
  }
}
// Move constructor: steals `other`'s chunks without touching refcounts.
// The explicit clear() guarantees the moved-from cord is empty (a moved-from
// InlinedVector is otherwise in an unspecified state).
inline TensorCord::TensorCord(TensorCord&& other) noexcept
    : chunks_(std::move(other.chunks_)) {
  other.chunks_.clear();
}
// Copy assignment: releases the current chunks, then shares `other`'s by
// taking an extra reference on each.
// The self-assignment guard is required: without it, Cleanup() would unref
// (and possibly free) the very chunks we are about to copy, leaving the cord
// empty or pointing at released memory after `tc = tc;`.
inline TensorCord& TensorCord::operator=(const TensorCord& other) {
  if (this != &other) {
    Cleanup();
    chunks_ = other.chunks_;
    for (CordRep* chunk : chunks_) {
      chunk->Ref();
    }
  }
  return *this;
}
// Move assignment: releases the current chunks, then swaps chunk lists with
// `other` (post-Cleanup ours is empty, so `other` ends up empty).
// Self-move leaves the cord empty, which is a valid moved-from state.
inline TensorCord& TensorCord::operator=(TensorCord&& other) noexcept {
  Cleanup();
  std::swap(chunks_, other.chunks_);
  return *this;
}
inline void TensorCord::Append(const TensorCord& other) {
for (auto* rep : other.chunks_) {
chunks_.push_back(rep);
rep->Ref();
}
}
// Appends a chunk viewing externally-owned memory; `releaser(memory)` runs
// when the last reference to the chunk is dropped.
inline void TensorCord::Append(absl::string_view view, CordRepReleaser releaser,
                               void* memory) {
  CordRep* chunk = new CordRep(view, releaser, memory);
  chunks_.push_back(chunk);
}
// Appends a chunk backed by `tensor` (stored inline when small enough).
inline void TensorCord::Append(absl::string_view view, Tensor* tensor) {
  CordRep* chunk = NewCordRepFromTensor(view, tensor);
  chunks_.push_back(chunk);
}
inline size_t TensorCord::size() const {
return (chunks_.empty())
? 0
: std::accumulate(chunk_begin(), chunk_end(), 0,
[](size_t acc, absl::string_view b) {
return acc + b.size();
});
}
// Advances to the next chunk.  Must not be called on an end() iterator.
inline TensorCord::ChunkIterator& TensorCord::ChunkIterator::operator++() {
  assert(cord_ != nullptr);
  assert(chunk_index_ < cord_->chunks_.size());
  chunk_index_ += 1;
  // Cache the new chunk's view unless we just reached end(); the stale view_
  // on an end() iterator is never read (operator* asserts first).
  if (chunk_index_ != cord_->chunks_.size()) {
    view_ = cord_->chunks_[chunk_index_]->view();
  }
  return *this;
}
// Constructs an iterator positioned at `index`, caching that chunk's view so
// operator*/operator-> can return it without re-dereferencing.
inline TensorCord::ChunkIterator::ChunkIterator(const TensorCord* cord,
                                                int index)
    : cord_(cord), chunk_index_(index) {
  // end() (index == chunks_.size()) has no chunk to cache.
  if (index < cord_->chunks_.size()) {
    view_ = cord_->chunks_[index]->view();
  }
}
// Builds a chunk for `view`: views small enough for the inline buffer are
// copied into the chunk itself; larger views keep a reference on `tensor`'s
// buffer, released when the chunk's refcount reaches zero.
inline TensorCord::CordRep* TensorCord::NewCordRepFromTensor(
    absl::string_view view, Tensor* tensor) {
  const bool fits_inline = view.size() <= TensorCord::CordRep::kMaxInlineSize;
  return fits_inline
             ? new CordRep(view)
             : new CordRep(view, &TensorBufReleaser, TensorBufWithRef(tensor));
}
inline void TensorCord::Cleanup() {
if (chunks_.empty()) return;
for (auto* rep : chunks_) {
rep->Unref();
}
chunks_.clear();
}
} // namespace tensorflow
#endif // TENSORFLOW_CORE_KERNELS_TENSOR_CORD_H_

View File

@ -0,0 +1,291 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/kernels/tensor_cord.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_reference.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/variant.h"
#include "tensorflow/core/framework/variant_encode_decode.h"
#include "tensorflow/core/platform/cord.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
namespace tensorflow {
namespace {
void DoNothingReleaser(void*) {}
TEST(TensorCordTest, Empty) {
  // A default-constructed cord has no chunks and zero size, both through the
  // raw iterators and through the Chunks() range.
  TensorCord tc;
  EXPECT_EQ(tc.chunk_begin(), tc.chunk_end());
  auto chunk_range = tc.Chunks();
  EXPECT_EQ(chunk_range.begin(), chunk_range.end());
  EXPECT_EQ(tc.size(), 0);
}
TEST(TensorCordTest, ViewOfValue) {
  // A single-chunk cord exposes exactly one view, equal to the source string.
  TensorCord tc("abc", &DoNothingReleaser, nullptr);
  EXPECT_EQ(*tc.chunk_begin(), "abc");
  auto iter = tc.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, tc.chunk_end());
}
TEST(TensorCordTest, Chunks) {
  // Range-based iteration over a one-chunk cord visits the chunk exactly once.
  TensorCord tc("abc", &DoNothingReleaser, nullptr);
  int num_chunks = 0;
  for (auto chunk : tc.Chunks()) {
    EXPECT_EQ(chunk, "abc");
    ++num_chunks;
  }
  EXPECT_EQ(num_chunks, 1);
}
// This function takes an arg-less std::function that may have a closure, and
// creates a std::function with no closure: one that can be cast
// directly to a (*)(void*) function pointer that takes the original function.
// Use it this way:
//
// void callback_with_arg(void (*fn)(void*), void* arg) { fn(arg); }
//
// auto fn = [&]() { ... }
// auto thunk = CreateThunkFor(fn);
// callback_with_arg(thunk, &fn);
//
// Idea from:
// http://bannalia.blogspot.com/2016/07/passing-capturing-c-lambda-functions-as.html
// Returns a captureless lambda (convertible to the CordRepReleaser function
// pointer) that casts its void* argument back to T* and invokes it as a
// nullary callable. Note that `fn` is not used at runtime — it exists only so
// T is deduced; the caller must keep the original callable alive and pass its
// address as the releaser's memory argument.
template <typename T>
CordRepReleaser CreateThunkFor(const T& fn) {
  return [](void* ptr) { (*static_cast<T*>(ptr))(); };
}
TEST(TensorCordTest, Copy) {
  // Copy-assignment shares the underlying rep: the releaser fires exactly
  // once, when the last owner lets go.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord tc_copy;
  {
    TensorCord tc(a, thunk, &cleaner);
    tc_copy = tc;
  }  // tc destroyed here; tc_copy still holds a reference.
  auto iter = tc_copy.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, tc_copy.chunk_end());
  EXPECT_EQ(cleaned, 0);
  tc_copy = TensorCord();  // last reference dropped — cleaner fires
  EXPECT_EQ(cleaned, 1);
}
TEST(TensorCordTest, AppendCord) {
  // Appending one cord to another Refs the appended reps: each releaser fires
  // exactly once, only after every owner has released its reference.
  int cleaned_0 = 0;
  int cleaned_1 = 0;
  auto cleaner_0 = [&cleaned_0]() { ++cleaned_0; };
  auto cleaner_1 = [&cleaned_1]() { ++cleaned_1; };
  TensorCord tc_0("abc", CreateThunkFor(cleaner_0), &cleaner_0);
  TensorCord tc_1("cba", CreateThunkFor(cleaner_1), &cleaner_1);
  tc_0.Append(tc_1);
  EXPECT_EQ(string(tc_0), "abccba");
  int idx = 0;
  for (auto chunk : tc_0.Chunks()) {
    EXPECT_EQ(chunk, idx == 0 ? "abc" : "cba");
    ++idx;
  }
  EXPECT_EQ(idx, 2);
  tc_1 = TensorCord();  // tc_0 still holds a ref on tc_1's rep
  EXPECT_EQ(cleaned_0, 0);
  EXPECT_EQ(cleaned_1, 0);
  tc_0 = TensorCord();  // last refs dropped: both releasers fire
  EXPECT_EQ(cleaned_0, 1);
  EXPECT_EQ(cleaned_1, 1);
}
TEST(TensorCordTest, AppendView) {
  // Appending a raw view adds a new rep with its own releaser; both releasers
  // fire when the cord is cleared.
  int cleaned_0 = 0;
  int cleaned_1 = 0;
  auto cleaner_0 = [&cleaned_0]() { ++cleaned_0; };
  auto cleaner_1 = [&cleaned_1]() { ++cleaned_1; };
  TensorCord tc_0("abc", CreateThunkFor(cleaner_0), &cleaner_0);
  tc_0.Append("cba", CreateThunkFor(cleaner_1), &cleaner_1);
  EXPECT_EQ(string(tc_0), "abccba");
  auto iter = tc_0.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(*iter, "cba");
  ++iter;
  EXPECT_EQ(iter, tc_0.chunk_end());
  EXPECT_EQ(cleaned_0, 0);
  EXPECT_EQ(cleaned_1, 0);
  tc_0 = TensorCord();  // sole owner released: both releasers fire
  EXPECT_EQ(cleaned_0, 1);
  EXPECT_EQ(cleaned_1, 1);
}
TEST(TensorCordTest, Move) {
  // Move-assignment transfers ownership without touching the refcount: the
  // releaser fires only once, when the surviving cord is cleared.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord tc_dest;
  {
    TensorCord tc_src(a, thunk, &cleaner);
    tc_dest = std::move(tc_src);
  }  // tc_src (emptied by the move) is destroyed here.
  EXPECT_EQ(tc_dest.size(), 3);
  auto iter = tc_dest.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, tc_dest.chunk_end());
  EXPECT_EQ(cleaned, 0);
  tc_dest = TensorCord();
  EXPECT_EQ(tc_dest.size(), 0);
  EXPECT_EQ(cleaned, 1);
}
TEST(TensorCordTest, CopyConstructor) {
  // The copy constructor bumps the refcount: both cords see the data, and the
  // releaser runs only after the second owner is cleared.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord original(a, thunk, &cleaner);
  TensorCord duplicate(original);
  EXPECT_EQ(original.size(), 3);
  EXPECT_EQ(duplicate.size(), 3);
  auto iter = duplicate.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, duplicate.chunk_end());
  EXPECT_EQ(cleaned, 0);
  original = TensorCord();   // first owner released — data still alive
  EXPECT_EQ(cleaned, 0);
  duplicate = TensorCord();  // last owner released — cleaner fires
  EXPECT_EQ(cleaned, 1);
}
TEST(TensorCordTest, MoveConstructor) {
  // The move constructor steals the chunks; only the destination cord ever
  // triggers the releaser.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord source(a, thunk, &cleaner);
  TensorCord dest(std::move(source));
  EXPECT_EQ(dest.size(), 3);
  auto iter = dest.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, dest.chunk_end());
  EXPECT_EQ(cleaned, 0);
  dest = TensorCord();
  EXPECT_EQ(cleaned, 1);
}
#ifdef PLATFORM_GOOGLE
// Baseline benchmark: deep-copies a DT_STRING tensor of `num_elem` strings,
// each `string_size` bytes long, once per iteration.
void TensorCopyFromTensorBenchmark(benchmark::State& state, int num_elem,
                                   int string_size) {
  Tensor strings(DT_STRING, {num_elem});
  auto t = strings.flat<string>();
  for (int i = 0; i < num_elem; ++i) {
    // Fill each element with string_size copies of 'a'.
    t(i).insert(0, string_size, 'a');
  }
  for (auto _ : state) {
    // DoNotOptimize keeps the copy from being dead-code eliminated.
    testing::DoNotOptimize(tensor::DeepCopy(strings));
  }
}
// Benchmarks building a DT_VARIANT tensor of TensorCords that view (rather
// than copy) the strings of an existing DT_STRING tensor. This is the path
// the header's break-even numbers compare against TensorCopyFromTensor.
void TensorCordFromTensorBenchmark(benchmark::State& state, int num_elem,
                                   int string_size) {
  Tensor strings(DT_STRING, {num_elem});
  auto t = strings.flat<string>();
  for (int i = 0; i < num_elem; ++i) {
    t(i).insert(0, string_size, 'a');
  }
  for (auto _ : state) {
    Tensor copy(DT_VARIANT, {num_elem});
    auto t_copy = copy.flat<Variant>();
    for (int i = 0; i < num_elem; ++i) {
      // Each variant wraps a TensorCord viewing t(i)'s bytes in `strings`.
      t_copy(i) = TensorCord(t(i), &strings);
    }
  }
}
void CordReleaser(void* cord_ptr) { delete static_cast<absl::Cord*>(cord_ptr); }
// Benchmarks building a DT_VARIANT tensor of TensorCords that view
// heap-allocated absl::Cords; each cord is freed by CordReleaser when its
// TensorCord is destroyed.
void TensorCordFromAbslCordBenchmark(benchmark::State& state, int num_elem,
                                     int string_size) {
  std::vector<absl::Cord> cords(num_elem);
  for (int i = 0; i < num_elem; ++i) {
    string s(string_size, 'a');
    cords[i] = s;
  }
  for (auto _ : state) {
    Tensor copy(DT_VARIANT, {num_elem});
    auto t_copy = copy.flat<Variant>();
    for (int i = 0; i < num_elem; ++i) {
      auto my_cord = new absl::Cord(cords[i]);
      // NOTE(review): only the cord's first chunk is wrapped; an absl::Cord
      // of a large string may hold several chunks — confirm measuring just
      // the first chunk is intended.
      t_copy(i) = TensorCord(*my_cord->chunk_begin(), CordReleaser, my_cord);
    }
  }
}
// Defines three benchmarks — tensor deep-copy (the baseline), TensorCord
// built from a tensor, and TensorCord built from an absl::Cord — for a single
// (NUM_ELEM, STRING_SIZE) combination. (Comments cannot appear inside the
// macro body, hence this block comment.)
#define CreateBM(NUM_ELEM, STRING_SIZE)                                       \
  void BM_TensorCopyFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE( \
      benchmark::State& state) {                                              \
    TensorCopyFromTensorBenchmark(state, NUM_ELEM, STRING_SIZE);              \
  }                                                                           \
  BENCHMARK(                                                                  \
      BM_TensorCopyFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE); \
  void BM_TensorCordFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE( \
      benchmark::State& state) {                                              \
    TensorCordFromTensorBenchmark(state, NUM_ELEM, STRING_SIZE);              \
  }                                                                           \
  BENCHMARK(                                                                  \
      BM_TensorCordFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE); \
  void                                                                        \
  BM_TensorCordFromAbslCord_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE(    \
      benchmark::State& state) {                                              \
    TensorCordFromAbslCordBenchmark(state, NUM_ELEM, STRING_SIZE);            \
  }                                                                           \
  BENCHMARK(                                                                  \
      BM_TensorCordFromAbslCord_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE);
// Instantiates the three benchmarks above at the standard string sizes for
// one NUM_ELEM value.
#define CreateStringBMs(NUM_ELEM)           \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/16);   \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/32);   \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/128);  \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/1024); \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/4096);
// Benchmark the cross product of element counts and string sizes reported in
// the header comment of the implementation file.
CreateStringBMs(/*NUM_ELEM=*/1);
CreateStringBMs(/*NUM_ELEM=*/16);
CreateStringBMs(/*NUM_ELEM=*/32);
CreateStringBMs(/*NUM_ELEM=*/64);
CreateStringBMs(/*NUM_ELEM=*/128);
#endif // PLATFORM_GOOGLE
} // namespace
} // namespace tensorflow