TensorCord Implementation.

Benchmarks (when using the new inline Variant implementation):

The approximate break-even runtime (1x-1.25x) between copying a string tensor and creating N TensorCord objects inside a Variant is reached (with the Variant inlining changes here) at 16-32 bytes per string entry.

There is also a significant reduction in memory usage, since the string data is no longer copied.

(Settings: --runs 25 --benchtime 1s --perflab --copt "-mavx" //third_party/tensorflow/core/kernels:tensor_cord_test)

name                                                          time/op
BM_TensorCopyFromTensor_NumElem_1_StringSize_16                79.0ns ? 1%
BM_TensorCordFromTensor_NumElem_1_StringSize_16                95.1ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_16               122ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_32                 104ns ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_32                 106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_32               123ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_128                106ns ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_128                106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_128              123ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_1024               160ns ? 1%
BM_TensorCordFromTensor_NumElem_1_StringSize_1024               106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_1024             122ns ? 1%
BM_TensorCopyFromTensor_NumElem_1_StringSize_4096               332ns ? 1%
BM_TensorCordFromTensor_NumElem_1_StringSize_4096               106ns ? 1%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_4096             124ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_16                263ns ? 1%
BM_TensorCordFromTensor_NumElem_16_StringSize_16                543ns ? 1%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_16              983ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_32                583ns ? 1%
BM_TensorCordFromTensor_NumElem_16_StringSize_32                748ns ? 2%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_32              986ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_128               635ns ? 3%
BM_TensorCordFromTensor_NumElem_16_StringSize_128               750ns ? 2%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_128             983ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_1024            1.62?s ? 2%
BM_TensorCordFromTensor_NumElem_16_StringSize_1024              749ns ? 2%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_1024            983ns ? 1%
BM_TensorCopyFromTensor_NumElem_16_StringSize_4096            5.22?s ? 6%
BM_TensorCordFromTensor_NumElem_16_StringSize_4096              748ns ? 1%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_4096          1.01?s ? 2%
BM_TensorCopyFromTensor_NumElem_32_StringSize_16                454ns ? 1%
BM_TensorCordFromTensor_NumElem_32_StringSize_16              1.00?s ? 2%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_16            1.91?s ? 1%
BM_TensorCopyFromTensor_NumElem_32_StringSize_32              1.10?s ? 2%
BM_TensorCordFromTensor_NumElem_32_StringSize_32              1.44?s ? 1%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_32            1.91?s ? 2%
BM_TensorCopyFromTensor_NumElem_32_StringSize_128             1.22?s ? 2%
BM_TensorCordFromTensor_NumElem_32_StringSize_128             1.44?s ? 1%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_128           1.91?s ? 1%
BM_TensorCopyFromTensor_NumElem_32_StringSize_1024            3.27?s ? 1%
BM_TensorCordFromTensor_NumElem_32_StringSize_1024            1.44?s ? 2%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_1024          1.92?s ? 2%
BM_TensorCopyFromTensor_NumElem_32_StringSize_4096            12.2?s ? 4%
BM_TensorCordFromTensor_NumElem_32_StringSize_4096            1.44?s ? 1%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_4096          1.97?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_16                851ns ? 1%
BM_TensorCordFromTensor_NumElem_64_StringSize_16              1.91?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_16            3.75?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_32              2.14?s ? 4%
BM_TensorCordFromTensor_NumElem_64_StringSize_32              2.81?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_32            3.76?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_128             2.36?s ? 4%
BM_TensorCordFromTensor_NumElem_64_StringSize_128             2.81?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_128           3.77?s ? 1%
BM_TensorCopyFromTensor_NumElem_64_StringSize_1024            6.67?s ? 4%
BM_TensorCordFromTensor_NumElem_64_StringSize_1024            2.81?s ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_1024          3.89?s ? 2%
BM_TensorCopyFromTensor_NumElem_64_StringSize_4096            28.7?s ? 5%
BM_TensorCordFromTensor_NumElem_64_StringSize_4096            2.81?s ? 1%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_4096          4.20?s ? 4%
BM_TensorCopyFromTensor_NumElem_128_StringSize_16             1.60?s ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_16             3.77?s ? 2%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_16           7.56?s ? 2%
BM_TensorCopyFromTensor_NumElem_128_StringSize_32             4.12?s ? 1%
BM_TensorCordFromTensor_NumElem_128_StringSize_32             5.59?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_32           7.67?s ? 2%
BM_TensorCopyFromTensor_NumElem_128_StringSize_128            4.86?s ? 4%
BM_TensorCordFromTensor_NumElem_128_StringSize_128            5.59?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_128          7.86?s ? 2%
BM_TensorCopyFromTensor_NumElem_128_StringSize_1024           15.5?s ? 4%
BM_TensorCordFromTensor_NumElem_128_StringSize_1024           5.58?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_1024         8.44?s ? 1%
BM_TensorCopyFromTensor_NumElem_128_StringSize_4096           58.8?s ? 4%
BM_TensorCordFromTensor_NumElem_128_StringSize_4096           5.58?s ? 1%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_4096         8.78?s ? 3%

name                                                          allocs/op
BM_TensorCopyFromTensor_NumElem_1_StringSize_16                  2.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_16                  3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_16                4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_32                  4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_32                  3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_32                4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_128                 4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_128                 3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_128               4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_1024                4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_1024                3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_1024              4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_4096                4.00 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_4096                3.00 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_4096              4.00 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_16                 2.00 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_16                 18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_16               34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_32                 34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_32                 18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_32               34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_128                34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_128                18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_128              34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_1024               34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_1024               18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_1024             34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_4096               34.0 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_4096               18.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_4096             34.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_16                 2.00 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_16                 34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_16               66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_32                 66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_32                 34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_32               66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_128                66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_128                34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_128              66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_1024               66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_1024               34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_1024             66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_4096               66.0 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_4096               34.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_4096             66.0 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_16                 2.00 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_16                 66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_16                130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_32                  130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_32                 66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_32                130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_128                 130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_128                66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_128               130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_1024                130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_1024               66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_1024              130 ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_4096                130 ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_4096               66.0 ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_4096              130 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_16                2.00 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_16                 130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_16               258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_32                 258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_32                 130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_32               258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_128                258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_128                130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_128              258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_1024               258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_1024               130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_1024             258 ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_4096               258 ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_4096               130 ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_4096             258 ? 0%

name                                                          peak-mem(Bytes)/op
BM_TensorCopyFromTensor_NumElem_1_StringSize_16                  72.0 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_16                   168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_16                 184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_32                   168 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_32                   168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_32                 184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_128                  360 ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_128                  168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_128                184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_1024               2.15k ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_1024                 168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_1024               184 ? 0%
BM_TensorCopyFromTensor_NumElem_1_StringSize_4096               8.30k ? 0%
BM_TensorCordFromTensor_NumElem_1_StringSize_4096                 168 ? 0%
BM_TensorCordFromAbslCord_NumElem_1_StringSize_4096               184 ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_16                  432 ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_16                1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_16              2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_32                1.25k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_32                1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_32              2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_128               2.88k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_128               1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_128             2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_1024              18.1k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_1024              1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_1024            2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_16_StringSize_4096              70.3k ? 0%
BM_TensorCordFromTensor_NumElem_16_StringSize_4096              1.97k ? 0%
BM_TensorCordFromAbslCord_NumElem_16_StringSize_4096            2.22k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_16                  816 ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_16                3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_16              4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_32                2.40k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_32                3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_32              4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_128               5.57k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_128               3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_128             4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_1024              35.1k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_1024              3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_1024            4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_32_StringSize_4096               137k ? 0%
BM_TensorCordFromTensor_NumElem_32_StringSize_4096              3.89k ? 0%
BM_TensorCordFromAbslCord_NumElem_32_StringSize_4096            4.40k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_16                1.58k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_16                7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_16              8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_32                4.70k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_32                7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_32              8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_128               10.9k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_128               7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_128             8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_1024              69.2k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_1024              7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_1024            8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_64_StringSize_4096               269k ? 0%
BM_TensorCordFromTensor_NumElem_64_StringSize_4096              7.73k ? 0%
BM_TensorCordFromAbslCord_NumElem_64_StringSize_4096            8.75k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_16               3.12k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_16               15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_16             17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_32               9.31k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_32               15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_32             17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_128              21.7k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_128              15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_128            17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_1024              137k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_1024             15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_1024           17.5k ? 0%
BM_TensorCopyFromTensor_NumElem_128_StringSize_4096              534k ? 0%
BM_TensorCordFromTensor_NumElem_128_StringSize_4096             15.4k ? 0%
BM_TensorCordFromAbslCord_NumElem_128_StringSize_4096           17.5k ? 0%

PiperOrigin-RevId: 276768889
Change-Id: Ic3024dda576307d786564ec24bf22e320712ccf5
This commit is contained in:
Eugene Brevdo 2019-10-25 15:07:20 -07:00 committed by TensorFlower Gardener
parent 49feca4ec7
commit a13e3ebc70
5 changed files with 781 additions and 0 deletions

View File

@ -653,6 +653,7 @@ class Tensor {
friend class DMAHelper; // For access to buf_.
friend class TensorCApi; // For access to buf_.
friend class TensorCord; // For access to buf_.
friend class TensorReference; // For access to buf_.
friend class VariableOp; // For access to set_shape.
friend class AutoReloadVariableOp; // For access to set_shape.

View File

@ -2587,6 +2587,30 @@ tf_kernel_library(
deps = DYNAMIC_DEPS,
)
# TensorCord: reference-counted views into tensor-backed (or externally
# released) string data, used to avoid copying string tensors.
cc_library(
    name = "tensor_cord",
    srcs = ["tensor_cord.cc"],
    hdrs = ["tensor_cord.h"],
    deps = [
        "//tensorflow/core:framework",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)
# Unit tests and benchmarks for :tensor_cord (see BM_TensorCord* benchmarks).
tf_cc_test(
    name = "tensor_cord_test",
    srcs = ["tensor_cord_test.cc"],
    deps = [
        ":tensor_cord",
        "//tensorflow/core:framework",
        "//tensorflow/core:framework_internal",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/platform:cord",
    ],
)
LOOKUP_DEPS = [
":bounds_check",
":initializable_lookup_table",

View File

@ -0,0 +1,107 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/kernels/tensor_cord.h"
#include <cstring>
#include "tensorflow/core/framework/variant.h"
namespace tensorflow {
// TensorCord must be small enough to fit in Variant's inline storage;
// otherwise every Variant holding a TensorCord would heap-allocate,
// defeating the point of this class (see the benchmark notes in the CL).
static_assert(Variant::CanInlineType<TensorCord>(),
              "TensorCord should be inlined into Variants");
// Invokes the user-supplied releaser, if any, for externally-backed chunks.
// Inline chunks own no outside memory and need no cleanup.
TensorCord::CordRep::~CordRep() {
  if (is_inline_) return;
  const ExternalRep& ext = rep_.external;
  if (ext.releaser) {
    ext.releaser(ext.arg);
  }
}
// Drops one reference on every chunk (releasers fire when counts hit zero).
TensorCord::~TensorCord() { Cleanup(); }
// Serializes the cord by flattening every chunk into the Variant metadata
// string (this copies the data once).
void TensorCord::Encode(VariantTensorData* data) const {
  string* out = &data->metadata_string();
  out->clear();
  for (absl::string_view chunk : Chunks()) {
    out->append(chunk.data(), chunk.size());
  }
}
// Deserializes from Variant metadata. Ownership note: the serialized bytes
// are moved into a heap-allocated string that the new chunk owns; it is
// freed via StringReleaser when the last reference to this cord is dropped.
bool TensorCord::Decode(VariantTensorData data) {
  auto* str = new string(std::move(data.metadata_string()));
  // Release any chunks this cord previously held before adopting the new one.
  Cleanup();
  chunks_.push_back(new CordRep(absl::string_view(*str), &StringReleaser, str));
  return true;
}
// Returns `tensor`'s buffer with one extra reference, which the caller owns
// (paired with TensorBufReleaser below).
TensorBuffer* TensorCord::TensorBufWithRef(Tensor* tensor) {
  TensorBuffer* buffer = tensor->buf_;
  buffer->Ref();
  return buffer;
}
// Releaser callback: drops the reference taken by TensorBufWithRef().
void TensorCord::TensorBufReleaser(void* tensor_buffer) {
  auto* buffer = static_cast<TensorBuffer*>(tensor_buffer);
  buffer->Unref();
}
void TensorCord::StringReleaser(void* str_ptr) {
delete static_cast<string*>(str_ptr);
}
namespace {

// Helpers for STLStringResizeUninitialized.
//
// HasMember is true_type or false_type, depending on whether or not
// T has a __resize_default_init member. Resize will call the
// __resize_default_init member if it exists, and will call the resize
// member otherwise.
//
// Primary template: fall back to ordinary resize(), which zero-fills the
// new bytes (the bytes are immediately overwritten by the caller anyway).
template <typename string_type, typename = void>
struct ResizeUninitializedTraits {
  using HasMember = std::false_type;
  static void Resize(string_type* s, size_t new_size) { s->resize(new_size); }
};

// __resize_default_init is provided by libc++ >= 8.0.
// SFINAE: this partial specialization is selected only when the expression
// `s.__resize_default_init(237)` is well-formed (237 is an arbitrary int
// used solely to form the unevaluated call expression).
template <typename string_type>
struct ResizeUninitializedTraits<
    string_type, absl::void_t<decltype(std::declval<string_type&>()
                                           .__resize_default_init(237))> > {
  using HasMember = std::true_type;
  static void Resize(string_type* s, size_t new_size) {
    s->__resize_default_init(new_size);
  }
};

// Resize string `s` to `new_size`, leaving the data uninitialized.
// Used by TensorCord::operator string() to avoid zero-filling a buffer that
// is about to be fully overwritten by memcpy.
static inline void STLStringResizeUninitialized(string* s, size_t new_size) {
  ResizeUninitializedTraits<string>::Resize(s, new_size);
}

}  // namespace
// Flattens the cord into a single string. This copies all underlying data.
TensorCord::operator string() const {
  string result;
  // Size the destination once, without zero-filling, then memcpy each chunk.
  STLStringResizeUninitialized(&result, size());
  char* dest = const_cast<char*>(result.data());
  for (CordRep* chunk : chunks_) {
    const absl::string_view piece = chunk->view();
    memcpy(dest, piece.data(), piece.size());
    dest += piece.size();
  }
  DCHECK_EQ(dest - result.data(), size());
  return result;
}
} // namespace tensorflow

View File

@ -0,0 +1,358 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_KERNELS_TENSOR_CORD_H_
#define TENSORFLOW_CORE_KERNELS_TENSOR_CORD_H_
#include <array>
#include <numeric>
#include "absl/container/inlined_vector.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "tensorflow/core/framework/variant_tensor_data.h"
namespace tensorflow {
// Signature of the callback invoked to release a chunk's backing memory once
// the last TensorCord referencing that chunk is destroyed.
typedef void (*CordRepReleaser)(void*);
class TensorCord {
  // A TensorCord keeps a view into some data, and a cleanup method to clean up
  // that data when the TensorCord destructor is called.  Copying a TensorCord
  // increments a reference count to the cleanup method, and so the cleanup
  // method is only called when all copies of the original TensorCord are
  // cleared.
  //
  // Example:
  //
  // const string& s = t.scalar<string>()();
  // TensorCord tc(s, &t);
  // ASSERT_EQ(s, tc.view());
  // TensorCord copy(tc);
  // tc = TensorCord();  // cleanup not called; the reference is held by `copy`.
  // copy = TensorCord();  // cleanup happens now, the reference is destroyed.
  //
  // Another example:
  //
  // void TensorProtoDeleter(void* ptr) {
  //   delete static_cast<TensorProto*>(ptr);
  // }
  //
  // auto p = absl::MakeUnique<TensorProto>(...);
  // absl::string_view content(p->tensor_content());
  // TensorCord tc(content, TensorProtoDeleter, p.release());
  //
 public:
  // Type name under which TensorCord is registered with the Variant framework.
  static constexpr const char kTypeName[] = "tensorflow::TensorCord";

  // Creates an empty cord: size() == 0, no chunks, no cleanup to run.
  TensorCord() : chunks_() {}

  ~TensorCord();

  // Args:
  //   `view`: should point to a location in memory that is guaranteed to
  //     remain valid until `releaser` is called.
  //   `releaser`: A callback that will be executed when there are no
  //     references left on `view`.  It will be called via `releaser(memory)`.
  //   `memory`: The argument passed to `releaser` when it is called.
  //
  // You are STRONGLY advised to provide a non-null `releaser`, and a pointer
  // to the underlying data (while ensuring that the data will not be deleted
  // until `releaser(memory)` is called).  Otherwise the TensorCord may
  // outlive the data backing `view`.
  TensorCord(absl::string_view view, CordRepReleaser releaser,
             void* memory = nullptr)
      : chunks_({new CordRep(view, releaser, memory)}) {}

  // Args:
  //   `view`: should point to a location in memory backed by `tensor`,
  //     e.g., `view` is a string_view on a tstring which is an element
  //     of `tensor`.  Furthermore, the associated tstring is not expected
  //     to be modified in such a way that the underlying memory will
  //     be changed after this TensorCord is created.
  TensorCord(absl::string_view view, Tensor* tensor)
      : chunks_({NewCordRepFromTensor(view, tensor)}) {}

  // Disallow construction with empty callback or empty tensor.
  TensorCord(absl::string_view view, std::nullptr_t, void* memory) = delete;
  TensorCord(absl::string_view view, std::nullptr_t) = delete;

  // Copying shares the underlying chunks (reference counts are incremented);
  // moving transfers them and leaves the source empty.
  TensorCord(const TensorCord& other);
  TensorCord(TensorCord&& other) noexcept;
  TensorCord& operator=(const TensorCord& other);
  TensorCord& operator=(TensorCord&& other) noexcept;

  // Appends `other`'s chunks to this cord, sharing (not copying) them.
  void Append(const TensorCord& other);

  // Appends a chunk backed by externally-released memory; same contract as
  // the (view, releaser, memory) constructor.
  void Append(absl::string_view view, CordRepReleaser releaser,
              void* memory = nullptr);

  // Appends a chunk backed by `tensor`; same contract as the (view, tensor)
  // constructor.
  void Append(absl::string_view view, Tensor* tensor);

  // Disallow Appends with empty callbacks or empty tensors.
  void Append(absl::string_view view, std::nullptr_t, void* memory) = delete;
  void Append(absl::string_view view, std::nullptr_t) = delete;

  // Total number of bytes across all chunks.  O(#chunks).
  size_t size() const;
  bool empty() const { return size() == 0; }

  // NOTE: This performs an expensive copy of the underlying data.
  explicit operator string() const;

  // Input iterator over the cord's chunks; each chunk is a string_view.
  class ChunkIterator {
   public:
    using iterator_category = std::input_iterator_tag;
    using value_type = absl::string_view;
    using difference_type = ptrdiff_t;
    using pointer = const value_type*;
    using reference = value_type;

    ChunkIterator& operator++();

    ChunkIterator operator++(int) {
      ChunkIterator tmp(*this);
      operator++();
      return tmp;
    }

    bool operator==(const ChunkIterator& other) const {
      return (cord_ == other.cord_ && chunk_index_ == other.chunk_index_);
    }

    bool operator!=(const ChunkIterator& other) const {
      return !(*this == other);
    }

    reference operator*() const {
      assert(cord_ != nullptr);
      return view_;
    }

    pointer operator->() const {
      assert(cord_ != nullptr);
      return &view_;
    }

    friend class TensorCord;

   private:
    // Constructs a `begin()` iterator from `cord`.
    explicit ChunkIterator(const TensorCord* cord, int chunk_index);
    const TensorCord* const cord_;
    int chunk_index_;
    absl::string_view view_;
  };

  // Range adaptor so `for (absl::string_view chunk : cord.Chunks())` works.
  class ChunkRange {
   public:
    explicit ChunkRange(const TensorCord* cord) : cord_(cord) {}

    ChunkIterator begin() const { return ChunkIterator(cord_, 0); }

    ChunkIterator end() const {
      return ChunkIterator(cord_, cord_->chunks_.size());
    }

   private:
    const TensorCord* cord_;
  };

  // Note that the ordinary caveats of temporary lifetime extension apply:
  //
  //   void Process() {
  //     for (absl::string_view chunk : CordFactory().Chunks()) {
  //       // The temporary Cord returned by CordFactory has been destroyed!
  //     }
  //   }
  ChunkRange Chunks() const { return ChunkRange(this); }

  ChunkIterator chunk_begin() const { return ChunkIterator(this, 0); }

  ChunkIterator chunk_end() const {
    return ChunkIterator(this, chunks_.size());
  }

  static string TypeName() { return kTypeName; }

  string DebugString() const {
    return absl::StrCat("<TensorCord size=", size(), ">");
  }

  void Encode(VariantTensorData* data) const;

  bool Decode(VariantTensorData data);

 private:
  // Unrefs all chunks and empties the chunk list.
  void Cleanup();

  // A single reference-counted chunk: either a small string copied inline,
  // or a view into external memory plus the releaser that frees it.
  class CordRep : public core::RefCounted {
   public:
    CordRep(absl::string_view view, CordRepReleaser releaser,
            void* arg = nullptr)
        : is_inline_(false), rep_{.external = {view, releaser, arg}} {}

    // **WARNING** Only use this constructor if
    // view.size() <= CordRep::kMaxInlineSize.
    explicit CordRep(absl::string_view view)
        : is_inline_(true), rep_{.internal = InlineFromView(view)} {}

    ~CordRep() override;

    absl::string_view view() const {
      if (is_inline_) {
        return absl::string_view(
            rep_.internal.data() + 1,
            *reinterpret_cast<const uint8*>(rep_.internal.data()));
      } else {
        return rep_.external.view;
      }
    }

   private:
    friend class TensorCord;

    struct ExternalRep {
      absl::string_view view;
      CordRepReleaser releaser;
      void* arg;
    };

    // We save the size in the first byte, so subtract 1.
    static constexpr int kMaxInlineSize = sizeof(ExternalRep) - 1;
    static_assert(kMaxInlineSize < 255,
                  "Cannot store size of InlineRep in a single byte.");

    // The first byte stores the size as a uint8.  The rest of the bytes are
    // the string itself.
    using InlineRep = std::array<char, sizeof(ExternalRep)>;

    static InlineRep InlineFromView(absl::string_view view) {
      // The buffer holds one size byte plus kMaxInlineSize data bytes, so a
      // view of exactly kMaxInlineSize bytes still fits.  DCHECK_LE (not LT)
      // matches the `view.size() <= kMaxInlineSize` test in
      // NewCordRepFromTensor(), which would otherwise trip this check in
      // debug builds for views of exactly kMaxInlineSize bytes.
      DCHECK_LE(view.size(), kMaxInlineSize);
      InlineRep rep;
      *reinterpret_cast<uint8*>(rep.data()) = view.size();
      std::memcpy(static_cast<char*>(rep.data() + 1), view.data(), view.size());
      return rep;
    }

    // Member variables.
    const bool is_inline_;
    const union {
      InlineRep internal;
      ExternalRep external;
    } rep_;
  };

  static TensorBuffer* TensorBufWithRef(Tensor* tensor);
  static void TensorBufReleaser(void* tensor_buffer);
  static void StringReleaser(void* str_ptr);
  static CordRep* NewCordRepFromTensor(absl::string_view view, Tensor* tensor);

  // Most cords hold one or two chunks; keep them inline in the vector.
  absl::InlinedVector<CordRep*, 2> chunks_;
};
// Copy constructor: shares (rather than copies) the underlying chunks by
// taking an additional reference on each one.
inline TensorCord::TensorCord(const TensorCord& other)
    : chunks_(other.chunks_) {
  for (CordRep* chunk : chunks_) {
    chunk->Ref();
  }
}
// Move constructor: steals `other`'s chunks without touching refcounts.
// The explicit clear() guarantees the moved-from cord is empty (a moved-from
// InlinedVector is otherwise in an unspecified state).
inline TensorCord::TensorCord(TensorCord&& other) noexcept
    : chunks_(std::move(other.chunks_)) {
  other.chunks_.clear();
}
// Copy assignment: releases the current chunks, then shares `other`'s by
// taking an extra reference on each.
// The self-assignment guard is required: without it, Cleanup() would unref
// (and possibly free) the very chunks we are about to copy, leaving the cord
// empty or pointing at released memory after `tc = tc;`.
inline TensorCord& TensorCord::operator=(const TensorCord& other) {
  if (this != &other) {
    Cleanup();
    chunks_ = other.chunks_;
    for (CordRep* chunk : chunks_) {
      chunk->Ref();
    }
  }
  return *this;
}
// Move assignment: releases the current chunks, then swaps chunk lists with
// `other` (post-Cleanup ours is empty, so `other` ends up empty).
// Self-move leaves the cord empty, which is a valid moved-from state.
inline TensorCord& TensorCord::operator=(TensorCord&& other) noexcept {
  Cleanup();
  std::swap(chunks_, other.chunks_);
  return *this;
}
inline void TensorCord::Append(const TensorCord& other) {
for (auto* rep : other.chunks_) {
chunks_.push_back(rep);
rep->Ref();
}
}
// Appends a chunk viewing externally-owned memory; `releaser(memory)` runs
// when the last reference to the chunk is dropped.
inline void TensorCord::Append(absl::string_view view, CordRepReleaser releaser,
                               void* memory) {
  CordRep* chunk = new CordRep(view, releaser, memory);
  chunks_.push_back(chunk);
}
// Appends a chunk backed by `tensor` (stored inline when small enough).
inline void TensorCord::Append(absl::string_view view, Tensor* tensor) {
  CordRep* chunk = NewCordRepFromTensor(view, tensor);
  chunks_.push_back(chunk);
}
inline size_t TensorCord::size() const {
return (chunks_.empty())
? 0
: std::accumulate(chunk_begin(), chunk_end(), 0,
[](size_t acc, absl::string_view b) {
return acc + b.size();
});
}
// Advances to the next chunk.  Must not be called on an end() iterator.
inline TensorCord::ChunkIterator& TensorCord::ChunkIterator::operator++() {
  assert(cord_ != nullptr);
  assert(chunk_index_ < cord_->chunks_.size());
  chunk_index_ += 1;
  // Cache the new chunk's view unless we just reached end(); the stale view_
  // on an end() iterator is never read (operator* asserts first).
  if (chunk_index_ != cord_->chunks_.size()) {
    view_ = cord_->chunks_[chunk_index_]->view();
  }
  return *this;
}
// Constructs an iterator positioned at `index`, caching that chunk's view so
// operator*/operator-> can return it without re-dereferencing.
inline TensorCord::ChunkIterator::ChunkIterator(const TensorCord* cord,
                                                int index)
    : cord_(cord), chunk_index_(index) {
  // end() (index == chunks_.size()) has no chunk to cache.
  if (index < cord_->chunks_.size()) {
    view_ = cord_->chunks_[index]->view();
  }
}
// Builds a chunk for `view`: views small enough for the inline buffer are
// copied into the chunk itself; larger views keep a reference on `tensor`'s
// buffer, released when the chunk's refcount reaches zero.
inline TensorCord::CordRep* TensorCord::NewCordRepFromTensor(
    absl::string_view view, Tensor* tensor) {
  const bool fits_inline = view.size() <= TensorCord::CordRep::kMaxInlineSize;
  return fits_inline
             ? new CordRep(view)
             : new CordRep(view, &TensorBufReleaser, TensorBufWithRef(tensor));
}
inline void TensorCord::Cleanup() {
if (chunks_.empty()) return;
for (auto* rep : chunks_) {
rep->Unref();
}
chunks_.clear();
}
} // namespace tensorflow
#endif // TENSORFLOW_CORE_KERNELS_TENSOR_CORD_H_

View File

@ -0,0 +1,291 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/kernels/tensor_cord.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_reference.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/variant.h"
#include "tensorflow/core/framework/variant_encode_decode.h"
#include "tensorflow/core/platform/cord.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
namespace tensorflow {
namespace {
void DoNothingReleaser(void*) {}
TEST(TensorCordTest, Empty) {
  // A default-constructed cord has no chunks and zero size, both through the
  // raw iterators and through the Chunks() range.
  TensorCord tc;
  EXPECT_EQ(tc.chunk_begin(), tc.chunk_end());
  auto chunk_range = tc.Chunks();
  EXPECT_EQ(chunk_range.begin(), chunk_range.end());
  EXPECT_EQ(tc.size(), 0);
}
TEST(TensorCordTest, ViewOfValue) {
  // A single-chunk cord exposes exactly one view, equal to the source string.
  TensorCord tc("abc", &DoNothingReleaser, nullptr);
  EXPECT_EQ(*tc.chunk_begin(), "abc");
  auto iter = tc.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, tc.chunk_end());
}
TEST(TensorCordTest, Chunks) {
  // Range-based iteration over a one-chunk cord visits the chunk exactly once.
  TensorCord tc("abc", &DoNothingReleaser, nullptr);
  int num_chunks = 0;
  for (auto chunk : tc.Chunks()) {
    EXPECT_EQ(chunk, "abc");
    ++num_chunks;
  }
  EXPECT_EQ(num_chunks, 1);
}
// This function takes an arg-less std::function that may have a closure, and
// creates a std::function with no closure: one that can be cast
// directly to a (*)(void*) function pointer that takes the original function.
// Use it this way:
//
// void callback_with_arg(void (*fn)(void*), void* arg) { fn(arg); }
//
// auto fn = [&]() { ... }
// auto thunk = CreateThunkFor(fn);
// callback_with_arg(thunk, &fn);
//
// Idea from:
// http://bannalia.blogspot.com/2016/07/passing-capturing-c-lambda-functions-as.html
// Returns a captureless lambda (convertible to the CordRepReleaser function
// pointer) that casts its void* argument back to T* and invokes it as a
// nullary callable. Note that `fn` is not used at runtime — it exists only so
// T is deduced; the caller must keep the original callable alive and pass its
// address as the releaser's memory argument.
template <typename T>
CordRepReleaser CreateThunkFor(const T& fn) {
  return [](void* ptr) { (*static_cast<T*>(ptr))(); };
}
TEST(TensorCordTest, Copy) {
  // Copy-assignment shares the underlying rep: the releaser fires exactly
  // once, when the last owner lets go.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord tc_copy;
  {
    TensorCord tc(a, thunk, &cleaner);
    tc_copy = tc;
  }  // tc destroyed here; tc_copy still holds a reference.
  auto iter = tc_copy.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, tc_copy.chunk_end());
  EXPECT_EQ(cleaned, 0);
  tc_copy = TensorCord();  // last reference dropped — cleaner fires
  EXPECT_EQ(cleaned, 1);
}
TEST(TensorCordTest, AppendCord) {
  // Appending one cord to another Refs the appended reps: each releaser fires
  // exactly once, only after every owner has released its reference.
  int cleaned_0 = 0;
  int cleaned_1 = 0;
  auto cleaner_0 = [&cleaned_0]() { ++cleaned_0; };
  auto cleaner_1 = [&cleaned_1]() { ++cleaned_1; };
  TensorCord tc_0("abc", CreateThunkFor(cleaner_0), &cleaner_0);
  TensorCord tc_1("cba", CreateThunkFor(cleaner_1), &cleaner_1);
  tc_0.Append(tc_1);
  EXPECT_EQ(string(tc_0), "abccba");
  int idx = 0;
  for (auto chunk : tc_0.Chunks()) {
    EXPECT_EQ(chunk, idx == 0 ? "abc" : "cba");
    ++idx;
  }
  EXPECT_EQ(idx, 2);
  tc_1 = TensorCord();  // tc_0 still holds a ref on tc_1's rep
  EXPECT_EQ(cleaned_0, 0);
  EXPECT_EQ(cleaned_1, 0);
  tc_0 = TensorCord();  // last refs dropped: both releasers fire
  EXPECT_EQ(cleaned_0, 1);
  EXPECT_EQ(cleaned_1, 1);
}
TEST(TensorCordTest, AppendView) {
  // Appending a raw view adds a new rep with its own releaser; both releasers
  // fire when the cord is cleared.
  int cleaned_0 = 0;
  int cleaned_1 = 0;
  auto cleaner_0 = [&cleaned_0]() { ++cleaned_0; };
  auto cleaner_1 = [&cleaned_1]() { ++cleaned_1; };
  TensorCord tc_0("abc", CreateThunkFor(cleaner_0), &cleaner_0);
  tc_0.Append("cba", CreateThunkFor(cleaner_1), &cleaner_1);
  EXPECT_EQ(string(tc_0), "abccba");
  auto iter = tc_0.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(*iter, "cba");
  ++iter;
  EXPECT_EQ(iter, tc_0.chunk_end());
  EXPECT_EQ(cleaned_0, 0);
  EXPECT_EQ(cleaned_1, 0);
  tc_0 = TensorCord();  // sole owner released: both releasers fire
  EXPECT_EQ(cleaned_0, 1);
  EXPECT_EQ(cleaned_1, 1);
}
TEST(TensorCordTest, Move) {
  // Move-assignment transfers ownership without touching the refcount: the
  // releaser fires only once, when the surviving cord is cleared.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord tc_dest;
  {
    TensorCord tc_src(a, thunk, &cleaner);
    tc_dest = std::move(tc_src);
  }  // tc_src (emptied by the move) is destroyed here.
  EXPECT_EQ(tc_dest.size(), 3);
  auto iter = tc_dest.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, tc_dest.chunk_end());
  EXPECT_EQ(cleaned, 0);
  tc_dest = TensorCord();
  EXPECT_EQ(tc_dest.size(), 0);
  EXPECT_EQ(cleaned, 1);
}
TEST(TensorCordTest, CopyConstructor) {
  // The copy constructor bumps the refcount: both cords see the data, and the
  // releaser runs only after the second owner is cleared.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord original(a, thunk, &cleaner);
  TensorCord duplicate(original);
  EXPECT_EQ(original.size(), 3);
  EXPECT_EQ(duplicate.size(), 3);
  auto iter = duplicate.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, duplicate.chunk_end());
  EXPECT_EQ(cleaned, 0);
  original = TensorCord();   // first owner released — data still alive
  EXPECT_EQ(cleaned, 0);
  duplicate = TensorCord();  // last owner released — cleaner fires
  EXPECT_EQ(cleaned, 1);
}
TEST(TensorCordTest, MoveConstructor) {
  // The move constructor steals the chunks; only the destination cord ever
  // triggers the releaser.
  int cleaned = 0;
  auto cleaner = [&cleaned]() { ++cleaned; };
  auto thunk = CreateThunkFor(cleaner);
  string a = "abc";
  TensorCord source(a, thunk, &cleaner);
  TensorCord dest(std::move(source));
  EXPECT_EQ(dest.size(), 3);
  auto iter = dest.chunk_begin();
  EXPECT_EQ(*iter, "abc");
  ++iter;
  EXPECT_EQ(iter, dest.chunk_end());
  EXPECT_EQ(cleaned, 0);
  dest = TensorCord();
  EXPECT_EQ(cleaned, 1);
}
#ifdef PLATFORM_GOOGLE
// Baseline benchmark: deep-copies a DT_STRING tensor of `num_elem` strings,
// each `string_size` bytes long, once per iteration.
void TensorCopyFromTensorBenchmark(benchmark::State& state, int num_elem,
                                   int string_size) {
  Tensor strings(DT_STRING, {num_elem});
  auto t = strings.flat<string>();
  for (int i = 0; i < num_elem; ++i) {
    // Fill each element with string_size copies of 'a'.
    t(i).insert(0, string_size, 'a');
  }
  for (auto _ : state) {
    // DoNotOptimize keeps the copy from being dead-code eliminated.
    testing::DoNotOptimize(tensor::DeepCopy(strings));
  }
}
// Benchmarks building a DT_VARIANT tensor of TensorCords that view (rather
// than copy) the strings of an existing DT_STRING tensor. This is the path
// the header's break-even numbers compare against TensorCopyFromTensor.
void TensorCordFromTensorBenchmark(benchmark::State& state, int num_elem,
                                   int string_size) {
  Tensor strings(DT_STRING, {num_elem});
  auto t = strings.flat<string>();
  for (int i = 0; i < num_elem; ++i) {
    t(i).insert(0, string_size, 'a');
  }
  for (auto _ : state) {
    Tensor copy(DT_VARIANT, {num_elem});
    auto t_copy = copy.flat<Variant>();
    for (int i = 0; i < num_elem; ++i) {
      // Each variant wraps a TensorCord viewing t(i)'s bytes in `strings`.
      t_copy(i) = TensorCord(t(i), &strings);
    }
  }
}
void CordReleaser(void* cord_ptr) { delete static_cast<absl::Cord*>(cord_ptr); }
// Benchmarks building a DT_VARIANT tensor of TensorCords that view
// heap-allocated absl::Cords; each cord is freed by CordReleaser when its
// TensorCord is destroyed.
void TensorCordFromAbslCordBenchmark(benchmark::State& state, int num_elem,
                                     int string_size) {
  std::vector<absl::Cord> cords(num_elem);
  for (int i = 0; i < num_elem; ++i) {
    string s(string_size, 'a');
    cords[i] = s;
  }
  for (auto _ : state) {
    Tensor copy(DT_VARIANT, {num_elem});
    auto t_copy = copy.flat<Variant>();
    for (int i = 0; i < num_elem; ++i) {
      auto my_cord = new absl::Cord(cords[i]);
      // NOTE(review): only the cord's first chunk is wrapped; an absl::Cord
      // of a large string may hold several chunks — confirm measuring just
      // the first chunk is intended.
      t_copy(i) = TensorCord(*my_cord->chunk_begin(), CordReleaser, my_cord);
    }
  }
}
// Defines three benchmarks — tensor deep-copy (the baseline), TensorCord
// built from a tensor, and TensorCord built from an absl::Cord — for a single
// (NUM_ELEM, STRING_SIZE) combination. (Comments cannot appear inside the
// macro body, hence this block comment.)
#define CreateBM(NUM_ELEM, STRING_SIZE)                                       \
  void BM_TensorCopyFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE( \
      benchmark::State& state) {                                              \
    TensorCopyFromTensorBenchmark(state, NUM_ELEM, STRING_SIZE);              \
  }                                                                           \
  BENCHMARK(                                                                  \
      BM_TensorCopyFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE); \
  void BM_TensorCordFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE( \
      benchmark::State& state) {                                              \
    TensorCordFromTensorBenchmark(state, NUM_ELEM, STRING_SIZE);              \
  }                                                                           \
  BENCHMARK(                                                                  \
      BM_TensorCordFromTensor_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE); \
  void                                                                        \
  BM_TensorCordFromAbslCord_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE(    \
      benchmark::State& state) {                                              \
    TensorCordFromAbslCordBenchmark(state, NUM_ELEM, STRING_SIZE);            \
  }                                                                           \
  BENCHMARK(                                                                  \
      BM_TensorCordFromAbslCord_NumElem_##NUM_ELEM##_StringSize_##STRING_SIZE);
// Instantiates the three benchmarks above at the standard string sizes for
// one NUM_ELEM value.
#define CreateStringBMs(NUM_ELEM)           \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/16);   \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/32);   \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/128);  \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/1024); \
  CreateBM(NUM_ELEM, /*STRING_SIZE=*/4096);
// Benchmark the cross product of element counts and string sizes reported in
// the header comment of the implementation file.
CreateStringBMs(/*NUM_ELEM=*/1);
CreateStringBMs(/*NUM_ELEM=*/16);
CreateStringBMs(/*NUM_ELEM=*/32);
CreateStringBMs(/*NUM_ELEM=*/64);
CreateStringBMs(/*NUM_ELEM=*/128);
#endif // PLATFORM_GOOGLE
} // namespace
} // namespace tensorflow