Speed up creation of tensors from compressed TensorProtos by 2-3x.

This should speed up some TF models optimized by Grappler in particular, since Grappler tries to compress all constants in a graph.

Run on XXXXX (72 X 2991 MHz CPUs); 2019-09-13T15:55:01.194485871-07:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB
Benchmark                          Base (ns)  New (ns) Improvement
------------------------------------------------------------------
BM_FromProto/512                         114       116     -1.8%
BM_FromProto/4k                          692       671     +3.0%
BM_FromProto/32k                        8675      8713     -0.4%
BM_FromProto/256k                     183931    184131     -0.1%
BM_FromProto/1M                       640952    638278     +0.4%
BM_FromProtoCompressed/512               215       118    +45.1%
BM_FromProtoCompressed/4k               1283       490    +61.8%
BM_FromProtoCompressed/32k             14115      8324    +41.0%
BM_FromProtoCompressed/256k            76930     32191    +58.2%
BM_FromProtoCompressed/1M             326284    170167    +47.8%
BM_FromProtoCompressedZero/512           215       119    +44.7%
BM_FromProtoCompressedZero/4k           1302       490    +62.4%
BM_FromProtoCompressedZero/32k         14333      8160    +43.1%
BM_FromProtoCompressedZero/256k        77032     32110    +58.3%
BM_FromProtoCompressedZero/1M         329943    171449    +48.0%
PiperOrigin-RevId: 269027674
This commit is contained in:
A. Unique TensorFlower 2019-09-13 19:39:38 -07:00 committed by TensorFlower Gardener
parent df1b3b396b
commit 4bd8a42706
5 changed files with 121 additions and 15 deletions

View File

@ -514,8 +514,13 @@ TensorBuffer* FromProtoField(Allocator* a, const TensorProto& in, int64 n) {
std::copy_n(begin, n, data);
} else {
std::copy_n(begin, in_n, data);
const T& last = *(data + in_n - 1);
std::fill_n(data + in_n, n - in_n, last);
if (std::is_trivially_copyable<T>::value) {
const T last = *(data + in_n - 1);
std::fill_n(data + in_n, n - in_n, last);
} else {
const T& last = *(data + in_n - 1);
std::fill_n(data + in_n, n - in_n, last);
}
}
}
@ -648,14 +653,14 @@ bool Tensor::IsInitialized() const {
}
void Tensor::CheckType(DataType expected_dtype) const {
CHECK_EQ(dtype(), expected_dtype) << " "
<< DataTypeString(expected_dtype) << " expected, got "
CHECK_EQ(dtype(), expected_dtype)
<< " " << DataTypeString(expected_dtype) << " expected, got "
<< DataTypeString(dtype());
}
void Tensor::CheckTypeAndIsAligned(DataType expected_dtype) const {
CHECK_EQ(dtype(), expected_dtype) << " "
<< DataTypeString(expected_dtype) << " expected, got "
CHECK_EQ(dtype(), expected_dtype)
<< " " << DataTypeString(expected_dtype) << " expected, got "
<< DataTypeString(dtype());
CHECK(IsAligned()) << "ptr = " << base<void>();
}

View File

@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant.h"
#include "tensorflow/core/framework/variant_encode_decode.h"
@ -1518,5 +1519,59 @@ void BM_CreateAndDestroyHostScalarOptimized(int iters) {
}
BENCHMARK(BM_CreateAndDestroyHostScalarOptimized);
// Benchmarks Tensor::FromProto on an uncompressed float TensorProto with
// `size` elements. `iters` is the iteration count supplied by the benchmark
// harness; timing is paused while the fixture proto is built.
static void BM_FromProto(int iters, int size) {
  testing::StopTiming();
  TensorShape shape({size});
  Allocator* allocator = cpu_allocator();
  Tensor a(allocator, DT_FLOAT, shape);
  // Use a float literal to match the float buffer (sibling benchmarks use
  // 42.0f as well).
  std::fill_n(a.flat<float>().data(), size, 42.0f);
  TensorProto p;
  a.AsProtoField(&p);
  testing::StartTiming();
  // NOTE: `while (--iters)` would run one iteration too few, and would wrap
  // around (looping ~2^32 times) if the harness ever passed iters == 0.
  // `iters-- > 0` runs exactly `iters` iterations and handles 0 safely.
  while (iters-- > 0) {
    Tensor b;
    ASSERT_TRUE(b.FromProto(p));
  }
  testing::StopTiming();
}
BENCHMARK(BM_FromProto)->Range(1, 1 << 20);
// Benchmarks Tensor::FromProto on a TensorProto that has been compressed
// with tensor::CompressTensorProtoInPlace (all elements equal, so the
// repeated field is truncated to a single value). `size` is the logical
// element count; `iters` is the harness-supplied iteration count.
static void BM_FromProtoCompressed(int iters, int size) {
  testing::StopTiming();
  TensorShape shape({size});
  Allocator* allocator = cpu_allocator();
  Tensor a(allocator, DT_FLOAT, shape);
  std::fill_n(a.flat<float>().data(), size, 42.0f);
  TensorProto p;
  a.AsProtoField(&p);
  tensor::CompressTensorProtoInPlace(&p);
  testing::StartTiming();
  // NOTE: `while (--iters)` would run one iteration too few, and would wrap
  // around if the harness ever passed iters == 0. `iters-- > 0` runs
  // exactly `iters` iterations and handles 0 safely.
  while (iters-- > 0) {
    Tensor b;
    ASSERT_TRUE(b.FromProto(p));
  }
  testing::StopTiming();
}
BENCHMARK(BM_FromProtoCompressed)->Range(1, 1 << 20);
// Benchmarks Tensor::FromProto on a compressed TensorProto whose trailing
// run is all zeros (one leading non-zero element prevents the whole tensor
// from collapsing). This exercises the "truncate trailing default values"
// path of CompressTensorProtoInPlace. `iters` is the harness-supplied
// iteration count.
static void BM_FromProtoCompressedZero(int iters, int size) {
  testing::StopTiming();
  TensorShape shape({size});
  Allocator* allocator = cpu_allocator();
  Tensor a(allocator, DT_FLOAT, shape);
  // Use float literals for the float buffer (the original mixed `0` and `1`
  // int literals with a float tensor).
  std::fill_n(a.flat<float>().data(), size, 0.0f);
  a.flat<float>()(0) = 1.0f;
  TensorProto p;
  a.AsProtoField(&p);
  tensor::CompressTensorProtoInPlace(&p);
  testing::StartTiming();
  // NOTE: `while (--iters)` would run one iteration too few, and would wrap
  // around if the harness ever passed iters == 0. `iters-- > 0` runs
  // exactly `iters` iterations and handles 0 safely.
  while (iters-- > 0) {
    Tensor b;
    ASSERT_TRUE(b.FromProto(p));
  }
  testing::StopTiming();
}
BENCHMARK(BM_FromProtoCompressedZero)->Range(1, 1 << 20);
} // namespace
} // namespace tensorflow

View File

@ -243,6 +243,12 @@ bool CompressTensorContent(float min_compression_ratio,
}
tensor->clear_tensor_content();
}
if (new_num_values == 1) {
const T value = TypeHelper::GetValue(0, *tensor);
if (value == T()) {
TypeHelper::Truncate(0, tensor);
}
}
return true;
}
@ -287,7 +293,8 @@ bool CompressRepeatedField(float min_compression_ratio,
last_index = i + 1;
}
}
const int64 num_truncated_proto_values = last_index + 1;
const int64 num_truncated_proto_values =
(last_value == T() && last_index == 0) ? 0 : last_index + 1;
const int64 num_bytes_as_field =
num_truncated_proto_values * sizeof(FieldType);
const int64 num_bytes_as_tensor_content = num_tensor_values * sizeof(T);

View File

@ -455,41 +455,81 @@ TEST(TensorProtoUtil, CompressTensorProtoInPlaceTooSmall) {
EXPECT_FALSE(tensor::CompressTensorProtoInPlace(&tensor_proto));
}
TEST(TensorProtoUtil, CompressTensorProtoInPlaceAllEqual) {
TEST(TensorProtoUtil, CompressTensorProtoInPlaceAllZero) {
const int kLength = 64;
TensorProto tensor_proto =
tensor::CreateTensorProto(std::vector<float>(kLength), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<float>::NumValues(tensor_proto),
1);
0);
tensor_proto =
tensor::CreateTensorProto(std::vector<int>(kLength), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<int>::NumValues(tensor_proto),
1);
0);
tensor_proto =
tensor::CreateTensorProto(std::vector<uint8>(kLength), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<uint8>::NumValues(tensor_proto),
1);
0);
tensor_proto =
tensor::CreateTensorProto(std::vector<bool>(kLength), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<bool>::NumValues(tensor_proto),
1);
0);
tensor_proto =
tensor::CreateTensorProto(std::vector<Eigen::half>(kLength), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(
tensor::internal::TensorProtoHelper<Eigen::half>::NumValues(tensor_proto),
1);
0);
tensor_proto = tensor::CreateTensorProto(
std::vector<std::complex<float>>(kLength), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<std::complex<float>>::NumValues(
tensor_proto),
0);
}
TEST(TensorProtoUtil, CompressTensorProtoInPlaceAllOnes) {
const int kLength = 64;
TensorProto tensor_proto =
tensor::CreateTensorProto(std::vector<float>(kLength, 1), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<float>::NumValues(tensor_proto),
1);
tensor_proto =
tensor::CreateTensorProto(std::vector<int>(kLength, 1), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<int>::NumValues(tensor_proto),
1);
tensor_proto =
tensor::CreateTensorProto(std::vector<uint8>(kLength, 1), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<uint8>::NumValues(tensor_proto),
1);
tensor_proto =
tensor::CreateTensorProto(std::vector<bool>(kLength, true), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<bool>::NumValues(tensor_proto),
1);
tensor_proto = tensor::CreateTensorProto(
std::vector<Eigen::half>(kLength, Eigen::half(1.0)), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(
tensor::internal::TensorProtoHelper<Eigen::half>::NumValues(tensor_proto),
1);
tensor_proto = tensor::CreateTensorProto(
std::vector<std::complex<float>>(kLength, 1), {kLength});
EXPECT_TRUE(tensor::CompressTensorProtoInPlace(&tensor_proto));
EXPECT_EQ(tensor::internal::TensorProtoHelper<std::complex<float>>::NumValues(
tensor_proto),
1);

View File

@ -991,8 +991,7 @@ TEST_F(MetaOptimizerTest, CompressConstants) {
found_zeros = true;
EXPECT_EQ(node.op(), "Const");
const TensorProto& zeroes_t = node.attr().at("value").tensor();
EXPECT_EQ(zeroes_t.float_val_size(), 1);
EXPECT_EQ(zeroes_t.float_val(0), 0.0f);
EXPECT_EQ(zeroes_t.float_val_size(), 0);
} else if (node.name() == "host_ones") {
found_host_ones = true;
EXPECT_EQ(node.op(), "HostConst");