Internal tests cleanup.
PiperOrigin-RevId: 339820260 Change-Id: Ic704d3ec6c1d5e0a4155b6a49c14b977bf264716
This commit is contained in:
parent
92f946352c
commit
b62ccde60d
tensorflow/core
@ -2587,11 +2587,9 @@ TEST(DirectSessionTest,
|
|||||||
|
|
||||||
// A simple benchmark for the overhead of `DirectSession::Run()` calls
|
// A simple benchmark for the overhead of `DirectSession::Run()` calls
|
||||||
// with varying numbers of feeds/fetches.
|
// with varying numbers of feeds/fetches.
|
||||||
void FeedFetchBenchmarkHelper(int iters, int num_feeds, bool use_make_callable,
|
void FeedFetchBenchmarkHelper(::testing::benchmark::State& state, int num_feeds,
|
||||||
int inter_op_threads,
|
bool use_make_callable, int inter_op_threads,
|
||||||
bool use_single_threaded_executor) {
|
bool use_single_threaded_executor) {
|
||||||
testing::StopTiming();
|
|
||||||
|
|
||||||
Tensor value(DT_FLOAT, TensorShape());
|
Tensor value(DT_FLOAT, TensorShape());
|
||||||
value.flat<float>()(0) = 37.0;
|
value.flat<float>()(0) = 37.0;
|
||||||
|
|
||||||
@ -2643,13 +2641,11 @@ void FeedFetchBenchmarkHelper(int iters, int num_feeds, bool use_make_callable,
|
|||||||
}
|
}
|
||||||
TF_CHECK_OK(session->MakeCallable(callable_options, &handle));
|
TF_CHECK_OK(session->MakeCallable(callable_options, &handle));
|
||||||
|
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
std::vector<Tensor> output_values;
|
std::vector<Tensor> output_values;
|
||||||
TF_CHECK_OK(
|
TF_CHECK_OK(
|
||||||
session->RunCallable(handle, input_tensors, &output_values, nullptr));
|
session->RunCallable(handle, input_tensors, &output_values, nullptr));
|
||||||
}
|
}
|
||||||
testing::StopTiming();
|
|
||||||
} else {
|
} else {
|
||||||
{
|
{
|
||||||
// NOTE(mrry): Ignore the first run, which will incur the graph
|
// NOTE(mrry): Ignore the first run, which will incur the graph
|
||||||
@ -2661,32 +2657,40 @@ void FeedFetchBenchmarkHelper(int iters, int num_feeds, bool use_make_callable,
|
|||||||
std::vector<Tensor> output_values;
|
std::vector<Tensor> output_values;
|
||||||
TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values));
|
TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values));
|
||||||
}
|
}
|
||||||
testing::StartTiming();
|
|
||||||
for (int i = 0; i < iters; ++i) {
|
for (auto s : state) {
|
||||||
std::vector<Tensor> output_values;
|
std::vector<Tensor> output_values;
|
||||||
TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values));
|
TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values));
|
||||||
}
|
}
|
||||||
testing::StopTiming();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BM_FeedFetch(int iters, int num_feeds) {
|
void BM_FeedFetch(::testing::benchmark::State& state) {
|
||||||
FeedFetchBenchmarkHelper(iters, num_feeds, /* use_make_callable */ false,
|
const int num_feeds = state.range(0);
|
||||||
|
|
||||||
|
FeedFetchBenchmarkHelper(state, num_feeds, /* use_make_callable */ false,
|
||||||
/* inter_op_threads */ 0,
|
/* inter_op_threads */ 0,
|
||||||
/* use_single_threaded_executor */ false);
|
/* use_single_threaded_executor */ false);
|
||||||
}
|
}
|
||||||
void BM_FeedFetchCallable(int iters, int num_feeds) {
|
void BM_FeedFetchCallable(::testing::benchmark::State& state) {
|
||||||
FeedFetchBenchmarkHelper(iters, num_feeds, /* use_make_callable */ true,
|
const int num_feeds = state.range(0);
|
||||||
|
|
||||||
|
FeedFetchBenchmarkHelper(state, num_feeds, /* use_make_callable */ true,
|
||||||
/* inter_op_threads */ 0,
|
/* inter_op_threads */ 0,
|
||||||
/* use_single_threaded_executor */ false);
|
/* use_single_threaded_executor */ false);
|
||||||
}
|
}
|
||||||
void BM_FeedFetchCallableSingleThread(int iters, int num_feeds) {
|
void BM_FeedFetchCallableSingleThread(::testing::benchmark::State& state) {
|
||||||
FeedFetchBenchmarkHelper(iters, num_feeds, /* use_make_callable */ true,
|
const int num_feeds = state.range(0);
|
||||||
|
|
||||||
|
FeedFetchBenchmarkHelper(state, num_feeds, /* use_make_callable */ true,
|
||||||
/* inter_op_threads */ -1,
|
/* inter_op_threads */ -1,
|
||||||
/* use_single_threaded_executor */ false);
|
/* use_single_threaded_executor */ false);
|
||||||
}
|
}
|
||||||
void BM_FeedFetchCallableSingleThreadExecutor(int iters, int num_feeds) {
|
void BM_FeedFetchCallableSingleThreadExecutor(
|
||||||
FeedFetchBenchmarkHelper(iters, num_feeds, /* use_make_callable */ true,
|
::testing::benchmark::State& state) {
|
||||||
|
const int num_feeds = state.range(0);
|
||||||
|
|
||||||
|
FeedFetchBenchmarkHelper(state, num_feeds, /* use_make_callable */ true,
|
||||||
/* inter_op_threads */ -1,
|
/* inter_op_threads */ -1,
|
||||||
/* use_single_threaded_executor */ true);
|
/* use_single_threaded_executor */ true);
|
||||||
}
|
}
|
||||||
|
@ -69,8 +69,8 @@ class TestEnv {
|
|||||||
Device* cpu_device_;
|
Device* cpu_device_;
|
||||||
};
|
};
|
||||||
|
|
||||||
void BM_CreateGraph(int iters) {
|
void BM_CreateGraph(::testing::benchmark::State& state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
for (auto s : state) {
|
||||||
Scope root = Scope::NewRootScope();
|
Scope root = Scope::NewRootScope();
|
||||||
auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}});
|
auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}});
|
||||||
auto M = ops::MatMul(root, C, C);
|
auto M = ops::MatMul(root, C, C);
|
||||||
@ -79,8 +79,7 @@ void BM_CreateGraph(int iters) {
|
|||||||
}
|
}
|
||||||
BENCHMARK(BM_CreateGraph);
|
BENCHMARK(BM_CreateGraph);
|
||||||
|
|
||||||
void BM_RunGraph(int iters) {
|
void BM_RunGraph(::testing::benchmark::State& state) {
|
||||||
tensorflow::testing::StopTiming();
|
|
||||||
Scope root = Scope::NewRootScope();
|
Scope root = Scope::NewRootScope();
|
||||||
auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}});
|
auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}});
|
||||||
auto M = ops::MatMul(root, C, C);
|
auto M = ops::MatMul(root, C, C);
|
||||||
@ -89,28 +88,24 @@ void BM_RunGraph(int iters) {
|
|||||||
opts.config.set_intra_op_parallelism_threads(1);
|
opts.config.set_intra_op_parallelism_threads(1);
|
||||||
ClientSession sess(root, opts);
|
ClientSession sess(root, opts);
|
||||||
std::vector<Tensor> outputs;
|
std::vector<Tensor> outputs;
|
||||||
tensorflow::testing::StartTiming();
|
for (auto s : state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
outputs.clear();
|
outputs.clear();
|
||||||
TF_CHECK_OK(sess.Run({M}, &outputs));
|
TF_CHECK_OK(sess.Run({M}, &outputs));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_RunGraph);
|
BENCHMARK(BM_RunGraph);
|
||||||
|
|
||||||
void BM_CreateAndDestroySession(int iters) {
|
void BM_CreateAndDestroySession(::testing::benchmark::State& state) {
|
||||||
tensorflow::testing::StopTiming();
|
|
||||||
Scope root = Scope::NewRootScope();
|
Scope root = Scope::NewRootScope();
|
||||||
auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}});
|
auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}});
|
||||||
auto M = ops::MatMul(root, C, C);
|
auto M = ops::MatMul(root, C, C);
|
||||||
tensorflow::testing::StartTiming();
|
for (auto s : state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
ClientSession sess(root);
|
ClientSession sess(root);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_CreateAndDestroySession);
|
BENCHMARK(BM_CreateAndDestroySession);
|
||||||
|
|
||||||
void BM_KernelAndDeviceInit(int iters) {
|
void BM_KernelAndDeviceInit(::testing::benchmark::State& state) {
|
||||||
tensorflow::testing::StopTiming();
|
|
||||||
NodeDef ndef(AttrBuilder("MatMul")
|
NodeDef ndef(AttrBuilder("MatMul")
|
||||||
.Set("T", DT_FLOAT)
|
.Set("T", DT_FLOAT)
|
||||||
.Set("transpose_a", false)
|
.Set("transpose_a", false)
|
||||||
@ -120,15 +115,13 @@ void BM_KernelAndDeviceInit(int iters) {
|
|||||||
TestEnv env;
|
TestEnv env;
|
||||||
KernelAndDeviceOp k(nullptr, false, env.function_library_runtime(), nullptr,
|
KernelAndDeviceOp k(nullptr, false, env.function_library_runtime(), nullptr,
|
||||||
nullptr, env.cpu_device());
|
nullptr, env.cpu_device());
|
||||||
tensorflow::testing::StartTiming();
|
for (auto s : state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
TF_CHECK_OK(k.Init({}, ndef, nullptr));
|
TF_CHECK_OK(k.Init({}, ndef, nullptr));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_KernelAndDeviceInit);
|
BENCHMARK(BM_KernelAndDeviceInit);
|
||||||
|
|
||||||
void BM_KernelAndDeviceRun(int iters) {
|
void BM_KernelAndDeviceRun(::testing::benchmark::State& state) {
|
||||||
tensorflow::testing::StopTiming();
|
|
||||||
Tensor t(Input({{1.0f, 2.0f}, {3.0f, 4.0f}}).tensor());
|
Tensor t(Input({{1.0f, 2.0f}, {3.0f, 4.0f}}).tensor());
|
||||||
gtl::InlinedVector<TensorValue, 4> inputs;
|
gtl::InlinedVector<TensorValue, 4> inputs;
|
||||||
inputs.push_back(TensorValue(&t));
|
inputs.push_back(TensorValue(&t));
|
||||||
@ -145,8 +138,7 @@ void BM_KernelAndDeviceRun(int iters) {
|
|||||||
nullptr, env.cpu_device());
|
nullptr, env.cpu_device());
|
||||||
TF_CHECK_OK(k.Init({}, ndef, nullptr));
|
TF_CHECK_OK(k.Init({}, ndef, nullptr));
|
||||||
const EagerKernelArgs args(std::move(inputs));
|
const EagerKernelArgs args(std::move(inputs));
|
||||||
tensorflow::testing::StartTiming();
|
for (auto s : state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
TF_CHECK_OK(k.Run(nullptr, args, &outputs, nullptr, absl::nullopt));
|
TF_CHECK_OK(k.Run(nullptr, args, &outputs, nullptr, absl::nullopt));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -433,11 +433,10 @@ TEST_F(ExecutorTest, NoInputTensors) {
|
|||||||
// Create a graph that is 'depth' deep. At each level, fan-in and fan-out a
|
// Create a graph that is 'depth' deep. At each level, fan-in and fan-out a
|
||||||
// maximum of 'width' nodes. All nodes are no-ops and all dependencies are
|
// maximum of 'width' nodes. All nodes are no-ops and all dependencies are
|
||||||
// control dependencies.
|
// control dependencies.
|
||||||
static void BM_executor(int iters, int width, int depth) {
|
static void BM_executor(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
const int width = state.range(0);
|
||||||
#ifdef PLATFORM_GOOGLE
|
const int depth = state.range(1);
|
||||||
BenchmarkUseRealTime();
|
|
||||||
#endif // PLATFORM_GOOGLE
|
|
||||||
Graph* g = new Graph(OpRegistry::Global());
|
Graph* g = new Graph(OpRegistry::Global());
|
||||||
random::PhiloxRandom philox(1729, 17);
|
random::PhiloxRandom philox(1729, 17);
|
||||||
random::SimplePhilox rand(&philox);
|
random::SimplePhilox rand(&philox);
|
||||||
@ -466,30 +465,29 @@ static void BM_executor(int iters, int width, int depth) {
|
|||||||
++cur;
|
++cur;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef PLATFORM_GOOGLE
|
|
||||||
SetBenchmarkLabel(strings::StrCat("Nodes = ", cur));
|
|
||||||
SetBenchmarkItemsProcessed(cur * static_cast<int64>(iters));
|
|
||||||
#endif // PLATFORM_GOOGLE
|
|
||||||
FixupSourceAndSinkEdges(g);
|
FixupSourceAndSinkEdges(g);
|
||||||
testing::StartTiming();
|
test::Benchmark("cpu", g, /*old_benchmark_api=*/false).Run(state);
|
||||||
test::Benchmark("cpu", g).Run(iters);
|
|
||||||
|
state.SetLabel(strings::StrCat("Nodes = ", cur));
|
||||||
|
state.SetItemsProcessed(cur * static_cast<int64>(state.iterations()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tall skinny graphs
|
// Tall skinny graphs
|
||||||
BENCHMARK(BM_executor)->ArgPair(16, 1024);
|
BENCHMARK(BM_executor)->UseRealTime()->ArgPair(16, 1024);
|
||||||
BENCHMARK(BM_executor)->ArgPair(32, 8192);
|
BENCHMARK(BM_executor)->UseRealTime()->ArgPair(32, 8192);
|
||||||
|
|
||||||
// Short fat graphs
|
// Short fat graphs
|
||||||
BENCHMARK(BM_executor)->ArgPair(1024, 16);
|
BENCHMARK(BM_executor)->UseRealTime()->ArgPair(1024, 16);
|
||||||
BENCHMARK(BM_executor)->ArgPair(8192, 32);
|
BENCHMARK(BM_executor)->UseRealTime()->ArgPair(8192, 32);
|
||||||
|
|
||||||
// Tall fat graph
|
// Tall fat graph
|
||||||
BENCHMARK(BM_executor)->ArgPair(1024, 1024);
|
BENCHMARK(BM_executor)->UseRealTime()->ArgPair(1024, 1024);
|
||||||
|
|
||||||
|
static void BM_const_identity(::testing::benchmark::State& state) {
|
||||||
|
const int width = state.range(0);
|
||||||
|
const int outputs_per_const = state.range(1);
|
||||||
|
|
||||||
static void BM_const_identity(int iters, int width, int outputs_per_const) {
|
|
||||||
#ifdef PLATFORM_GOOGL
|
|
||||||
BenchmarkUseRealTime();
|
|
||||||
#endif // PLATFORM_GOOGLE
|
|
||||||
Graph* g = new Graph(OpRegistry::Global());
|
Graph* g = new Graph(OpRegistry::Global());
|
||||||
for (int i = 0; i < width; ++i) {
|
for (int i = 0; i < width; ++i) {
|
||||||
Tensor i_t(i);
|
Tensor i_t(i);
|
||||||
@ -499,23 +497,21 @@ static void BM_const_identity(int iters, int width, int outputs_per_const) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
FixupSourceAndSinkEdges(g);
|
FixupSourceAndSinkEdges(g);
|
||||||
#ifdef PLATFORM_GOOGLE
|
test::Benchmark("cpu", g, /*old_benchmark_api=*/false).Run(state);
|
||||||
SetBenchmarkLabel(
|
state.SetLabel(strings::StrCat("Nodes = ", (1 + outputs_per_const) * width));
|
||||||
strings::StrCat("Nodes = ", (1 + outputs_per_const) * width));
|
state.SetItemsProcessed((1 + outputs_per_const) * width *
|
||||||
SetBenchmarkItemsProcessed((1 + outputs_per_const) * width *
|
static_cast<int64>(state.iterations()));
|
||||||
static_cast<int64>(iters));
|
|
||||||
#endif // PLATFORM_GOOGLE
|
|
||||||
test::Benchmark("cpu", g).Run(iters);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Graph with actual op execution.
|
// Graph with actual op execution.
|
||||||
BENCHMARK(BM_const_identity)->ArgPair(1, 1);
|
BENCHMARK(BM_const_identity)
|
||||||
BENCHMARK(BM_const_identity)->ArgPair(1, 100);
|
->UseRealTime()
|
||||||
BENCHMARK(BM_const_identity)->ArgPair(100, 1);
|
->ArgPair(1, 1)
|
||||||
BENCHMARK(BM_const_identity)->ArgPair(100, 100);
|
->ArgPair(1, 100)
|
||||||
|
->ArgPair(100, 1)
|
||||||
|
->ArgPair(100, 100);
|
||||||
|
|
||||||
static void BM_FeedInputFetchOutput(int iters) {
|
static void BM_FeedInputFetchOutput(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
|
||||||
Graph* g = new Graph(OpRegistry::Global());
|
Graph* g = new Graph(OpRegistry::Global());
|
||||||
// z = x + y: x and y are provided as benchmark inputs. z is the
|
// z = x + y: x and y are provided as benchmark inputs. z is the
|
||||||
// output of the benchmark. Conceptually, the caller is ALICE, the
|
// output of the benchmark. Conceptually, the caller is ALICE, the
|
||||||
@ -531,13 +527,10 @@ static void BM_FeedInputFetchOutput(int iters) {
|
|||||||
|
|
||||||
Tensor val(DT_FLOAT, TensorShape({}));
|
Tensor val(DT_FLOAT, TensorShape({}));
|
||||||
val.scalar<float>()() = 3.14;
|
val.scalar<float>()() = 3.14;
|
||||||
#ifdef PLATFORM_GOOGLE
|
|
||||||
SetBenchmarkItemsProcessed(static_cast<int64>(iters));
|
|
||||||
#endif // PLATFORM_GOOGLE
|
|
||||||
FixupSourceAndSinkEdges(g);
|
FixupSourceAndSinkEdges(g);
|
||||||
testing::StartTiming();
|
test::Benchmark("cpu", g, /*old_benchmark_api=*/false)
|
||||||
test::Benchmark("cpu", g).RunWithRendezvousArgs({{x_key, val}, {y_key, val}},
|
.RunWithRendezvousArgs({{x_key, val}, {y_key, val}}, {z_key}, state);
|
||||||
{z_key}, iters);
|
state.SetItemsProcessed(static_cast<int64>(state.iterations()));
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_FeedInputFetchOutput);
|
BENCHMARK(BM_FeedInputFetchOutput);
|
||||||
|
|
||||||
@ -549,9 +542,8 @@ BENCHMARK(BM_FeedInputFetchOutput);
|
|||||||
//
|
//
|
||||||
// ...using the functional `WhileOp` (if `lower` is false) or the
|
// ...using the functional `WhileOp` (if `lower` is false) or the
|
||||||
// `Switch`/`Merge`-style of control flow (if `lower` is true).
|
// `Switch`/`Merge`-style of control flow (if `lower` is true).
|
||||||
static void BM_WhileLoopHelper(int iters, int loop_iters, int loop_vars,
|
static void BM_WhileLoopHelper(::testing::benchmark::State& state,
|
||||||
bool lower) {
|
int loop_iters, int loop_vars, bool lower) {
|
||||||
testing::StopTiming();
|
|
||||||
std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
|
std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
|
||||||
|
|
||||||
// Add test functions for cond and body.
|
// Add test functions for cond and body.
|
||||||
@ -661,12 +653,15 @@ static void BM_WhileLoopHelper(int iters, int loop_iters, int loop_vars,
|
|||||||
}
|
}
|
||||||
|
|
||||||
FixupSourceAndSinkEdges(graph.get());
|
FixupSourceAndSinkEdges(graph.get());
|
||||||
testing::StartTiming();
|
test::Benchmark("cpu", graph.release(), /*old_benchmark_api=*/false)
|
||||||
test::Benchmark("cpu", graph.release()).Run(iters);
|
.Run(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void BM_LoweredWhileLoop(int iters, int loop_iters, int loop_vars) {
|
static void BM_LoweredWhileLoop(::testing::benchmark::State& state) {
|
||||||
BM_WhileLoopHelper(iters, loop_iters, loop_vars, /* lower= */ true);
|
const int loop_iters = state.range(0);
|
||||||
|
const int loop_vars = state.range(1);
|
||||||
|
|
||||||
|
BM_WhileLoopHelper(state, loop_iters, loop_vars, /* lower= */ true);
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_LoweredWhileLoop)
|
BENCHMARK(BM_LoweredWhileLoop)
|
||||||
->ArgPair(0, 1)
|
->ArgPair(0, 1)
|
||||||
@ -680,8 +675,11 @@ BENCHMARK(BM_LoweredWhileLoop)
|
|||||||
->ArgPair(100, 100)
|
->ArgPair(100, 100)
|
||||||
->ArgPair(1000, 100);
|
->ArgPair(1000, 100);
|
||||||
|
|
||||||
static void BM_FunctionalWhileLoop(int iters, int loop_iters, int loop_vars) {
|
static void BM_FunctionalWhileLoop(::testing::benchmark::State& state) {
|
||||||
BM_WhileLoopHelper(iters, loop_iters, loop_vars, /* lower= */ false);
|
const int loop_iters = state.range(0);
|
||||||
|
const int loop_vars = state.range(1);
|
||||||
|
|
||||||
|
BM_WhileLoopHelper(state, loop_iters, loop_vars, /* lower= */ false);
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_FunctionalWhileLoop)
|
BENCHMARK(BM_FunctionalWhileLoop)
|
||||||
->ArgPair(0, 1)
|
->ArgPair(0, 1)
|
||||||
|
@ -221,14 +221,16 @@ TEST(CustomAllocatorAttributes, TestSetterAndGetter) {
|
|||||||
EXPECT_FALSE(HasDeviceAllocatorAttribute(AllocatorAttributes()));
|
EXPECT_FALSE(HasDeviceAllocatorAttribute(AllocatorAttributes()));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void BM_Allocation(int iters, int arg) {
|
static void BM_Allocation(::testing::benchmark::State& state) {
|
||||||
|
const int arg = state.range(0);
|
||||||
|
|
||||||
Allocator* a = cpu_allocator();
|
Allocator* a = cpu_allocator();
|
||||||
// Exercise a few different allocation sizes
|
// Exercise a few different allocation sizes
|
||||||
std::vector<int> sizes = {256, 4096, 16384, 524288, 512, 1048576};
|
std::vector<int> sizes = {256, 4096, 16384, 524288, 512, 1048576};
|
||||||
int size_index = 0;
|
int size_index = 0;
|
||||||
|
|
||||||
if (arg) EnableCPUAllocatorStats();
|
if (arg) EnableCPUAllocatorStats();
|
||||||
while (--iters > 0) {
|
for (auto s : state) {
|
||||||
int bytes = sizes[size_index++ % sizes.size()];
|
int bytes = sizes[size_index++ % sizes.size()];
|
||||||
void* p = a->AllocateRaw(1, bytes);
|
void* p = a->AllocateRaw(1, bytes);
|
||||||
a->DeallocateRaw(p);
|
a->DeallocateRaw(p);
|
||||||
|
@ -39,60 +39,60 @@ TEST(Bfloat16Test, Conversion) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void BM_FloatToBFloat16(int iters) {
|
void BM_FloatToBFloat16(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
|
||||||
static const int N = 32 << 20;
|
static const int N = 32 << 20;
|
||||||
const int64 tot = static_cast<int64>(iters) * N;
|
|
||||||
testing::ItemsProcessed(tot);
|
|
||||||
testing::BytesProcessed(tot * (sizeof(float) + sizeof(bfloat16)));
|
|
||||||
|
|
||||||
float* inp = new float[N];
|
float* inp = new float[N];
|
||||||
bfloat16* out = new bfloat16[N];
|
bfloat16* out = new bfloat16[N];
|
||||||
|
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
while (iters--) {
|
|
||||||
FloatToBFloat16(inp, out, N);
|
FloatToBFloat16(inp, out, N);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const int64 tot = static_cast<int64>(state.iterations()) * N;
|
||||||
|
state.SetItemsProcessed(tot);
|
||||||
|
state.SetBytesProcessed(tot * (sizeof(float) + sizeof(bfloat16)));
|
||||||
|
|
||||||
delete[] inp;
|
delete[] inp;
|
||||||
delete[] out;
|
delete[] out;
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_FloatToBFloat16);
|
BENCHMARK(BM_FloatToBFloat16);
|
||||||
|
|
||||||
static void BM_RoundFloatToBFloat16(int iters) {
|
void BM_RoundFloatToBFloat16(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
|
||||||
static const int N = 32 << 20;
|
static const int N = 32 << 20;
|
||||||
const int64 tot = static_cast<int64>(iters) * N;
|
|
||||||
testing::ItemsProcessed(tot);
|
|
||||||
testing::BytesProcessed(tot * (sizeof(float) + sizeof(bfloat16)));
|
|
||||||
|
|
||||||
float* inp = new float[N];
|
float* inp = new float[N];
|
||||||
bfloat16* out = new bfloat16[N];
|
bfloat16* out = new bfloat16[N];
|
||||||
|
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
while (iters--) {
|
|
||||||
RoundFloatToBFloat16(inp, out, N);
|
RoundFloatToBFloat16(inp, out, N);
|
||||||
tensorflow::testing::DoNotOptimize(inp);
|
tensorflow::testing::DoNotOptimize(inp);
|
||||||
tensorflow::testing::DoNotOptimize(out);
|
tensorflow::testing::DoNotOptimize(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const int64 tot = static_cast<int64>(state.iterations()) * N;
|
||||||
|
state.SetItemsProcessed(tot);
|
||||||
|
state.SetBytesProcessed(tot * (sizeof(float) + sizeof(bfloat16)));
|
||||||
|
|
||||||
delete[] inp;
|
delete[] inp;
|
||||||
delete[] out;
|
delete[] out;
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_RoundFloatToBFloat16);
|
BENCHMARK(BM_RoundFloatToBFloat16);
|
||||||
|
|
||||||
static void BM_BFloat16ToFloat(int iters) {
|
void BM_BFloat16ToFloat(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
|
||||||
static const int N = 32 << 20;
|
static const int N = 32 << 20;
|
||||||
const int64 tot = static_cast<int64>(iters) * N;
|
|
||||||
testing::ItemsProcessed(tot);
|
|
||||||
testing::BytesProcessed(tot * (sizeof(float) + sizeof(bfloat16)));
|
|
||||||
|
|
||||||
bfloat16* inp = new bfloat16[N];
|
bfloat16* inp = new bfloat16[N];
|
||||||
float* out = new float[N];
|
float* out = new float[N];
|
||||||
|
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
while (iters--) {
|
|
||||||
BFloat16ToFloat(inp, out, N);
|
BFloat16ToFloat(inp, out, N);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const int64 tot = static_cast<int64>(state.iterations()) * N;
|
||||||
|
state.SetItemsProcessed(tot);
|
||||||
|
state.SetBytesProcessed(tot * (sizeof(float) + sizeof(bfloat16)));
|
||||||
|
|
||||||
delete[] inp;
|
delete[] inp;
|
||||||
delete[] out;
|
delete[] out;
|
||||||
}
|
}
|
||||||
|
@ -1002,9 +1002,9 @@ TEST_F(LabelTest, Duplicate) {
|
|||||||
error::INVALID_ARGUMENT);
|
error::INVALID_ARGUMENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BM_InputRangeHelper(int iters, const NodeDef& node_def,
|
void BM_InputRangeHelper(::testing::benchmark::State& state,
|
||||||
const char* input_name, int expected_start,
|
const NodeDef& node_def, const char* input_name,
|
||||||
int expected_stop) {
|
int expected_start, int expected_stop) {
|
||||||
Status status;
|
Status status;
|
||||||
auto device = absl::make_unique<DummyDevice>(Env::Default());
|
auto device = absl::make_unique<DummyDevice>(Env::Default());
|
||||||
|
|
||||||
@ -1013,24 +1013,20 @@ void BM_InputRangeHelper(int iters, const NodeDef& node_def,
|
|||||||
TF_GRAPH_DEF_VERSION, &status));
|
TF_GRAPH_DEF_VERSION, &status));
|
||||||
TF_CHECK_OK(status);
|
TF_CHECK_OK(status);
|
||||||
|
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
int start;
|
int start;
|
||||||
int stop;
|
int stop;
|
||||||
TF_CHECK_OK(op->InputRange(input_name, &start, &stop));
|
TF_CHECK_OK(op->InputRange(input_name, &start, &stop));
|
||||||
EXPECT_EQ(expected_start, start);
|
EXPECT_EQ(expected_start, start);
|
||||||
EXPECT_EQ(expected_stop, stop);
|
EXPECT_EQ(expected_stop, stop);
|
||||||
}
|
}
|
||||||
testing::StopTiming();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
REGISTER_KERNEL_BUILDER(Name("ConcatV2").Device(DEVICE_CPU), DummyKernel);
|
REGISTER_KERNEL_BUILDER(Name("ConcatV2").Device(DEVICE_CPU), DummyKernel);
|
||||||
REGISTER_KERNEL_BUILDER(Name("Select").Device(DEVICE_CPU), DummyKernel);
|
REGISTER_KERNEL_BUILDER(Name("Select").Device(DEVICE_CPU), DummyKernel);
|
||||||
REGISTER_KERNEL_BUILDER(Name("MatMul").Device(DEVICE_CPU), DummyKernel);
|
REGISTER_KERNEL_BUILDER(Name("MatMul").Device(DEVICE_CPU), DummyKernel);
|
||||||
|
|
||||||
void BM_ConcatInputRange(int iters) {
|
void BM_ConcatInputRange(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
|
||||||
|
|
||||||
// Create a ConcatV2 NodeDef with 4 inputs (plus the axis).
|
// Create a ConcatV2 NodeDef with 4 inputs (plus the axis).
|
||||||
NodeDef node_def;
|
NodeDef node_def;
|
||||||
node_def.set_name("concat-op");
|
node_def.set_name("concat-op");
|
||||||
@ -1048,12 +1044,10 @@ void BM_ConcatInputRange(int iters) {
|
|||||||
node_def.add_input(strings::StrCat("a:", i));
|
node_def.add_input(strings::StrCat("a:", i));
|
||||||
}
|
}
|
||||||
|
|
||||||
BM_InputRangeHelper(iters, node_def, "values", 0, 4);
|
BM_InputRangeHelper(state, node_def, "values", 0, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BM_SelectInputRange(int iters) {
|
void BM_SelectInputRange(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
|
||||||
|
|
||||||
// Create a Select NodeDef with 3 inputs.
|
// Create a Select NodeDef with 3 inputs.
|
||||||
NodeDef node_def;
|
NodeDef node_def;
|
||||||
node_def.set_name("select-op");
|
node_def.set_name("select-op");
|
||||||
@ -1065,11 +1059,11 @@ void BM_SelectInputRange(int iters) {
|
|||||||
node_def.add_input(strings::StrCat("a:", i));
|
node_def.add_input(strings::StrCat("a:", i));
|
||||||
}
|
}
|
||||||
|
|
||||||
BM_InputRangeHelper(iters, node_def, "condition", 0, 1);
|
BM_InputRangeHelper(state, node_def, "condition", 0, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BM_TraceString(const int iters, const int verbose) {
|
void BM_TraceString(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
const int verbose = state.range(0);
|
||||||
|
|
||||||
// Create a MatMul NodeDef with 2 inputs.
|
// Create a MatMul NodeDef with 2 inputs.
|
||||||
NodeDef node_def;
|
NodeDef node_def;
|
||||||
@ -1103,11 +1097,9 @@ void BM_TraceString(const int iters, const int verbose) {
|
|||||||
params.inputs = &inputs;
|
params.inputs = &inputs;
|
||||||
auto ctx = absl::make_unique<OpKernelContext>(&params);
|
auto ctx = absl::make_unique<OpKernelContext>(&params);
|
||||||
|
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
auto trace = op->TraceString(*ctx, verbose);
|
auto trace = op->TraceString(*ctx, verbose);
|
||||||
}
|
}
|
||||||
testing::StopTiming();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BENCHMARK(BM_ConcatInputRange);
|
BENCHMARK(BM_ConcatInputRange);
|
||||||
|
@ -434,83 +434,89 @@ TEST_F(LocalRendezvousTest, TransferDummyDeviceContext) {
|
|||||||
args1.device_context->Unref();
|
args1.device_context->Unref();
|
||||||
}
|
}
|
||||||
|
|
||||||
void BM_SendRecv(int iters) {
|
void BM_SendRecv(::testing::benchmark::State& state) {
|
||||||
Rendezvous* rendez = NewLocalRendezvous();
|
Rendezvous* rendez = NewLocalRendezvous();
|
||||||
Tensor orig = V("val");
|
Tensor orig = V("val");
|
||||||
Tensor val(DT_STRING, TensorShape({}));
|
Tensor val(DT_STRING, TensorShape({}));
|
||||||
bool is_dead = false;
|
bool is_dead = false;
|
||||||
Rendezvous::Args args;
|
Rendezvous::Args args;
|
||||||
if (iters > 0) {
|
|
||||||
while (iters--) {
|
for (auto s : state) {
|
||||||
TF_CHECK_OK(rendez->Send(KeyFoo(), args, orig, is_dead));
|
TF_CHECK_OK(rendez->Send(KeyFoo(), args, orig, is_dead));
|
||||||
TF_CHECK_OK(rendez->Recv(KeyFoo(), args, &val, &is_dead));
|
TF_CHECK_OK(rendez->Recv(KeyFoo(), args, &val, &is_dead));
|
||||||
}
|
|
||||||
CHECK_EQ(V(val), V(orig));
|
|
||||||
}
|
}
|
||||||
|
CHECK_EQ(V(val), V(orig));
|
||||||
|
|
||||||
rendez->Unref();
|
rendez->Unref();
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_SendRecv);
|
BENCHMARK(BM_SendRecv);
|
||||||
|
|
||||||
void BM_RecvSend(int iters) {
|
void BM_RecvSend(::testing::benchmark::State& state) {
|
||||||
Rendezvous* rendez = NewLocalRendezvous();
|
Rendezvous* rendez = NewLocalRendezvous();
|
||||||
Tensor orig = V("val");
|
Tensor orig = V("val");
|
||||||
Tensor val(DT_STRING, TensorShape({}));
|
Tensor val(DT_STRING, TensorShape({}));
|
||||||
bool is_dead = false;
|
bool is_dead = false;
|
||||||
Rendezvous::Args args;
|
Rendezvous::Args args;
|
||||||
if (iters > 0) {
|
|
||||||
while (iters--) {
|
for (auto s : state) {
|
||||||
bool received = false;
|
bool received = false;
|
||||||
rendez->RecvAsync(
|
rendez->RecvAsync(
|
||||||
KeyFoo(), args,
|
KeyFoo(), args,
|
||||||
[&val, &received](const Status& s, const Rendezvous::Args& send_args,
|
[&val, &received](const Status& /*s*/,
|
||||||
const Rendezvous::Args& recv_args,
|
const Rendezvous::Args& /*send_args*/,
|
||||||
const Tensor& tensor, bool is_dead) {
|
const Rendezvous::Args& /*recv_args*/,
|
||||||
val = tensor;
|
const Tensor& tensor, bool /*is_dead*/) {
|
||||||
received = true;
|
val = tensor;
|
||||||
});
|
received = true;
|
||||||
TF_CHECK_OK(rendez->Send(KeyFoo(), args, orig, is_dead));
|
});
|
||||||
CHECK(received);
|
TF_CHECK_OK(rendez->Send(KeyFoo(), args, orig, is_dead));
|
||||||
}
|
CHECK(received);
|
||||||
CHECK_EQ(V(val), V(orig));
|
|
||||||
}
|
}
|
||||||
|
CHECK_EQ(V(val), V(orig));
|
||||||
|
|
||||||
rendez->Unref();
|
rendez->Unref();
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_RecvSend);
|
BENCHMARK(BM_RecvSend);
|
||||||
|
|
||||||
void BM_PingPong(int iters) {
|
void BM_PingPong(::testing::benchmark::State& state) {
|
||||||
CHECK_GT(iters, 0);
|
const int messages_count = state.range(0);
|
||||||
auto* cm = new CancellationManager();
|
auto* cm = new CancellationManager();
|
||||||
thread::ThreadPool* pool = new thread::ThreadPool(Env::Default(), "test", 1);
|
thread::ThreadPool* pool = new thread::ThreadPool(Env::Default(), "test", 1);
|
||||||
|
|
||||||
// The main thread sends "foo" for iters times and receives "bar"
|
// Benchmark loop
|
||||||
// for iters times. The other thread sends "bar" for iters times
|
// In each iteration:
|
||||||
// and receives "foo" for iters times.
|
// The main thread sends "foo" for messages_count times and receives "bar"
|
||||||
Rendezvous* rendez = NewLocalRendezvous();
|
// for messages_count times. The other thread sends "bar" for
|
||||||
pool->Schedule([rendez, iters]() {
|
// messages_count times and receives "foo" for messages_count times.
|
||||||
Tensor bar = V("bar");
|
for (auto s : state) {
|
||||||
Tensor foo(DT_STRING, TensorShape({}));
|
Rendezvous* rendez = NewLocalRendezvous();
|
||||||
|
pool->Schedule([rendez, messages_count]() {
|
||||||
|
Tensor bar = V("bar");
|
||||||
|
Tensor foo(DT_STRING, TensorShape({}));
|
||||||
|
bool is_dead = false;
|
||||||
|
Rendezvous::Args args;
|
||||||
|
for (int i = 0; i < messages_count; ++i) {
|
||||||
|
TF_CHECK_OK(rendez->Recv(KeyFoo(), args, &foo, &is_dead));
|
||||||
|
TF_CHECK_OK(rendez->Send(KeyBar(), args, bar, is_dead));
|
||||||
|
}
|
||||||
|
CHECK_EQ("foo", V(foo));
|
||||||
|
});
|
||||||
|
Tensor foo = V("foo");
|
||||||
|
Tensor bar(DT_STRING, TensorShape({}));
|
||||||
bool is_dead = false;
|
bool is_dead = false;
|
||||||
Rendezvous::Args args;
|
Rendezvous::Args args;
|
||||||
for (int i = 0; i < iters; ++i) {
|
args.cancellation_manager = cm;
|
||||||
TF_CHECK_OK(rendez->Recv(KeyFoo(), args, &foo, &is_dead));
|
for (int i = 0; i < messages_count; ++i) {
|
||||||
TF_CHECK_OK(rendez->Send(KeyBar(), args, bar, is_dead));
|
TF_CHECK_OK(rendez->Send(KeyFoo(), args, foo, is_dead));
|
||||||
|
TF_CHECK_OK(rendez->Recv(KeyBar(), args, &bar, &is_dead));
|
||||||
}
|
}
|
||||||
CHECK_EQ("foo", V(foo));
|
CHECK_EQ("bar", V(bar));
|
||||||
});
|
|
||||||
Tensor foo = V("foo");
|
|
||||||
Tensor bar(DT_STRING, TensorShape({}));
|
|
||||||
bool is_dead = false;
|
|
||||||
Rendezvous::Args args;
|
|
||||||
args.cancellation_manager = cm;
|
|
||||||
for (int i = 0; i < iters; ++i) {
|
|
||||||
TF_CHECK_OK(rendez->Send(KeyFoo(), args, foo, is_dead));
|
|
||||||
TF_CHECK_OK(rendez->Recv(KeyBar(), args, &bar, &is_dead));
|
|
||||||
}
|
}
|
||||||
CHECK_EQ("bar", V(bar));
|
state.SetItemsProcessed(messages_count * state.iterations());
|
||||||
delete pool;
|
delete pool;
|
||||||
delete cm;
|
delete cm;
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_PingPong);
|
BENCHMARK(BM_PingPong)->Arg(100)->Arg(200)->Arg(300);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
@ -684,19 +684,24 @@ static std::vector<int64> MakeSizes(int arg) {
|
|||||||
return sizes;
|
return sizes;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void BM_TensorShape_Init(int iters, int arg) {
|
void BM_TensorShape_Init(::testing::benchmark::State& state) {
|
||||||
|
const int arg = state.range(0);
|
||||||
|
|
||||||
auto sizes = MakeSizes(arg);
|
auto sizes = MakeSizes(arg);
|
||||||
while (--iters > 0) {
|
for (auto s : state) {
|
||||||
TensorShape shape(sizes);
|
TensorShape shape(sizes);
|
||||||
tensorflow::testing::DoNotOptimize(shape.num_elements());
|
tensorflow::testing::DoNotOptimize(shape.num_elements());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_TensorShape_Init)->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4);
|
BENCHMARK(BM_TensorShape_Init)->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4);
|
||||||
|
|
||||||
static void BM_TensorShape_Assign(int iters, int arg) {
|
void BM_TensorShape_Assign(::testing::benchmark::State& state) {
|
||||||
TensorShape s(MakeSizes(arg));
|
const int arg = state.range(0);
|
||||||
while (--iters > 0) {
|
|
||||||
TensorShape s2 = s;
|
TensorShape shape(MakeSizes(arg));
|
||||||
|
for (auto s : state) {
|
||||||
|
const TensorShape s2 = shape;
|
||||||
|
tensorflow::testing::DoNotOptimize(s2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_TensorShape_Assign)->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4);
|
BENCHMARK(BM_TensorShape_Assign)->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4);
|
||||||
|
@ -1468,19 +1468,19 @@ TEST(SummarizeValue, STRING_PRINT_V2) {
|
|||||||
x.SummarizeValue(16, true));
|
x.SummarizeValue(16, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
void BM_CreateAndDestroy(int iters) {
|
void BM_CreateAndDestroy(::testing::benchmark::State& state) {
|
||||||
TensorShape shape({10, 20});
|
TensorShape shape({10, 20});
|
||||||
while (--iters) {
|
for (auto s : state) {
|
||||||
Tensor t(DT_FLOAT, shape);
|
Tensor t(DT_FLOAT, shape);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_CreateAndDestroy);
|
BENCHMARK(BM_CreateAndDestroy);
|
||||||
|
|
||||||
void BM_Assign(int iters) {
|
void BM_Assign(::testing::benchmark::State& state) {
|
||||||
Tensor a(DT_FLOAT, TensorShape({10, 20}));
|
Tensor a(DT_FLOAT, TensorShape({10, 20}));
|
||||||
Tensor b(DT_FLOAT, TensorShape({10, 20}));
|
Tensor b(DT_FLOAT, TensorShape({10, 20}));
|
||||||
bool a_to_b = true;
|
bool a_to_b = true;
|
||||||
while (--iters) {
|
for (auto s : state) {
|
||||||
if (a_to_b) {
|
if (a_to_b) {
|
||||||
b = a;
|
b = a;
|
||||||
} else {
|
} else {
|
||||||
@ -1498,20 +1498,20 @@ TEST(Tensor, EmptyTensorData) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Benchmark create and destroy a tensor, with an allocated buffer.
|
// Benchmark create and destroy a tensor, with an allocated buffer.
|
||||||
void BM_CreateAndDestroyWithBuf(int iters) {
|
void BM_CreateAndDestroyWithBuf(::testing::benchmark::State& state) {
|
||||||
TensorShape shape({10, 20});
|
TensorShape shape({10, 20});
|
||||||
Allocator* allocator = cpu_allocator();
|
Allocator* allocator = cpu_allocator();
|
||||||
while (--iters) {
|
for (auto s : state) {
|
||||||
Tensor a(allocator, DT_FLOAT, shape);
|
Tensor a(allocator, DT_FLOAT, shape);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_CreateAndDestroyWithBuf);
|
BENCHMARK(BM_CreateAndDestroyWithBuf);
|
||||||
|
|
||||||
// Benchmark create+copy a tensor, with an allocated buffer.
|
// Benchmark create+copy a tensor, with an allocated buffer.
|
||||||
void BM_CreateAndCopyCtrWithBuf(int iters) {
|
void BM_CreateAndCopyCtrWithBuf(::testing::benchmark::State& state) {
|
||||||
TensorShape shape({10, 20});
|
TensorShape shape({10, 20});
|
||||||
Allocator* allocator = cpu_allocator();
|
Allocator* allocator = cpu_allocator();
|
||||||
while (--iters) {
|
for (auto s : state) {
|
||||||
Tensor a(allocator, DT_FLOAT, shape);
|
Tensor a(allocator, DT_FLOAT, shape);
|
||||||
Tensor b(a);
|
Tensor b(a);
|
||||||
}
|
}
|
||||||
@ -1519,10 +1519,10 @@ void BM_CreateAndCopyCtrWithBuf(int iters) {
|
|||||||
BENCHMARK(BM_CreateAndCopyCtrWithBuf);
|
BENCHMARK(BM_CreateAndCopyCtrWithBuf);
|
||||||
|
|
||||||
// Benchmark create+move a tensor, with an allocated buffer.
|
// Benchmark create+move a tensor, with an allocated buffer.
|
||||||
void BM_CreateAndMoveCtrWithBuf(int iters) {
|
void BM_CreateAndMoveCtrWithBuf(::testing::benchmark::State& state) {
|
||||||
TensorShape shape({10, 20});
|
TensorShape shape({10, 20});
|
||||||
Allocator* allocator = cpu_allocator();
|
Allocator* allocator = cpu_allocator();
|
||||||
while (--iters) {
|
for (auto s : state) {
|
||||||
Tensor a(allocator, DT_FLOAT, shape);
|
Tensor a(allocator, DT_FLOAT, shape);
|
||||||
Tensor b(std::move(a));
|
Tensor b(std::move(a));
|
||||||
}
|
}
|
||||||
@ -1531,10 +1531,11 @@ BENCHMARK(BM_CreateAndMoveCtrWithBuf);
|
|||||||
|
|
||||||
// Benchmark creating and destroy a host-scalar tensor, using the allocator
|
// Benchmark creating and destroy a host-scalar tensor, using the allocator
|
||||||
// interface.
|
// interface.
|
||||||
void BM_CreateAndDestroyHostScalarNonOptimized(int iters) {
|
void BM_CreateAndDestroyHostScalarNonOptimized(
|
||||||
|
::testing::benchmark::State& state) {
|
||||||
TensorShape shape({});
|
TensorShape shape({});
|
||||||
Allocator* allocator = cpu_allocator();
|
Allocator* allocator = cpu_allocator();
|
||||||
while (--iters) {
|
for (auto s : state) {
|
||||||
Tensor a(allocator, DT_FLOAT, shape);
|
Tensor a(allocator, DT_FLOAT, shape);
|
||||||
a.scalar<float>()() = 37.0;
|
a.scalar<float>()() = 37.0;
|
||||||
}
|
}
|
||||||
@ -1543,32 +1544,33 @@ BENCHMARK(BM_CreateAndDestroyHostScalarNonOptimized);
|
|||||||
|
|
||||||
// Benchmark creating and destroy a host-scalar tensor, using the specialized
|
// Benchmark creating and destroy a host-scalar tensor, using the specialized
|
||||||
// constructor.
|
// constructor.
|
||||||
void BM_CreateAndDestroyHostScalarOptimized(int iters) {
|
void BM_CreateAndDestroyHostScalarOptimized(
|
||||||
while (--iters) {
|
::testing::benchmark::State& state) {
|
||||||
|
for (auto s : state) {
|
||||||
Tensor a(37.0);
|
Tensor a(37.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_CreateAndDestroyHostScalarOptimized);
|
BENCHMARK(BM_CreateAndDestroyHostScalarOptimized);
|
||||||
|
|
||||||
static void BM_FromProto(int iters, int size) {
|
void BM_FromProto(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
const int size = state.range(0);
|
||||||
|
|
||||||
TensorShape shape({size});
|
TensorShape shape({size});
|
||||||
Allocator* allocator = cpu_allocator();
|
Allocator* allocator = cpu_allocator();
|
||||||
Tensor a(allocator, DT_FLOAT, shape);
|
Tensor a(allocator, DT_FLOAT, shape);
|
||||||
std::fill_n(a.flat<float>().data(), size, 42.0);
|
std::fill_n(a.flat<float>().data(), size, 42.0);
|
||||||
TensorProto p;
|
TensorProto p;
|
||||||
a.AsProtoField(&p);
|
a.AsProtoField(&p);
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
while (--iters) {
|
|
||||||
Tensor b;
|
Tensor b;
|
||||||
ASSERT_TRUE(b.FromProto(p));
|
ASSERT_TRUE(b.FromProto(p));
|
||||||
}
|
}
|
||||||
testing::StopTiming();
|
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_FromProto)->Range(1, 1 << 20);
|
BENCHMARK(BM_FromProto)->Range(1, 1 << 20);
|
||||||
|
|
||||||
static void BM_FromProtoCompressed(int iters, int size) {
|
void BM_FromProtoCompressed(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
const int size = state.range(0);
|
||||||
|
|
||||||
TensorShape shape({size});
|
TensorShape shape({size});
|
||||||
Allocator* allocator = cpu_allocator();
|
Allocator* allocator = cpu_allocator();
|
||||||
Tensor a(allocator, DT_FLOAT, shape);
|
Tensor a(allocator, DT_FLOAT, shape);
|
||||||
@ -1576,17 +1578,16 @@ static void BM_FromProtoCompressed(int iters, int size) {
|
|||||||
TensorProto p;
|
TensorProto p;
|
||||||
a.AsProtoField(&p);
|
a.AsProtoField(&p);
|
||||||
tensor::CompressTensorProtoInPlace(&p);
|
tensor::CompressTensorProtoInPlace(&p);
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
while (--iters) {
|
|
||||||
Tensor b;
|
Tensor b;
|
||||||
ASSERT_TRUE(b.FromProto(p));
|
ASSERT_TRUE(b.FromProto(p));
|
||||||
}
|
}
|
||||||
testing::StopTiming();
|
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_FromProtoCompressed)->Range(1, 1 << 20);
|
BENCHMARK(BM_FromProtoCompressed)->Range(1, 1 << 20);
|
||||||
|
|
||||||
static void BM_FromProtoCompressedZero(int iters, int size) {
|
void BM_FromProtoCompressedZero(::testing::benchmark::State& state) {
|
||||||
testing::StopTiming();
|
const int size = state.range(0);
|
||||||
|
|
||||||
TensorShape shape({size});
|
TensorShape shape({size});
|
||||||
Allocator* allocator = cpu_allocator();
|
Allocator* allocator = cpu_allocator();
|
||||||
Tensor a(allocator, DT_FLOAT, shape);
|
Tensor a(allocator, DT_FLOAT, shape);
|
||||||
@ -1595,12 +1596,10 @@ static void BM_FromProtoCompressedZero(int iters, int size) {
|
|||||||
TensorProto p;
|
TensorProto p;
|
||||||
a.AsProtoField(&p);
|
a.AsProtoField(&p);
|
||||||
tensor::CompressTensorProtoInPlace(&p);
|
tensor::CompressTensorProtoInPlace(&p);
|
||||||
testing::StartTiming();
|
for (auto s : state) {
|
||||||
while (--iters) {
|
|
||||||
Tensor b;
|
Tensor b;
|
||||||
ASSERT_TRUE(b.FromProto(p));
|
ASSERT_TRUE(b.FromProto(p));
|
||||||
}
|
}
|
||||||
testing::StopTiming();
|
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_FromProtoCompressedZero)->Range(1, 1 << 20);
|
BENCHMARK(BM_FromProtoCompressedZero)->Range(1, 1 << 20);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user