Internal tests cleanup.
PiperOrigin-RevId: 339456235
Change-Id: Ia960a93771ef371256dc10078a39421ca1faeb14

parent 08af2ba27c
commit fbac0a99f7
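The hunks below apply one pattern across several XLA test files: microbenchmarks move from the old TensorFlow signature void BM_Foo(int num_iters, ...) with manual tensorflow::testing::StartTiming() / StopTiming() calls to the Google-Benchmark-style ::testing::benchmark::State& interface, where only the body of for (auto s : state) is timed and extra numeric arguments are read with state.range(n). For orientation, a minimal before/after sketch of that pattern; the BM_Sum* functions, the vector workload, and the header path are illustrative assumptions, not code from this commit.

// Before/after sketch of the migration (illustrative only; assumes the
// TensorFlow benchmark shim tensorflow/core/platform/test_benchmark.h,
// which supplies ::testing::benchmark::State and the BENCHMARK macro).
#include <numeric>
#include <vector>

#include "tensorflow/core/platform/test_benchmark.h"

// Old style: the framework hands in an iteration count and the benchmark
// brackets the measured region with StartTiming()/StopTiming() by hand.
void BM_SumOld(int num_iters, int size) {
  tensorflow::testing::StopTiming();
  std::vector<int> v(size, 1);  // setup, excluded from timing
  tensorflow::testing::StartTiming();
  for (int i = 0; i < num_iters; ++i) {
    tensorflow::testing::DoNotOptimize(std::accumulate(v.begin(), v.end(), 0));
  }
  tensorflow::testing::StopTiming();
}

// New style: a State& parameter; only the body of the state loop is timed,
// and numeric arguments arrive through state.range(n).
void BM_SumNew(::testing::benchmark::State& state) {
  const int size = state.range(0);
  std::vector<int> v(size, 1);  // setup before the loop is not timed
  for (auto s : state) {
    tensorflow::testing::DoNotOptimize(std::accumulate(v.begin(), v.end(), 0));
  }
}
BENCHMARK(BM_SumNew)->Arg(1 << 10);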
@@ -2100,10 +2100,14 @@ std::unique_ptr<HloComputation> MakeBenchmarkWhileBody() {
   return builder.Build();
 }
 
-void BM_SequentialWhiles(int num_iters, int num_whiles) {
+void BM_SequentialWhiles(::testing::benchmark::State& state) {
+  const int num_whiles = state.range(0);
+
   // This benchmark constructs a chain of sequential while instructions.
-  tensorflow::testing::StopTiming();
-  for (int i = 0; i < num_iters; ++i) {
+  // Timer starts automatically at the first iteration of this loop
+  // and ends after the last one.
+  for (auto s : state) {
+    state.PauseTiming();
     HloModuleConfig config;
     config.set_debug_options(GetDebugOptionsFromFlags());
     HloModule module("BM_SequentialWhiles", config);
@@ -2131,19 +2135,22 @@ void BM_SequentialWhiles(int num_iters, int num_whiles) {
 
     CopyInsertion copy_insertion;
 
-    tensorflow::testing::StartTiming();
+    state.ResumeTiming();
     ASSERT_IS_OK(copy_insertion.Run(&module).status());
-    tensorflow::testing::StopTiming();
+    state.PauseTiming();
 
     // The entry computation should have three copies, and each body has one.
     ASSERT_EQ(CountCopies(module), 3 + num_whiles);
+    state.ResumeTiming();
   }
 }
 
-void BM_ParallelWhiles(int num_iters, int num_whiles) {
+void BM_ParallelWhiles(::testing::benchmark::State& state) {
+  const int num_whiles = state.range(0);
+
   // This benchmark constructs a fan-out of parallel while instructions.
-  tensorflow::testing::StopTiming();
-  for (int i = 0; i < num_iters; ++i) {
+  for (auto s : state) {
+    state.PauseTiming();
     HloModuleConfig config;
     config.set_debug_options(GetDebugOptionsFromFlags());
     HloModule module("BM_SequentialWhiles", config);
@@ -2182,9 +2189,9 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) {
 
     CopyInsertion copy_insertion;
 
-    tensorflow::testing::StartTiming();
+    state.ResumeTiming();
     ASSERT_IS_OK(copy_insertion.Run(&module).status());
-    tensorflow::testing::StopTiming();
+    state.PauseTiming();
 
     // Each body receives of copy of two of the parameters (the corresponding
     // elements in the body are modified), and there is one copy in each body.
@@ -2209,14 +2216,15 @@ std::unique_ptr<HloComputation> MakeBenchmarkWhileBody(
   return builder.Build();
 }
 
-void BM_ManyElementTuple(int num_iters, const int num_tuple_inputs) {
-  tensorflow::testing::StopTiming();
+void BM_ManyElementTuple(::testing::benchmark::State& state) {
+  const int num_tuple_inputs = state.range(0);
   HloModuleConfig config;
   config.set_debug_options(GetDebugOptionsFromFlags());
   CopyInsertion copy_insertion;
   const Shape element_shape = ShapeUtil::MakeShape(F32, {});
   std::vector<HloInstruction*> tuple_params(num_tuple_inputs);
-  for (int i = 0; i < num_iters; ++i) {
+  for (auto s : state) {
+    state.PauseTiming();
     auto builder = HloComputation::Builder("BM_ParallelWhiles");
     HloModule module("BM_ManyElementTuple", config);
     for (int j = 0; j < num_tuple_inputs; ++j) {
@@ -2234,9 +2242,8 @@ void BM_ManyElementTuple(int num_iters, const int num_tuple_inputs) {
     builder.AddInstruction(HloInstruction::CreateGetTupleElement(
         ShapeUtil::MakeShape(F32, {}), xla_while, 0));
     module.AddEntryComputation(builder.Build());
-    tensorflow::testing::StartTiming();
+    state.ResumeTiming();
     ASSERT_IS_OK(copy_insertion.Run(&module).status());
-    tensorflow::testing::StopTiming();
   }
 }
 
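In BM_SequentialWhiles, BM_ParallelWhiles, and BM_ManyElementTuple above, each iteration rebuilds an HloModule and then checks CountCopies, so the conversion brackets that setup and verification with state.PauseTiming() / state.ResumeTiming() in place of the old StopTiming()/StartTiming() pairs, leaving only the copy-insertion pass itself on the clock. A self-contained sketch of the same structure, with std::sort standing in for copy_insertion.Run(...):

#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>

// Per-iteration setup and verification are excluded from the measurement;
// only the sort (the stand-in for the pass under test) is timed.
void BM_SortFreshInput(::testing::benchmark::State& state) {
  const int n = state.range(0);
  for (auto s : state) {
    state.PauseTiming();                 // stop the clock for setup
    std::vector<int> v(n);
    std::iota(v.rbegin(), v.rend(), 0);  // build a reverse-sorted input
    state.ResumeTiming();                // measure only the work below

    std::sort(v.begin(), v.end());

    state.PauseTiming();                 // checks should not skew the timing
    assert(std::is_sorted(v.begin(), v.end()));
    state.ResumeTiming();                // timer must be running at loop end
  }
}
BENCHMARK(BM_SortFreshInput)->Arg(1 << 12);

The trailing ResumeTiming() mirrors the one BM_SequentialWhiles adds just before its loop's closing brace: the state loop expects the timer to be running when an iteration finishes, so a benchmark that pauses for verification has to resume before falling off the end of the body.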
@@ -2545,8 +2545,7 @@ TEST_F(HloEvaluatorPreciseReduceTest, AddReductionPrecisionTest) {
 
 // Reducing many numbers should be fast because it doesn't create
 // intermediate Literals; the microbenchmark should finish in < 1 msec.
-void BM_ReducePrecisely(int num_iters) {
-  tensorflow::testing::StopTiming();
+void BM_ReducePrecisely(::testing::benchmark::State& state) {
  HloComputation::Builder b("BM_ReducePrecisely");
  HloModuleConfig config;
  config.set_debug_options(GetDebugOptionsFromFlags());
@@ -2574,10 +2573,11 @@ void BM_ReducePrecisely(int num_iters) {
                               /*dimensions_to_reduce=*/{0}, add_func));
   module.AddEntryComputation(b.Build());
 
-  HloEvaluator hlo_eval;
-  tensorflow::testing::StartTiming();
-  hlo_eval.Evaluate(reduce_instruction).ConsumeValueOrDie();
-  tensorflow::testing::StopTiming();
+  // Benchmark loop
+  for (auto s : state) {
+    HloEvaluator hlo_eval;
+    hlo_eval.Evaluate(reduce_instruction).ConsumeValueOrDie();
+  }
 }
 
 BENCHMARK(BM_ReducePrecisely);
@@ -173,8 +173,10 @@ TEST(ScopedShapedBufferTest, TestSubShapeTree) {
 
 // Test TakeSubTree with different depths (depth of ShapeTree) and fan-outs
 // (cardinality of each non-leaf node's children).
-void BM_TakeSubTree(int iters, int depth, int fan_out) {
-  tensorflow::testing::StopTiming();
+void BM_TakeSubTree(::testing::benchmark::State& state) {
+  const int depth = state.range(0);
+  const int fan_out = state.range(1);
+
   TestAllocator allocator;
   xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {32, 64, 128});
   for (int i = 0; i < depth; ++i) {
@@ -183,13 +185,11 @@ void BM_TakeSubTree(int iters, int depth, int fan_out) {
   }
   xla::ScopedShapedBuffer shaped_buffer(shape, /*allocator=*/&allocator,
                                         /*device_ordinal=*/0);
-  tensorflow::testing::StartTiming();
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     // Extract a buffer from approximately the middle of the first level of the
     // tree.
     (void)shaped_buffer.TakeSubTree(/*index=*/{fan_out / 2}).release();
   }
-  tensorflow::testing::StopTiming();
 }
 
 BENCHMARK(BM_TakeSubTree)
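BM_TakeSubTree now pulls depth and fan_out from state.range(0) and state.range(1) instead of taking them as extra int parameters, so its registration has to supply the values as argument pairs; the continuation of the BENCHMARK(BM_TakeSubTree) line falls outside this hunk, so the concrete pairs below are assumptions that only illustrate the mechanism.

// Hypothetical two-argument benchmark under the State-based API; the
// depth/fan-out values here are invented, the wiring is the point.
void BM_TwoRanges(::testing::benchmark::State& state) {
  const int depth = state.range(0);    // first registered argument
  const int fan_out = state.range(1);  // second registered argument
  for (auto s : state) {
    tensorflow::testing::DoNotOptimize(depth * fan_out);  // stand-in work
  }
}
// Each ArgPair() produces one benchmark instance, reported as
// BM_TwoRanges/2/8, BM_TwoRanges/4/4, and so on.
BENCHMARK(BM_TwoRanges)->ArgPair(2, 8)->ArgPair(4, 4);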
@@ -535,94 +535,100 @@ TEST_F(ShapeTreeTest, ReverseIterateOrderLeaves) {
   }));
 }
 
-void BM_Construct(int iters, int depth, int fan_out) {
-  tensorflow::testing::StopTiming();
+void BM_Construct(::testing::benchmark::State& state) {
+  const int depth = state.range(0);
+  const int fan_out = state.range(1);
+
   Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
   for (int i = 0; i < depth; ++i) {
     std::vector<xla::Shape> shapes(fan_out, shape);
     shape = ShapeUtil::MakeTupleShape(shapes);
   }
-  tensorflow::testing::StartTiming();
 
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     ShapeTree<int> shape_tree(shape);
   }
 }
 
-void BM_ConstructUnowned(int iters, int depth, int fan_out) {
-  tensorflow::testing::StopTiming();
+void BM_ConstructUnowned(::testing::benchmark::State& state) {
+  const int depth = state.range(0);
+  const int fan_out = state.range(1);
+
   Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
   for (int i = 0; i < depth; ++i) {
     std::vector<xla::Shape> shapes(fan_out, shape);
     shape = ShapeUtil::MakeTupleShape(shapes);
   }
-  tensorflow::testing::StartTiming();
 
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     ShapeTree<int> shape_tree(&shape);
   }
 }
 
-void BM_Copy(int iters, int depth, int fan_out) {
-  tensorflow::testing::StopTiming();
+void BM_Copy(::testing::benchmark::State& state) {
+  const int depth = state.range(0);
+  const int fan_out = state.range(1);
+
   Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
   for (int i = 0; i < depth; ++i) {
     std::vector<xla::Shape> shapes(fan_out, shape);
     shape = ShapeUtil::MakeTupleShape(shapes);
   }
-  tensorflow::testing::StartTiming();
 
   ShapeTree<int> shape_tree(shape);
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     ShapeTree<int> copy = shape_tree;
     tensorflow::testing::DoNotOptimize(copy);
   }
 }
 
-void BM_Move(int iters, int depth, int fan_out) {
-  tensorflow::testing::StopTiming();
+void BM_Move(::testing::benchmark::State& state) {
+  const int depth = state.range(0);
+  const int fan_out = state.range(1);
+
   Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
   for (int i = 0; i < depth; ++i) {
     std::vector<xla::Shape> shapes(fan_out, shape);
     shape = ShapeUtil::MakeTupleShape(shapes);
   }
-  tensorflow::testing::StartTiming();
 
   ShapeTree<int> shape_tree(shape);
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     ShapeTree<int> copy = std::move(shape_tree);
     shape_tree = std::move(copy);
   }
 }
 
-void BM_ForEach(int iters, int depth, int fan_out) {
-  tensorflow::testing::StopTiming();
+void BM_ForEach(::testing::benchmark::State& state) {
+  const int depth = state.range(0);
+  const int fan_out = state.range(1);
+
   Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
   for (int i = 0; i < depth; ++i) {
     std::vector<xla::Shape> shapes(fan_out, shape);
     shape = ShapeUtil::MakeTupleShape(shapes);
   }
-  tensorflow::testing::StartTiming();
 
   ShapeTree<int> shape_tree(shape);
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     shape_tree.ForEachMutableElement([](const ShapeIndex& index, int* data) {
       tensorflow::testing::DoNotOptimize(index);
     });
   }
 }
 
-void BM_Iterate(int iters, int depth, int fan_out) {
-  tensorflow::testing::StopTiming();
+void BM_Iterate(::testing::benchmark::State& state) {
+  const int depth = state.range(0);
+  const int fan_out = state.range(1);
+
   Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
   for (int i = 0; i < depth; ++i) {
     std::vector<xla::Shape> shapes(fan_out, shape);
     shape = ShapeUtil::MakeTupleShape(shapes);
   }
-  tensorflow::testing::StartTiming();
 
   ShapeTree<int> shape_tree(shape);
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     for (auto& iter : shape_tree) {
       tensorflow::testing::DoNotOptimize(iter.second);
     }
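The ShapeTree benchmarks above (BM_Copy, BM_ForEach, BM_Iterate) wrap their per-iteration result in tensorflow::testing::DoNotOptimize(...). With the timed region now reduced to an otherwise side-effect-free loop body, that call is what forces the compiler to actually materialize the copy or the visited element instead of deleting the work and timing an empty loop. A compilable illustration; the BM_CopyVector name and workload are invented for this note:

#include <vector>

// Without the DoNotOptimize call the copy has no observable effect and may
// be optimized away entirely; with it, each iteration must perform the copy.
void BM_CopyVector(::testing::benchmark::State& state) {
  std::vector<int> source(state.range(0), 42);
  for (auto s : state) {
    std::vector<int> copy = source;            // the work being measured
    tensorflow::testing::DoNotOptimize(copy);  // keep it from being elided
  }
}
BENCHMARK(BM_CopyVector)->Arg(1 << 16);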
@@ -824,9 +824,8 @@ XLA_TEST_F(FusionClientLibraryTest, ManyLayoutTransformations) {
   ComputeAndCompare(&b, {});
 }
 
-void BM_ParallelFusion(int num_iters) {
+void BM_ParallelFusion(::testing::benchmark::State& state) {
   // Simple element-wise computation to benchmark parallel task partitioning.
-  tensorflow::testing::StopTiming();
 
   se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
   auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
@@ -915,17 +914,16 @@ void BM_ParallelFusion(int num_iters) {
   const int64 total_bytes = param0_dim0 * param0_dim0 +
                             param1_dim0 * param1_dim0 +
                             param2_dim0 * param2_dim0;
-  tensorflow::testing::BytesProcessed(static_cast<int64>(num_iters) *
-                                      total_bytes * sizeof(float));
-  tensorflow::testing::UseRealTime();
-  tensorflow::testing::StartTiming();
-  for (int i = 0; i < num_iters; ++i) {
+
+  for (auto s : state) {
     auto result = executable->Run({&buffer0, &buffer1, &buffer2}, options);
     ASSERT_TRUE(result.ok());
   }
+  state.SetBytesProcessed(static_cast<int64>(state.iterations()) * total_bytes *
+                          sizeof(float));
 }
 
-BENCHMARK(BM_ParallelFusion);
+BENCHMARK(BM_ParallelFusion)->UseRealTime();
 
 }  // namespace
 }  // namespace xla
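Two details of the BM_ParallelFusion conversion are easy to miss: throughput is now reported after the loop with state.SetBytesProcessed(state.iterations() * ...), since state.iterations() is only final once the loop has exited, and the wall-clock request moves out of the benchmark body into the registration as BENCHMARK(BM_ParallelFusion)->UseRealTime(). A self-contained sketch of the same reporting pattern; the memcpy workload is illustrative:

#include <cstdint>
#include <cstring>
#include <vector>

// Report bytes/second for a copy loop. SetBytesProcessed is called once,
// after the loop, because state.iterations() is only known at that point.
void BM_CopyBytes(::testing::benchmark::State& state) {
  const int64_t bytes = state.range(0);
  std::vector<char> src(bytes, 'x');
  std::vector<char> dst(bytes);
  for (auto s : state) {
    std::memcpy(dst.data(), src.data(), bytes);
    tensorflow::testing::DoNotOptimize(dst.data());
  }
  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * bytes);
}
// Real (wall-clock) time is requested at registration, not inside the body.
BENCHMARK(BM_CopyBytes)->Arg(1 << 20)->UseRealTime();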
@@ -750,9 +750,7 @@ XLA_TEST_F(HloTestBase, AddOfDUS) {
   EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0}));
 }
 
-void BM_DynamicSlice(int num_iters) {
-  tensorflow::testing::StopTiming();
-
+void BM_DynamicSlice(::testing::benchmark::State& state) {
   se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
   auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
   se::StreamExecutorMemoryAllocator allocator(platform, executors);
@@ -817,8 +815,7 @@ void BM_DynamicSlice(int num_iters) {
   }
 
   // Run benchmark.
-  tensorflow::testing::StartTiming();
-  for (int i = 0; i < num_iters; ++i) {
+  for (auto s : state) {
     auto result = executable->Run(shaped_buffer_ptrs, options);
     ASSERT_TRUE(result.ok());
   }
@@ -946,9 +946,7 @@ XLA_TEST_F(LocalClientExecuteTest, DISABLED_ON_INTERPRETER(InfeedOutfeedTest)) {
 
 // Benchmark that measures the overhead of the LocalClient API when running a
 // trivial computation
-void BM_LocalClientOverhead(int num_iters) {
-  tensorflow::testing::StopTiming();
-
+void BM_LocalClientOverhead(benchmark::State& state) {
   se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
   auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
   se::StreamExecutorMemoryAllocator allocator(platform, executors);
@@ -990,8 +988,7 @@ void BM_LocalClientOverhead(int num_iters) {
     ASSERT_IS_OK(result);
   }
 
-  tensorflow::testing::StartTiming();
-  for (int i = 0; i < num_iters; ++i) {
+  for (auto s : state) {
     auto result = executable->Run({&buffer}, run_options);
     ASSERT_IS_OK(result);
   }
@@ -357,8 +357,8 @@ class TransferDeviceToHostBenchmark : public TransferManagerTest {
   using TransferManagerTest::TransferManagerTest;
   ~TransferDeviceToHostBenchmark() override {}
 
-  void Run(int iters, int num_tuple_elements, int array_size) {
-    tensorflow::testing::StopTiming();
+  void Run(::testing::benchmark::State& state, int num_tuple_elements,
+           int array_size) {
     SetUp();
 
     std::vector<Literal> tuple_elements;
@@ -370,13 +370,11 @@ class TransferDeviceToHostBenchmark : public TransferManagerTest {
     auto device_buffer = AllocateDeviceBuffer(literal.shape());
     TF_CHECK_OK(transfer_manager_->TransferLiteralToDevice(stream_, literal,
                                                            device_buffer));
-    tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters; ++i) {
+    for (auto s : state) {
       TF_ASSERT_OK_AND_ASSIGN(
           Literal result,
           transfer_manager_->TransferLiteralFromDevice(stream_, device_buffer));
     }
-    tensorflow::testing::StopTiming();
     TearDown();
   }
 
@@ -388,7 +386,8 @@ class TransferHostToDeviceBenchmark : public TransferManagerTest {
   using TransferManagerTest::TransferManagerTest;
   ~TransferHostToDeviceBenchmark() override {}
 
-  void Run(int iters, int num_tuple_elements, int array_size) {
+  void Run(::testing::benchmark::State& state, int num_tuple_elements,
+           int array_size) {
     tensorflow::testing::StopTiming();
     SetUp();
 
@@ -400,7 +399,7 @@ class TransferHostToDeviceBenchmark : public TransferManagerTest {
     Literal literal = LiteralUtil::MakeTupleOwned(std::move(tuple_elements));
     auto device_buffer = AllocateDeviceBuffer(literal.shape());
     tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters; ++i) {
+    for (auto s : state) {
       TF_CHECK_OK(transfer_manager_->TransferLiteralToDevice(stream_, literal,
                                                              device_buffer));
     }
@@ -411,16 +410,20 @@ class TransferHostToDeviceBenchmark : public TransferManagerTest {
   void TestBody() override {}
 };
 
-void BM_TransferDeviceToHost(int iters, int num_tuple_elements,
-                             int array_size) {
+void BM_TransferDeviceToHost(::testing::benchmark::State& state) {
+  const int num_tuple_elements = state.range(0);
+  const int array_size = state.range(1);
+
   TransferDeviceToHostBenchmark bm;
-  bm.Run(iters, num_tuple_elements, array_size);
+  bm.Run(state, num_tuple_elements, array_size);
 }
 
-void BM_TransferHostToDevice(int iters, int num_tuple_elements,
-                             int array_size) {
+void BM_TransferHostToDevice(::testing::benchmark::State& state) {
+  const int num_tuple_elements = state.range(0);
+  const int array_size = state.range(1);
+
   TransferHostToDeviceBenchmark bm;
-  bm.Run(iters, num_tuple_elements, array_size);
+  bm.Run(state, num_tuple_elements, array_size);
 }
 
 BENCHMARK(BM_TransferHostToDevice)
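The TransferManager benchmarks keep their test-fixture classes and simply thread the State& through a Run(state, ...) method; the BM_TransferDeviceToHost / BM_TransferHostToDevice free functions shrink to thin wrappers that unpack state.range(0) / state.range(1) and forward. That keeps SetUp()/TearDown() on the fixture while giving the BENCHMARK macro the State-based signature it now expects. A minimal sketch of the wrapper shape; FooBenchmark and its workload are hypothetical:

#include <vector>

// Made-up fixture standing in for TransferDeviceToHostBenchmark above.
class FooBenchmark {
 public:
  void SetUp() { data_.assign(1 << 12, 1); }
  void TearDown() { data_.clear(); }

  // The fixture method owns setup/teardown and runs the timed loop.
  void Run(::testing::benchmark::State& state, int repeats, int stride) {
    SetUp();
    for (auto s : state) {
      int sum = 0;
      for (int r = 0; r < repeats; ++r) {
        for (size_t i = 0; i < data_.size(); i += stride) sum += data_[i];
      }
      tensorflow::testing::DoNotOptimize(sum);
    }
    TearDown();
  }

 private:
  std::vector<int> data_;
};

// The free function the BENCHMARK macro sees only unpacks the ranges.
void BM_Foo(::testing::benchmark::State& state) {
  const int repeats = state.range(0);
  const int stride = state.range(1);
  FooBenchmark bm;
  bm.Run(state, repeats, stride);
}
BENCHMARK(BM_Foo)->ArgPair(4, 1)->ArgPair(4, 8);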
@@ -1259,9 +1259,8 @@ XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) {
   ComputeAndCompareR0<int32>(&builder, 2, {});
 }
 
-void BM_WhileLoop(int num_iters) {
+void BM_WhileLoop(::testing::benchmark::State& state) {
   // Benchmark a simple kernel to measure while loop overheads.
-  tensorflow::testing::StopTiming();
 
   se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
   auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
@@ -1330,8 +1329,7 @@ void BM_WhileLoop(int num_iters) {
   }
 
   // Run benchmark.
-  tensorflow::testing::StartTiming();
-  for (int i = 0; i < num_iters; ++i) {
+  for (auto s : state) {
     auto result =
         executable->Run(absl::Span<const ShapedBuffer* const>(), options);
     ASSERT_TRUE(result.ok());