Internal tests cleanup.

PiperOrigin-RevId: 339456235
Change-Id: Ia960a93771ef371256dc10078a39421ca1faeb14
This commit is contained in:
A. Unique TensorFlower 2020-10-28 07:36:09 -07:00 committed by TensorFlower Gardener
parent 08af2ba27c
commit fbac0a99f7
9 changed files with 91 additions and 85 deletions

View File

@ -2100,10 +2100,14 @@ std::unique_ptr<HloComputation> MakeBenchmarkWhileBody() {
return builder.Build();
}
void BM_SequentialWhiles(int num_iters, int num_whiles) {
void BM_SequentialWhiles(::testing::benchmark::State& state) {
const int num_whiles = state.range(0);
// This benchmark constructs a chain of sequential while instructions.
tensorflow::testing::StopTiming();
for (int i = 0; i < num_iters; ++i) {
// Timer starts automatically at the first iteration of this loop
// and ends after the last one.
for (auto s : state) {
state.PauseTiming();
HloModuleConfig config;
config.set_debug_options(GetDebugOptionsFromFlags());
HloModule module("BM_SequentialWhiles", config);
@ -2131,19 +2135,22 @@ void BM_SequentialWhiles(int num_iters, int num_whiles) {
CopyInsertion copy_insertion;
tensorflow::testing::StartTiming();
state.ResumeTiming();
ASSERT_IS_OK(copy_insertion.Run(&module).status());
tensorflow::testing::StopTiming();
state.PauseTiming();
// The entry computation should have three copies, and each body has one.
ASSERT_EQ(CountCopies(module), 3 + num_whiles);
state.ResumeTiming();
}
}
void BM_ParallelWhiles(int num_iters, int num_whiles) {
void BM_ParallelWhiles(::testing::benchmark::State& state) {
const int num_whiles = state.range(0);
// This benchmark constructs a fan-out of parallel while instructions.
tensorflow::testing::StopTiming();
for (int i = 0; i < num_iters; ++i) {
for (auto s : state) {
state.PauseTiming();
HloModuleConfig config;
config.set_debug_options(GetDebugOptionsFromFlags());
HloModule module("BM_SequentialWhiles", config);
@ -2182,9 +2189,9 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) {
CopyInsertion copy_insertion;
tensorflow::testing::StartTiming();
state.ResumeTiming();
ASSERT_IS_OK(copy_insertion.Run(&module).status());
tensorflow::testing::StopTiming();
state.PauseTiming();
// Each body receives a copy of two of the parameters (the corresponding
// elements in the body are modified), and there is one copy in each body.
@ -2209,14 +2216,15 @@ std::unique_ptr<HloComputation> MakeBenchmarkWhileBody(
return builder.Build();
}
void BM_ManyElementTuple(int num_iters, const int num_tuple_inputs) {
tensorflow::testing::StopTiming();
void BM_ManyElementTuple(::testing::benchmark::State& state) {
const int num_tuple_inputs = state.range(0);
HloModuleConfig config;
config.set_debug_options(GetDebugOptionsFromFlags());
CopyInsertion copy_insertion;
const Shape element_shape = ShapeUtil::MakeShape(F32, {});
std::vector<HloInstruction*> tuple_params(num_tuple_inputs);
for (int i = 0; i < num_iters; ++i) {
for (auto s : state) {
state.PauseTiming();
auto builder = HloComputation::Builder("BM_ParallelWhiles");
HloModule module("BM_ManyElementTuple", config);
for (int j = 0; j < num_tuple_inputs; ++j) {
@ -2234,9 +2242,8 @@ void BM_ManyElementTuple(int num_iters, const int num_tuple_inputs) {
builder.AddInstruction(HloInstruction::CreateGetTupleElement(
ShapeUtil::MakeShape(F32, {}), xla_while, 0));
module.AddEntryComputation(builder.Build());
tensorflow::testing::StartTiming();
state.ResumeTiming();
ASSERT_IS_OK(copy_insertion.Run(&module).status());
tensorflow::testing::StopTiming();
}
}

View File

@ -2545,8 +2545,7 @@ TEST_F(HloEvaluatorPreciseReduceTest, AddReductionPrecisionTest) {
// Reducing many numbers should be fast because it doesn't create
// intermediate Literals; the microbenchmark should finish in < 1 msec.
void BM_ReducePrecisely(int num_iters) {
tensorflow::testing::StopTiming();
void BM_ReducePrecisely(::testing::benchmark::State& state) {
HloComputation::Builder b("BM_ReducePrecisely");
HloModuleConfig config;
config.set_debug_options(GetDebugOptionsFromFlags());
@ -2574,10 +2573,11 @@ void BM_ReducePrecisely(int num_iters) {
/*dimensions_to_reduce=*/{0}, add_func));
module.AddEntryComputation(b.Build());
HloEvaluator hlo_eval;
tensorflow::testing::StartTiming();
hlo_eval.Evaluate(reduce_instruction).ConsumeValueOrDie();
tensorflow::testing::StopTiming();
// Benchmark loop
for (auto s : state) {
HloEvaluator hlo_eval;
hlo_eval.Evaluate(reduce_instruction).ConsumeValueOrDie();
}
}
BENCHMARK(BM_ReducePrecisely);

View File

@ -173,8 +173,10 @@ TEST(ScopedShapedBufferTest, TestSubShapeTree) {
// Test TakeSubTree with different depths (depth of ShapeTree) and fan-outs
// (cardinality of each non-leaf node's children).
void BM_TakeSubTree(int iters, int depth, int fan_out) {
tensorflow::testing::StopTiming();
void BM_TakeSubTree(::testing::benchmark::State& state) {
const int depth = state.range(0);
const int fan_out = state.range(1);
TestAllocator allocator;
xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {32, 64, 128});
for (int i = 0; i < depth; ++i) {
@ -183,13 +185,11 @@ void BM_TakeSubTree(int iters, int depth, int fan_out) {
}
xla::ScopedShapedBuffer shaped_buffer(shape, /*allocator=*/&allocator,
/*device_ordinal=*/0);
tensorflow::testing::StartTiming();
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
// Extract a buffer from approximately the middle of the first level of the
// tree.
(void)shaped_buffer.TakeSubTree(/*index=*/{fan_out / 2}).release();
}
tensorflow::testing::StopTiming();
}
BENCHMARK(BM_TakeSubTree)

View File

@ -535,94 +535,100 @@ TEST_F(ShapeTreeTest, ReverseIterateOrderLeaves) {
}));
}
void BM_Construct(int iters, int depth, int fan_out) {
tensorflow::testing::StopTiming();
void BM_Construct(::testing::benchmark::State& state) {
const int depth = state.range(0);
const int fan_out = state.range(1);
Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
for (int i = 0; i < depth; ++i) {
std::vector<xla::Shape> shapes(fan_out, shape);
shape = ShapeUtil::MakeTupleShape(shapes);
}
tensorflow::testing::StartTiming();
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
ShapeTree<int> shape_tree(shape);
}
}
void BM_ConstructUnowned(int iters, int depth, int fan_out) {
tensorflow::testing::StopTiming();
void BM_ConstructUnowned(::testing::benchmark::State& state) {
const int depth = state.range(0);
const int fan_out = state.range(1);
Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
for (int i = 0; i < depth; ++i) {
std::vector<xla::Shape> shapes(fan_out, shape);
shape = ShapeUtil::MakeTupleShape(shapes);
}
tensorflow::testing::StartTiming();
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
ShapeTree<int> shape_tree(&shape);
}
}
void BM_Copy(int iters, int depth, int fan_out) {
tensorflow::testing::StopTiming();
void BM_Copy(::testing::benchmark::State& state) {
const int depth = state.range(0);
const int fan_out = state.range(1);
Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
for (int i = 0; i < depth; ++i) {
std::vector<xla::Shape> shapes(fan_out, shape);
shape = ShapeUtil::MakeTupleShape(shapes);
}
tensorflow::testing::StartTiming();
ShapeTree<int> shape_tree(shape);
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
ShapeTree<int> copy = shape_tree;
tensorflow::testing::DoNotOptimize(copy);
}
}
void BM_Move(int iters, int depth, int fan_out) {
tensorflow::testing::StopTiming();
void BM_Move(::testing::benchmark::State& state) {
const int depth = state.range(0);
const int fan_out = state.range(1);
Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
for (int i = 0; i < depth; ++i) {
std::vector<xla::Shape> shapes(fan_out, shape);
shape = ShapeUtil::MakeTupleShape(shapes);
}
tensorflow::testing::StartTiming();
ShapeTree<int> shape_tree(shape);
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
ShapeTree<int> copy = std::move(shape_tree);
shape_tree = std::move(copy);
}
}
void BM_ForEach(int iters, int depth, int fan_out) {
tensorflow::testing::StopTiming();
void BM_ForEach(::testing::benchmark::State& state) {
const int depth = state.range(0);
const int fan_out = state.range(1);
Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
for (int i = 0; i < depth; ++i) {
std::vector<xla::Shape> shapes(fan_out, shape);
shape = ShapeUtil::MakeTupleShape(shapes);
}
tensorflow::testing::StartTiming();
ShapeTree<int> shape_tree(shape);
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
shape_tree.ForEachMutableElement([](const ShapeIndex& index, int* data) {
tensorflow::testing::DoNotOptimize(index);
});
}
}
void BM_Iterate(int iters, int depth, int fan_out) {
tensorflow::testing::StopTiming();
void BM_Iterate(::testing::benchmark::State& state) {
const int depth = state.range(0);
const int fan_out = state.range(1);
Shape shape = ShapeUtil::MakeShape(F32, {32, 64, 128});
for (int i = 0; i < depth; ++i) {
std::vector<xla::Shape> shapes(fan_out, shape);
shape = ShapeUtil::MakeTupleShape(shapes);
}
tensorflow::testing::StartTiming();
ShapeTree<int> shape_tree(shape);
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
for (auto& iter : shape_tree) {
tensorflow::testing::DoNotOptimize(iter.second);
}

View File

@ -824,9 +824,8 @@ XLA_TEST_F(FusionClientLibraryTest, ManyLayoutTransformations) {
ComputeAndCompare(&b, {});
}
void BM_ParallelFusion(int num_iters) {
void BM_ParallelFusion(::testing::benchmark::State& state) {
// Simple element-wise computation to benchmark parallel task partitioning.
tensorflow::testing::StopTiming();
se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
@ -915,17 +914,16 @@ void BM_ParallelFusion(int num_iters) {
const int64 total_bytes = param0_dim0 * param0_dim0 +
param1_dim0 * param1_dim0 +
param2_dim0 * param2_dim0;
tensorflow::testing::BytesProcessed(static_cast<int64>(num_iters) *
total_bytes * sizeof(float));
tensorflow::testing::UseRealTime();
tensorflow::testing::StartTiming();
for (int i = 0; i < num_iters; ++i) {
for (auto s : state) {
auto result = executable->Run({&buffer0, &buffer1, &buffer2}, options);
ASSERT_TRUE(result.ok());
}
state.SetBytesProcessed(static_cast<int64>(state.iterations()) * total_bytes *
sizeof(float));
}
BENCHMARK(BM_ParallelFusion);
BENCHMARK(BM_ParallelFusion)->UseRealTime();
} // namespace
} // namespace xla

View File

@ -750,9 +750,7 @@ XLA_TEST_F(HloTestBase, AddOfDUS) {
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0}));
}
void BM_DynamicSlice(int num_iters) {
tensorflow::testing::StopTiming();
void BM_DynamicSlice(::testing::benchmark::State& state) {
se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
se::StreamExecutorMemoryAllocator allocator(platform, executors);
@ -817,8 +815,7 @@ void BM_DynamicSlice(int num_iters) {
}
// Run benchmark.
tensorflow::testing::StartTiming();
for (int i = 0; i < num_iters; ++i) {
for (auto s : state) {
auto result = executable->Run(shaped_buffer_ptrs, options);
ASSERT_TRUE(result.ok());
}

View File

@ -946,9 +946,7 @@ XLA_TEST_F(LocalClientExecuteTest, DISABLED_ON_INTERPRETER(InfeedOutfeedTest)) {
// Benchmark that measures the overhead of the LocalClient API when running a
// trivial computation
void BM_LocalClientOverhead(int num_iters) {
tensorflow::testing::StopTiming();
void BM_LocalClientOverhead(benchmark::State& state) {
se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
se::StreamExecutorMemoryAllocator allocator(platform, executors);
@ -990,8 +988,7 @@ void BM_LocalClientOverhead(int num_iters) {
ASSERT_IS_OK(result);
}
tensorflow::testing::StartTiming();
for (int i = 0; i < num_iters; ++i) {
for (auto s : state) {
auto result = executable->Run({&buffer}, run_options);
ASSERT_IS_OK(result);
}

View File

@ -357,8 +357,8 @@ class TransferDeviceToHostBenchmark : public TransferManagerTest {
using TransferManagerTest::TransferManagerTest;
~TransferDeviceToHostBenchmark() override {}
void Run(int iters, int num_tuple_elements, int array_size) {
tensorflow::testing::StopTiming();
void Run(::testing::benchmark::State& state, int num_tuple_elements,
int array_size) {
SetUp();
std::vector<Literal> tuple_elements;
@ -370,13 +370,11 @@ class TransferDeviceToHostBenchmark : public TransferManagerTest {
auto device_buffer = AllocateDeviceBuffer(literal.shape());
TF_CHECK_OK(transfer_manager_->TransferLiteralToDevice(stream_, literal,
device_buffer));
tensorflow::testing::StartTiming();
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
TF_ASSERT_OK_AND_ASSIGN(
Literal result,
transfer_manager_->TransferLiteralFromDevice(stream_, device_buffer));
}
tensorflow::testing::StopTiming();
TearDown();
}
@ -388,7 +386,8 @@ class TransferHostToDeviceBenchmark : public TransferManagerTest {
using TransferManagerTest::TransferManagerTest;
~TransferHostToDeviceBenchmark() override {}
void Run(int iters, int num_tuple_elements, int array_size) {
void Run(::testing::benchmark::State& state, int num_tuple_elements,
int array_size) {
tensorflow::testing::StopTiming();
SetUp();
@ -400,7 +399,7 @@ class TransferHostToDeviceBenchmark : public TransferManagerTest {
Literal literal = LiteralUtil::MakeTupleOwned(std::move(tuple_elements));
auto device_buffer = AllocateDeviceBuffer(literal.shape());
tensorflow::testing::StartTiming();
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
TF_CHECK_OK(transfer_manager_->TransferLiteralToDevice(stream_, literal,
device_buffer));
}
@ -411,16 +410,20 @@ class TransferHostToDeviceBenchmark : public TransferManagerTest {
void TestBody() override {}
};
void BM_TransferDeviceToHost(int iters, int num_tuple_elements,
int array_size) {
void BM_TransferDeviceToHost(::testing::benchmark::State& state) {
const int num_tuple_elements = state.range(0);
const int array_size = state.range(1);
TransferDeviceToHostBenchmark bm;
bm.Run(iters, num_tuple_elements, array_size);
bm.Run(state, num_tuple_elements, array_size);
}
void BM_TransferHostToDevice(int iters, int num_tuple_elements,
int array_size) {
void BM_TransferHostToDevice(::testing::benchmark::State& state) {
const int num_tuple_elements = state.range(0);
const int array_size = state.range(1);
TransferHostToDeviceBenchmark bm;
bm.Run(iters, num_tuple_elements, array_size);
bm.Run(state, num_tuple_elements, array_size);
}
BENCHMARK(BM_TransferHostToDevice)

View File

@ -1259,9 +1259,8 @@ XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) {
ComputeAndCompareR0<int32>(&builder, 2, {});
}
void BM_WhileLoop(int num_iters) {
void BM_WhileLoop(::testing::benchmark::State& state) {
// Benchmark a simple kernel to measure while loop overheads.
tensorflow::testing::StopTiming();
se::Platform* platform = PlatformUtil::GetDefaultPlatform().ValueOrDie();
auto executors = PlatformUtil::GetStreamExecutors(platform).ValueOrDie();
@ -1330,8 +1329,7 @@ void BM_WhileLoop(int num_iters) {
}
// Run benchmark.
tensorflow::testing::StartTiming();
for (int i = 0; i < num_iters; ++i) {
for (auto s : state) {
auto result =
executable->Run(absl::Span<const ShapedBuffer* const>(), options);
ASSERT_TRUE(result.ok());