From 65f038e14d2cfce350cd0a825a5f204e2821c058 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Feb 2021 13:57:57 -0800 Subject: [PATCH] Update benchmarks to newer API PiperOrigin-RevId: 356584743 Change-Id: I3c50bc0be4482171533b948b2a44c8eade403578 --- .../gpu/gpu_bfc_allocator_test.cc | 74 +++++++++++-------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 889bc2a73a0..a18b141e07d 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -333,7 +333,7 @@ TEST_P(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { INSTANTIATE_TEST_SUITE_P(GPUBFCAllocatorTestSuite, GPUBFCAllocatorTest, TestSuiteValues()); -static void BM_Allocation(int iters) { +static void BM_Allocation(::testing::benchmark::State& state) { GPUBFCAllocator a(CreateSubAllocator(1ul << 36), 1uLL << 33, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 524288, @@ -341,7 +341,7 @@ static void BM_Allocation(int iters) { 1048576000, 2048576000}; int size_index = 0; - while (--iters > 0) { + for (auto s : state) { size_t bytes = sizes[size_index++ % sizes.size()]; void* p = a.AllocateRaw(1, bytes); a.DeallocateRaw(p); @@ -349,43 +349,53 @@ static void BM_Allocation(int iters) { } BENCHMARK(BM_Allocation); -static void BM_AllocationThreaded(int iters, int num_threads) { - GPUBFCAllocator a(CreateSubAllocator(1ul << 36), 1uLL << 33, "GPU_0_bfc"); - thread::ThreadPool pool(Env::Default(), "test", num_threads); - std::atomic_int_fast32_t count(iters); - mutex done_lock; - condition_variable done; - bool done_flag = false; +static void BM_AllocationThreaded(::testing::benchmark::State& state) { + int num_threads = state.range(0); + int sub_iters = 500; // Pick a reasonably large number. - for (int t = 0; t < num_threads; t++) { - pool.Schedule([&a, &count, &done_lock, &done, &done_flag, iters]() { - // Exercise a few different allocation sizes - std::vector sizes = {256, 4096, 16384, 524288, - 512, 1048576, 10485760, 104857600}; - int size_index = 0; - for (int i = 0; i < iters; i++) { - int bytes = sizes[size_index++ % sizes.size()]; - void* p = a.AllocateRaw(1, bytes); - a.DeallocateRaw(p); - if (count.fetch_sub(1) == 1) { - mutex_lock l(done_lock); - done_flag = true; - done.notify_all(); - break; + for (auto s : state) { + state.PauseTiming(); + GPUBFCAllocator a(CreateSubAllocator(1ul << 36), 1uLL << 33, "GPU_0_bfc"); + thread::ThreadPool pool(Env::Default(), "test", num_threads); + + std::atomic_int_fast32_t count(sub_iters); + mutex done_lock; + condition_variable done; + bool done_flag = false; + state.ResumeTiming(); + for (int t = 0; t < num_threads; t++) { + pool.Schedule([&a, &count, &done_lock, &done, &done_flag, sub_iters]() { + // Exercise a few different allocation sizes + std::vector sizes = {256, 4096, 16384, 524288, + 512, 1048576, 10485760, 104857600}; + int size_index = 0; + for (int i = 0; i < sub_iters; i++) { + int bytes = sizes[size_index++ % sizes.size()]; + void* p = a.AllocateRaw(1, bytes); + a.DeallocateRaw(p); + if (count.fetch_sub(1) == 1) { + mutex_lock l(done_lock); + done_flag = true; + done.notify_all(); + break; + } } - } - }); - } - mutex_lock l(done_lock); - if (!done_flag) { - done.wait(l); + }); + } + + mutex_lock l(done_lock); + if (!done_flag) { + done.wait(l); + } } } + BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16); // A more complex benchmark that defers deallocation of an object for // "delay" allocations. -static void BM_AllocationDelayed(int iters, int delay) { +static void BM_AllocationDelayed(::testing::benchmark::State& state) { + int delay = state.range(0); GPUBFCAllocator a(CreateSubAllocator(1ull << 32), 1 << 30, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 4096, 512, 1024, 1024}; @@ -397,7 +407,7 @@ static void BM_AllocationDelayed(int iters, int delay) { ptrs.push_back(nullptr); } int pindex = 0; - while (--iters > 0) { + for (auto s : state) { if (ptrs[pindex] != nullptr) { a.DeallocateRaw(ptrs[pindex]); ptrs[pindex] = nullptr;