diff --git a/tensorflow/core/kernels/conv_grad_filter_ops_benchmark_test.cc b/tensorflow/core/kernels/conv_grad_filter_ops_benchmark_test.cc
index 97148945331..0be09d8e614 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops_benchmark_test.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops_benchmark_test.cc
@@ -116,12 +116,15 @@ static Graph* Conv2DBackpropFilter(int batch, int height, int width,
 #define BM_Conv2DBwdFilter(T, FMT, N, H, W, C, FH, FW, FC, SH, SW, PADDING,    \
                            type)                                               \
   static void BM_NAME(BM_Conv2DBackpropFilter, type, T, FMT, N, H, W, C, FH,   \
-                      FW, FC, SH, SW, PADDING)(int iters) {                    \
-    testing::ItemsProcessed(static_cast<int64>(iters) * (N) * (H) * (W) *      \
-                            (C));                                              \
-    test::Benchmark(#type, Conv2DBackpropFilter(N, H, W, C, FH, FW, FC, SH,    \
-                                                SW, PADDING, FORMAT_##FMT))    \
-        .Run(iters);                                                           \
+                      FW, FC, SH, SW,                                          \
+                      PADDING)(::testing::benchmark::State & state) {          \
+    test::Benchmark(#type,                                                     \
+                    Conv2DBackpropFilter(N, H, W, C, FH, FW, FC, SH, SW,       \
+                                         PADDING, FORMAT_##FMT),               \
+                    /*old_benchmark_api*/ false)                               \
+        .Run(state);                                                           \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * (N) *     \
+                            (H) * (W) * (C));                                  \
   }                                                                            \
   BENCHMARK(BM_NAME(BM_Conv2DBackpropFilter, type, T, FMT, N, H, W, C, FH, FW, \
                     FC, SH, SW, PADDING));
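Every file in this patch applies the same mechanical rewrite shown above: the benchmark body takes a `::testing::benchmark::State&` instead of `int iters`, the graph goes to `test::Benchmark` together with the `/*old_benchmark_api*/ false` flag, `.Run(state)` replaces `.Run(iters)`, and the throughput counter moves after the run as `state.SetItemsProcessed(...)` computed from `state.iterations()`. A rough standalone sketch of that shape, written against the open-source Google Benchmark API that the TF shim mirrors (the function name and item count below are illustrative, not taken from the patch):

    #include <cstdint>

    #include "benchmark/benchmark.h"

    // New-style benchmark body: the framework drives the iteration count via
    // `state`, and throughput counters are reported after the timed loop.
    static void BM_Example(benchmark::State& state) {
      for (auto _ : state) {
        // ... the operation under test runs here ...
      }
      state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) * 64 * 14 * 14);
    }
    BENCHMARK(BM_Example);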
"VALID" + : "N/A") .Attr("data_format", ToString(data_format)) .Finalize(graph, &conv2d)); @@ -115,12 +115,14 @@ static Graph* Conv2DBackpropInput(int batch, int height, int width, #define BM_Conv2DBwdInput(T, FMT, N, H, W, C, FW, FH, FC, SH, SW, PADDING, \ type) \ static void BM_NAME(BM_Conv2DBackpropInput, type, T, FMT, N, H, W, C, FH, \ - FW, FC, SH, SW, PADDING)(int iters) { \ - testing::ItemsProcessed(static_cast(iters) * (N) * (H) * (W) * \ - (C)); \ - test::Benchmark(#type, Conv2DBackpropInput(N, H, W, C, FH, FW, FC, SH, \ - SW, PADDING, FORMAT_##FMT)) \ - .Run(iters); \ + FW, FC, SH, SW, \ + PADDING)(::testing::benchmark::State & state) { \ + test::Benchmark(#type, \ + Conv2DBackpropInput(N, H, W, C, FH, FW, FC, SH, SW, \ + PADDING, FORMAT_##FMT), \ + /*old_benchmark_api*/ false) \ + .Run(state); \ + state.SetItemsProcessed(state.iterations() * (N) * (H) * (W) * (C)); \ } \ BENCHMARK(BM_NAME(BM_Conv2DBackpropInput, type, T, FMT, N, H, W, C, FH, FW, \ FC, SH, SW, PADDING)); diff --git a/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc b/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc index e253014bf94..83a5b40b24b 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc @@ -91,10 +91,8 @@ TEST(SnapshotUtilTest, CombinationRoundTripTest) { SnapshotRoundTrip(io::compression::kSnappy, 2); } -void SnapshotReaderBenchmarkLoop(int iters, std::string compression_type, - int version) { - tensorflow::testing::StopTiming(); - +void SnapshotReaderBenchmarkLoop(::testing::benchmark::State& state, + std::string compression_type, int version) { tensorflow::DataTypeVector dtypes; std::vector tensors; GenerateTensorVector(dtypes, tensors); @@ -106,7 +104,7 @@ void SnapshotReaderBenchmarkLoop(int iters, std::string compression_type, TF_ASSERT_OK(Writer::Create(tensorflow::Env::Default(), filename, compression_type, version, dtypes, &writer)); - for (int i = 0; i < iters; ++i) { + for (auto s : state) { writer->WriteTensors(tensors).IgnoreError(); } TF_ASSERT_OK(writer->Close()); @@ -115,34 +113,32 @@ void SnapshotReaderBenchmarkLoop(int iters, std::string compression_type, TF_ASSERT_OK(Reader::Create(Env::Default(), filename, compression_type, version, dtypes, &reader)); - tensorflow::testing::StartTiming(); - for (int i = 0; i < iters; ++i) { + for (auto s : state) { std::vector read_tensors; reader->ReadTensors(&read_tensors).IgnoreError(); } - tensorflow::testing::StopTiming(); TF_ASSERT_OK(Env::Default()->DeleteFile(filename)); } -void SnapshotCustomReaderNoneBenchmark(int iters) { - SnapshotReaderBenchmarkLoop(iters, io::compression::kNone, 1); +void SnapshotCustomReaderNoneBenchmark(::testing::benchmark::State& state) { + SnapshotReaderBenchmarkLoop(state, io::compression::kNone, 1); } -void SnapshotCustomReaderGzipBenchmark(int iters) { - SnapshotReaderBenchmarkLoop(iters, io::compression::kGzip, 1); +void SnapshotCustomReaderGzipBenchmark(::testing::benchmark::State& state) { + SnapshotReaderBenchmarkLoop(state, io::compression::kGzip, 1); } -void SnapshotCustomReaderSnappyBenchmark(int iters) { - SnapshotReaderBenchmarkLoop(iters, io::compression::kSnappy, 1); +void SnapshotCustomReaderSnappyBenchmark(::testing::benchmark::State& state) { + SnapshotReaderBenchmarkLoop(state, io::compression::kSnappy, 1); } -void SnapshotTFRecordReaderNoneBenchmark(int iters) { - SnapshotReaderBenchmarkLoop(iters, io::compression::kNone, 2); +void 
diff --git a/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc b/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc
index e253014bf94..83a5b40b24b 100644
--- a/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc
+++ b/tensorflow/core/kernels/data/experimental/snapshot_util_test.cc
@@ -91,10 +91,8 @@ TEST(SnapshotUtilTest, CombinationRoundTripTest) {
   SnapshotRoundTrip(io::compression::kSnappy, 2);
 }
 
-void SnapshotReaderBenchmarkLoop(int iters, std::string compression_type,
-                                 int version) {
-  tensorflow::testing::StopTiming();
-
+void SnapshotReaderBenchmarkLoop(::testing::benchmark::State& state,
+                                 std::string compression_type, int version) {
   tensorflow::DataTypeVector dtypes;
   std::vector<Tensor> tensors;
   GenerateTensorVector(dtypes, tensors);
@@ -106,7 +104,7 @@ void SnapshotReaderBenchmarkLoop(int iters, std::string compression_type,
   TF_ASSERT_OK(Writer::Create(tensorflow::Env::Default(), filename,
                               compression_type, version, dtypes, &writer));
 
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     writer->WriteTensors(tensors).IgnoreError();
   }
   TF_ASSERT_OK(writer->Close());
@@ -115,34 +113,32 @@ void SnapshotReaderBenchmarkLoop(int iters, std::string compression_type,
   TF_ASSERT_OK(Reader::Create(Env::Default(), filename, compression_type,
                               version, dtypes, &reader));
 
-  tensorflow::testing::StartTiming();
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     std::vector<Tensor> read_tensors;
     reader->ReadTensors(&read_tensors).IgnoreError();
   }
-  tensorflow::testing::StopTiming();
 
   TF_ASSERT_OK(Env::Default()->DeleteFile(filename));
 }
 
-void SnapshotCustomReaderNoneBenchmark(int iters) {
-  SnapshotReaderBenchmarkLoop(iters, io::compression::kNone, 1);
+void SnapshotCustomReaderNoneBenchmark(::testing::benchmark::State& state) {
+  SnapshotReaderBenchmarkLoop(state, io::compression::kNone, 1);
 }
 
-void SnapshotCustomReaderGzipBenchmark(int iters) {
-  SnapshotReaderBenchmarkLoop(iters, io::compression::kGzip, 1);
+void SnapshotCustomReaderGzipBenchmark(::testing::benchmark::State& state) {
+  SnapshotReaderBenchmarkLoop(state, io::compression::kGzip, 1);
 }
 
-void SnapshotCustomReaderSnappyBenchmark(int iters) {
-  SnapshotReaderBenchmarkLoop(iters, io::compression::kSnappy, 1);
+void SnapshotCustomReaderSnappyBenchmark(::testing::benchmark::State& state) {
+  SnapshotReaderBenchmarkLoop(state, io::compression::kSnappy, 1);
 }
 
-void SnapshotTFRecordReaderNoneBenchmark(int iters) {
-  SnapshotReaderBenchmarkLoop(iters, io::compression::kNone, 2);
+void SnapshotTFRecordReaderNoneBenchmark(::testing::benchmark::State& state) {
+  SnapshotReaderBenchmarkLoop(state, io::compression::kNone, 2);
 }
 
-void SnapshotTFRecordReaderGzipBenchmark(int iters) {
-  SnapshotReaderBenchmarkLoop(iters, io::compression::kGzip, 2);
+void SnapshotTFRecordReaderGzipBenchmark(::testing::benchmark::State& state) {
+  SnapshotReaderBenchmarkLoop(state, io::compression::kGzip, 2);
 }
 
 BENCHMARK(SnapshotCustomReaderNoneBenchmark);
@@ -151,10 +147,8 @@ BENCHMARK(SnapshotCustomReaderSnappyBenchmark);
 BENCHMARK(SnapshotTFRecordReaderNoneBenchmark);
 BENCHMARK(SnapshotTFRecordReaderGzipBenchmark);
 
-void SnapshotWriterBenchmarkLoop(int iters, std::string compression_type,
-                                 int version) {
-  tensorflow::testing::StopTiming();
-
+void SnapshotWriterBenchmarkLoop(::testing::benchmark::State& state,
+                                 std::string compression_type, int version) {
   tensorflow::DataTypeVector dtypes;
   std::vector<Tensor> tensors;
   GenerateTensorVector(dtypes, tensors);
@@ -166,38 +160,36 @@ void SnapshotWriterBenchmarkLoop(int iters, std::string compression_type,
   TF_ASSERT_OK(Writer::Create(tensorflow::Env::Default(), filename,
                               compression_type, version, dtypes, &writer));
 
-  tensorflow::testing::StartTiming();
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     writer->WriteTensors(tensors).IgnoreError();
   }
   writer->Close().IgnoreError();
-  tensorflow::testing::StopTiming();
 
   TF_ASSERT_OK(Env::Default()->DeleteFile(filename));
 }
 
-void SnapshotCustomWriterNoneBenchmark(int iters) {
-  SnapshotWriterBenchmarkLoop(iters, io::compression::kNone, 1);
+void SnapshotCustomWriterNoneBenchmark(::testing::benchmark::State& state) {
+  SnapshotWriterBenchmarkLoop(state, io::compression::kNone, 1);
 }
 
-void SnapshotCustomWriterGzipBenchmark(int iters) {
-  SnapshotWriterBenchmarkLoop(iters, io::compression::kGzip, 1);
+void SnapshotCustomWriterGzipBenchmark(::testing::benchmark::State& state) {
+  SnapshotWriterBenchmarkLoop(state, io::compression::kGzip, 1);
 }
 
-void SnapshotCustomWriterSnappyBenchmark(int iters) {
-  SnapshotWriterBenchmarkLoop(iters, io::compression::kSnappy, 1);
+void SnapshotCustomWriterSnappyBenchmark(::testing::benchmark::State& state) {
+  SnapshotWriterBenchmarkLoop(state, io::compression::kSnappy, 1);
 }
 
-void SnapshotTFRecordWriterNoneBenchmark(int iters) {
-  SnapshotWriterBenchmarkLoop(iters, io::compression::kNone, 2);
+void SnapshotTFRecordWriterNoneBenchmark(::testing::benchmark::State& state) {
+  SnapshotWriterBenchmarkLoop(state, io::compression::kNone, 2);
 }
 
-void SnapshotTFRecordWriterGzipBenchmark(int iters) {
-  SnapshotWriterBenchmarkLoop(iters, io::compression::kGzip, 2);
+void SnapshotTFRecordWriterGzipBenchmark(::testing::benchmark::State& state) {
+  SnapshotWriterBenchmarkLoop(state, io::compression::kGzip, 2);
 }
 
-void SnapshotTFRecordWriterSnappyBenchmark(int iters) {
-  SnapshotWriterBenchmarkLoop(iters, io::compression::kSnappy, 2);
+void SnapshotTFRecordWriterSnappyBenchmark(::testing::benchmark::State& state) {
+  SnapshotWriterBenchmarkLoop(state, io::compression::kSnappy, 2);
 }
 
 BENCHMARK(SnapshotCustomWriterNoneBenchmark);
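One consequence of the `for (auto s : state)` loop, visible throughout the snapshot benchmarks above, is that the explicit `StopTiming()`/`StartTiming()` bracketing becomes unnecessary: only the iterations of the range-for loop are measured, so setup before the loop and teardown after it stay out of the timing automatically. A small illustrative sketch, again against the open-source Google Benchmark API; the vector here merely stands in for the tensor/writer setup and is not from the patch:

    #include <vector>

    #include "benchmark/benchmark.h"

    static void BM_WriteRecords(benchmark::State& state) {
      // Setup outside the loop is not timed, which is why the old
      // StopTiming()/StartTiming() calls could simply be deleted.
      std::vector<int> records(1024, 42);

      for (auto s : state) {  // only this loop body is measured
        auto* p = records.data();
        benchmark::DoNotOptimize(p);
      }
      // Teardown after the loop is likewise excluded from the measurement.
    }
    BENCHMARK(BM_WriteRecords);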
diff --git a/tensorflow/core/kernels/eigen_benchmark.h b/tensorflow/core/kernels/eigen_benchmark.h
index 87e41b89b3d..8b35bfdcd64 100644
--- a/tensorflow/core/kernels/eigen_benchmark.h
+++ b/tensorflow/core/kernels/eigen_benchmark.h
@@ -35,8 +35,9 @@ class SpatialConvolutionBenchmarksSuite {
 
   using Dimensions = Eigen::DSizes;
 
-  SpatialConvolutionBenchmarksSuite(int iters, Device& device)
-      : iters_(iters), device_(device) {}
+  SpatialConvolutionBenchmarksSuite(::testing::benchmark::State& state,
+                                    Device& device)
+      : state_(state), device_(device) {}
 
   Eigen::Index BufferSize(const Dimensions& dims) {
     return dims.TotalSize() * sizeof(Scalar);
@@ -62,12 +63,10 @@ class SpatialConvolutionBenchmarksSuite {
     Filter filter(filter_data, filter_dims);
     Output output(output_data, output_dims);
 
-    ::tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters_; ++i) {
+    for (auto s : state_) {
       output.device(device_) = Eigen::SpatialConvolution(input, filter);
       tensorflow::testing::DoNotOptimize(output);
     }
-    ::tensorflow::testing::StopTiming();
 
     device_.deallocate(input_data);
     device_.deallocate(filter_data);
@@ -102,13 +101,11 @@ class SpatialConvolutionBenchmarksSuite {
     OutputBackward output_backward(output_backward_data, output_dims);
     InputBackward input_backward(input_backward_data, input_dims);
 
-    ::tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters_; ++i) {
+    for (auto s : state_) {
       input_backward.device(device_) = Eigen::SpatialConvolutionBackwardInput(
           filter, output_backward, input_rows, input_cols);
       tensorflow::testing::DoNotOptimize(input_backward);
     }
-    ::tensorflow::testing::StopTiming();
 
     device_.deallocate(filter_data);
     device_.deallocate(output_backward_data);
@@ -143,13 +140,11 @@ class SpatialConvolutionBenchmarksSuite {
     OutputBackward output_backward(output_backward_data, input_dims);
     FilterBackward filter_backward(filter_backward_data, filter_dims);
 
-    ::tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters_; ++i) {
+    for (auto s : state_) {
       filter_backward.device(device_) = Eigen::SpatialConvolutionBackwardKernel(
           input, output_backward, filter_rows, filter_cols);
       tensorflow::testing::DoNotOptimize(filter_backward);
     }
-    ::tensorflow::testing::StopTiming();
 
     device_.deallocate(input_data);
     device_.deallocate(output_backward_data);
@@ -157,7 +152,8 @@ class SpatialConvolutionBenchmarksSuite {
     device_.deallocate(filter_backward_data);
   }
 
  private:
-  int iters_;
+  ::testing::benchmark::State& state_;
+
   Device& device_;
 };
 
@@ -170,8 +166,9 @@ class CuboidConvolutionBenchmarksSuite {
 
   using Dimensions = Eigen::DSizes;
 
-  CuboidConvolutionBenchmarksSuite(int iters, Device& device)
-      : iters_(iters), device_(device) {}
+  CuboidConvolutionBenchmarksSuite(::testing::benchmark::State& state,
+                                   Device& device)
+      : state_(state), device_(device) {}
 
   Eigen::Index BufferSize(const Dimensions& dims) {
     return dims.TotalSize() * sizeof(Scalar);
@@ -198,12 +195,10 @@ class CuboidConvolutionBenchmarksSuite {
     Filter filter(filter_data, filter_dims);
     Output output(output_data, output_dims);
 
-    ::tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters_; ++i) {
+    for (auto s : state_) {
      output.device(device_) = Eigen::CuboidConvolution(input, filter);
       tensorflow::testing::DoNotOptimize(output);
     }
-    ::tensorflow::testing::StopTiming();
 
     device_.deallocate(input_data);
     device_.deallocate(filter_data);
@@ -240,13 +235,11 @@ class CuboidConvolutionBenchmarksSuite {
     OutputBackward output_backward(output_backward_data, output_dims);
     InputBackward input_backward(input_backward_data, input_dims);
 
-    ::tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters_; ++i) {
+    for (auto s : state_) {
       input_backward.device(device_) = Eigen::CuboidConvolutionBackwardInput(
           filter, output_backward, input_planes, input_rows, input_cols);
       tensorflow::testing::DoNotOptimize(input_backward);
     }
-    ::tensorflow::testing::StopTiming();
 
     device_.deallocate(filter_data);
     device_.deallocate(output_backward_data);
@@ -283,13 +276,11 @@ class CuboidConvolutionBenchmarksSuite {
     OutputBackward output_backward(output_backward_data, output_dims);
     FilterBackward filter_backward(filter_backward_data, filter_dims);
 
-    ::tensorflow::testing::StartTiming();
-    for (int i = 0; i < iters_; ++i) {
+    for (auto s : state_) {
       filter_backward.device(device_) = Eigen::CuboidConvolutionBackwardKernel(
           input, output_backward, filter_planes, filter_rows, filter_cols);
       tensorflow::testing::DoNotOptimize(filter_backward);
     }
-    ::tensorflow::testing::StopTiming();
 
     device_.deallocate(input_data);
     device_.deallocate(output_backward_data);
@@ -297,7 +288,7 @@ class CuboidConvolutionBenchmarksSuite {
     device_.deallocate(filter_backward_data);
   }
 
  private:
-  int iters_;
+  ::testing::benchmark::State& state_;
   Device& device_;
 };
diff --git a/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc b/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc
index 12fa7f3409d..2abc2e99912 100644
--- a/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc
+++ b/tensorflow/core/kernels/eigen_benchmark_cpu_test.cc
@@ -27,19 +27,17 @@ limitations under the License.
 // Spatial Convolutions                                                       //
 // -------------------------------------------------------------------------- //
 
-void SpatialConvolution(int iters, int num_threads,
+void SpatialConvolution(::testing::benchmark::State& state, int num_threads,
                         /* Input dimensions: */
                         int input_batches, int input_height, int input_width,
                         int input_depth,
                         /* Filter (kernel) dimensions: */
                         int filter_count, int filter_height, int filter_width) {
-  ::tensorflow::testing::StopTiming();
-
   CREATE_THREAD_POOL(num_threads);
 
   using Benchmark = SpatialConvolutionBenchmarksSuite;
-  auto benchmark = Benchmark(iters, device);
+  auto benchmark = Benchmark(state, device);
 
   typename Benchmark::Dimensions input_dims(input_batches, input_height,
                                             input_width, input_depth);
@@ -52,23 +50,22 @@ void SpatialConvolution(int iters, int num_threads,
       (input_dims.TotalSize() / input_depth) * filter_count;
   auto flops =
       num_computed_elements * (input_depth * filter_height * filter_width);
-  ::tensorflow::testing::ItemsProcessed(flops * iters);
+  state.SetItemsProcessed(flops * state.iterations());
 }
 
-void SpatialConvolutionBackwardInput(int iters, int num_threads,
+void SpatialConvolutionBackwardInput(::testing::benchmark::State& state,
+                                     int num_threads,
                                      /* Input dimensions: */
                                      int input_batches, int input_height,
                                      int input_width, int input_depth,
                                      /* Filter (kernel) dimensions: */
                                      int filter_count, int filter_height,
                                      int filter_width) {
-  ::tensorflow::testing::StopTiming();
-
   CREATE_THREAD_POOL(num_threads);
 
   using Benchmark = SpatialConvolutionBenchmarksSuite;
-  auto benchmark = Benchmark(iters, device);
+  auto benchmark = Benchmark(state, device);
 
   typename Benchmark::Dimensions input_dims(input_batches, input_height,
                                             input_width, input_depth);
@@ -80,23 +77,22 @@ void SpatialConvolutionBackwardInput(int iters, int num_threads,
   auto num_computed_elements = input_dims.TotalSize();
   auto flops =
       num_computed_elements * (input_depth * filter_height * filter_width);
-  ::tensorflow::testing::ItemsProcessed(flops * iters);
+  state.SetItemsProcessed(flops * state.iterations());
 }
 
-void SpatialConvolutionBackwardKernel(int iters, int num_threads,
+void SpatialConvolutionBackwardKernel(::testing::benchmark::State& state,
+                                      int num_threads,
                                       /* Input dimensions: */
                                       int input_batches, int input_height,
                                       int input_width, int input_depth,
                                       /* Filter (kernel) dimensions: */
                                       int filter_count, int filter_height,
                                       int filter_width) {
-  ::tensorflow::testing::StopTiming();
-
   CREATE_THREAD_POOL(num_threads);
 
   using Benchmark = SpatialConvolutionBenchmarksSuite;
-  auto benchmark = Benchmark(iters, device);
+  auto benchmark = Benchmark(state, device);
 
   typename Benchmark::Dimensions input_dims(input_batches, input_height,
                                             input_width, input_depth);
@@ -108,7 +104,7 @@ void SpatialConvolutionBackwardKernel(int iters, int num_threads,
   auto num_computed_elements = filter_dims.TotalSize();
   auto flops =
       num_computed_elements * (input_batches * input_height * input_width);
-  ::tensorflow::testing::ItemsProcessed(flops * iters);
+  state.SetItemsProcessed(flops * state.iterations());
 }
 
 // Macro arguments names: --------------------------------------------------- //
@@ -126,26 +122,26 @@ void SpatialConvolutionBackwardKernel(int iters, int num_threads,
 
 #define BM_SpatialConvolution(NT, N, H, W, C, FC, FH, FW, LABEL)              \
   static void BM_SPATIAL_NAME(SpatialConvolution, NT, N, H, W, C, FC, FH,     \
-                              FW)(int iters) {                                \
-    ::tensorflow::testing::SetLabel(LABEL);                                   \
-    SpatialConvolution(iters, NT, N, H, W, C, FC, FH, FW);                    \
+                              FW)(::testing::benchmark::State & state) {      \
+    state.SetLabel(LABEL);                                                    \
+    SpatialConvolution(state, NT, N, H, W, C, FC, FH, FW);                    \
   }                                                                           \
   BENCHMARK(BM_SPATIAL_NAME(SpatialConvolution, NT, N, H, W, C, FC, FH, FW))
 
 #define BM_SpatialConvolutionBwdInput(NT, N, H, W, C, FC, FH, FW, LABEL)      \
   static void BM_SPATIAL_NAME(SpatialConvolutionBwdInput, NT, N, H, W, C, FC, \
-                              FH, FW)(int iters) {                            \
-    ::tensorflow::testing::SetLabel(LABEL);                                   \
-    SpatialConvolutionBackwardInput(iters, NT, N, H, W, C, FC, FH, FW);       \
+                              FH, FW)(::testing::benchmark::State & state) {  \
+    state.SetLabel(LABEL);                                                    \
+    SpatialConvolutionBackwardInput(state, NT, N, H, W, C, FC, FH, FW);       \
   }                                                                           \
   BENCHMARK(                                                                  \
       BM_SPATIAL_NAME(SpatialConvolutionBwdInput, NT, N, H, W, C, FC, FH, FW))
 
 #define BM_SpatialConvolutionBwdKernel(NT, N, H, W, C, FC, FH, FW, LABEL)     \
   static void BM_SPATIAL_NAME(SpatialConvolutionBwdKernel, NT, N, H, W, C, FC, \
-                              FH, FW)(int iters) {                            \
-    ::tensorflow::testing::SetLabel(LABEL);                                   \
-    SpatialConvolutionBackwardKernel(iters, NT, N, H, W, C, FC, FH, FW);      \
+                              FH, FW)(::testing::benchmark::State & state) {  \
+    state.SetLabel(LABEL);                                                    \
+    SpatialConvolutionBackwardKernel(state, NT, N, H, W, C, FC, FH, FW);      \
   }                                                                           \
   BENCHMARK(BM_SPATIAL_NAME(SpatialConvolutionBwdKernel, NT, N, H, W, C, FC,  \
                             FH, FW))
@@ -248,20 +244,18 @@ BM_SpatialConvolutionsBwdKernel(32, 7, 7, 192, 384, 3, 3, "conv5_00_3x3");
 // Cuboid Convolutions                                                        //
 // -------------------------------------------------------------------------- //
 
-void CuboidConvolution(int iters, int num_threads,
+void CuboidConvolution(::testing::benchmark::State& state, int num_threads,
                        /* Input dimensions: */
                        int input_batches, int input_height, int input_width,
                        int input_planes, int input_depth,
                        /* Filter (kernel) dimensions: */
                        int filter_count, int filter_height, int filter_width,
                        int filter_planes) {
-  ::tensorflow::testing::StopTiming();
-
   CREATE_THREAD_POOL(num_threads);
 
   using Benchmark = CuboidConvolutionBenchmarksSuite;
-  auto benchmark = Benchmark(iters, device);
+  auto benchmark = Benchmark(state, device);
 
   typename Benchmark::Dimensions input_dims(
       input_batches, input_height, input_width, input_planes, input_depth);
@@ -274,10 +268,11 @@ void CuboidConvolution(int iters, int num_threads,
       (input_dims.TotalSize() / input_depth) * filter_count;
   auto flops = num_computed_elements *
                (input_depth * filter_height * filter_width * filter_planes);
-  ::tensorflow::testing::ItemsProcessed(flops * iters);
+  state.SetItemsProcessed(flops * state.iterations());
 }
 
-void CuboidConvolutionBackwardInput(int iters, int num_threads,
+void CuboidConvolutionBackwardInput(::testing::benchmark::State& state,
+                                    int num_threads,
                                     /* Input dimensions: */
                                     int input_batches, int input_height,
                                     int input_width, int input_planes,
@@ -285,13 +280,11 @@ void CuboidConvolutionBackwardInput(int iters, int num_threads,
                                     /* Filter (kernel) dimensions: */
                                     int filter_count, int filter_height,
                                     int filter_width, int filter_planes) {
-  ::tensorflow::testing::StopTiming();
-
   CREATE_THREAD_POOL(num_threads);
 
   using Benchmark = CuboidConvolutionBenchmarksSuite;
-  auto benchmark = Benchmark(iters, device);
+  auto benchmark = Benchmark(state, device);
 
   typename Benchmark::Dimensions input_dims(
       input_batches, input_height, input_width, input_planes, input_depth);
@@ -303,10 +296,11 @@ void CuboidConvolutionBackwardInput(int iters, int num_threads,
   auto num_computed_elements = input_dims.TotalSize();
   auto flops = num_computed_elements *
                (input_depth * filter_height * filter_width * filter_planes);
-  ::tensorflow::testing::ItemsProcessed(flops * iters);
+  state.SetItemsProcessed(flops * state.iterations());
 }
 
-void CuboidConvolutionBackwardKernel(int iters, int num_threads,
+void CuboidConvolutionBackwardKernel(::testing::benchmark::State& state,
+                                     int num_threads,
                                      /* Input dimensions: */
                                      int input_batches, int input_height,
                                      int input_width, int input_planes,
@@ -314,13 +308,11 @@ void CuboidConvolutionBackwardKernel(int iters, int num_threads,
                                      /* Filter (kernel) dimensions: */
                                      int filter_count, int filter_height,
                                      int filter_width, int filter_planes) {
-  ::tensorflow::testing::StopTiming();
-
   CREATE_THREAD_POOL(num_threads);
 
   using Benchmark = CuboidConvolutionBenchmarksSuite;
-  auto benchmark = Benchmark(iters, device);
+  auto benchmark = Benchmark(state, device);
 
   typename Benchmark::Dimensions input_dims(
       input_batches, input_height, input_width, input_planes, input_depth);
@@ -332,9 +324,16 @@ void CuboidConvolutionBackwardKernel(int iters, int num_threads,
   auto num_computed_elements = filter_dims.TotalSize();
   auto flops = num_computed_elements *
                (input_batches * input_height * input_width * input_planes);
-  ::tensorflow::testing::ItemsProcessed(flops * iters);
+  state.SetItemsProcessed(flops * state.iterations());
 }
 
+// The multiple #'s in the function names + the `::testing::benchmark::State&`
+// as parameters apparently confuses clang if they are not on the same line. So
+// we need to turn off LINT and clang-format for this block.
+//
+// clang-format off
+// NOLINTBEGIN
+
 // Macro arguments names: --------------------------------------------------- //
 // NT: num threads
 // N:  batch size
@@ -354,33 +353,33 @@ void CuboidConvolutionBackwardKernel(int iters, int num_threads,
                  _f_##FC##_##FH##_##FW##_##FP)
 
 #define BM_CuboidConvolution(NT, N, H, W, P, C, FC, FH, FW, FP, LABEL)        \
-  static void BM_CUBOID_NAME(CuboidConvolution, NT, N, H, W, P, C, FC, FH, FW, \
-                             FP)(int iters) {                                 \
-    ::tensorflow::testing::SetLabel(LABEL);                                   \
-    CuboidConvolution(iters, NT, N, H, W, P, C, FC, FH, FW, FP);              \
+  static void BM_CUBOID_NAME(CuboidConvolution, NT, N, H, W, P, C, FC, FH, FW, FP)(::testing::benchmark::State & state) { \
+    state.SetLabel(LABEL);                                                    \
+    CuboidConvolution(state, NT, N, H, W, P, C, FC, FH, FW, FP);              \
   }                                                                           \
   BENCHMARK(                                                                  \
       BM_CUBOID_NAME(CuboidConvolution, NT, N, H, W, P, C, FC, FH, FW, FP))
 
 #define BM_CuboidConvolutionBwdInput(NT, N, H, W, P, C, FC, FH, FW, FP, LABEL) \
-  static void BM_CUBOID_NAME(CuboidConvolutionBwdInput, NT, N, H, W, P, C, FC, \
-                             FH, FW, FP)(int iters) {                         \
-    ::tensorflow::testing::SetLabel(LABEL);                                   \
-    CuboidConvolutionBackwardInput(iters, NT, N, H, W, P, C, FC, FH, FW, FP); \
+  static void BM_CUBOID_NAME(CuboidConvolutionBwdInput, NT, N, H, W, P, C, FC, FH, FW, FP)(::testing::benchmark::State & state) { \
+    state.SetLabel(LABEL);                                                    \
+    CuboidConvolutionBackwardInput(state, NT, N, H, W, P, C, FC, FH, FW, FP); \
   }                                                                           \
   BENCHMARK(BM_CUBOID_NAME(CuboidConvolutionBwdInput, NT, N, H, W, P, C, FC,  \
                            FH, FW, FP))
 
 #define BM_CuboidConvolutionBwdKernel(NT, N, H, W, P, C, FC, FH, FW, FP,      \
                                       LABEL)                                  \
-  static void BM_CUBOID_NAME(CuboidConvolutionBwdKernel, NT, N, H, W, P, C,   \
-                             FC, FH, FW, FP)(int iters) {                     \
-    ::tensorflow::testing::SetLabel(LABEL);                                   \
-    CuboidConvolutionBackwardKernel(iters, NT, N, H, W, P, C, FC, FH, FW, FP); \
+  static void BM_CUBOID_NAME(CuboidConvolutionBwdKernel, NT, N, H, W, P, C, FC, FH, FW, FP)(::testing::benchmark::State & state) { \
+    state.SetLabel(LABEL);                                                    \
+    CuboidConvolutionBackwardKernel(state, NT, N, H, W, P, C, FC, FH, FW, FP); \
   }                                                                           \
   BENCHMARK(BM_CUBOID_NAME(CuboidConvolutionBwdKernel, NT, N, H, W, P, C, FC, \
                            FH, FW, FP))
 
+// NOLINTEND
+// clang-format on
+
 #define BM_CuboidConvolutions(N, H, W, P, C, FC, FH, FW, FP, LABEL)           \
   BM_CuboidConvolution(2, N, H, W, P, C, FC, FH, FW, FP, LABEL);              \
   BM_CuboidConvolution(4, N, H, W, P, C, FC, FH, FW, FP, LABEL);              \
diff --git a/tensorflow/core/kernels/fused_batch_norm_op_test.cc b/tensorflow/core/kernels/fused_batch_norm_op_test.cc
index 734fb294135..989fbc27b7c 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op_test.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op_test.cc
@@ -283,18 +283,23 @@ static Graph* FusedBatchNormGrad(int n, int h, int w, int c, bool is_training,
 // -------------------------------------------------------------------------- //
 // FusedBatchNorm inference
 // -------------------------------------------------------------------------- //
+// clang-format off
+// NOLINTBEGIN
+#define BM_FusedBatchNorm(N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE)          \
+  static void BM_NAME(FusedBatchNorm, N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE)(::testing::benchmark::State & state) { \
+    test::Benchmark(                                                           \
+        #DEVICE,                                                               \
+        FusedBatchNormInference<T>(N, H, W, C, IS_TRAINING, FORMAT_##FORMAT),  \
+        /*old_benchmark_api*/ false)                                           \
+        .Run(state);                                                           \
+    state.SetItemsProcessed(state.iterations() * N * H * W * C);               \
+  }                                                                            \
+  BENCHMARK(                                                                   \
+      BM_NAME(FusedBatchNorm, N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE))     \
+      ->UseRealTime();
 
-#define BM_FusedBatchNorm(N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE)        \
-  static void BM_NAME(FusedBatchNorm, N, H, W, C, T, IS_TRAINING, FORMAT,    \
-                      DEVICE)(int iters) {                                   \
-    testing::UseRealTime();                                                  \
-    testing::ItemsProcessed(static_cast<int64>(iters) * N * H * W * C);      \
-    test::Benchmark(#DEVICE, FusedBatchNormInference<T>(                     \
-                                 N, H, W, C, IS_TRAINING, FORMAT_##FORMAT))  \
-        .Run(iters);                                                         \
-  }                                                                          \
-  BENCHMARK(                                                                 \
-      BM_NAME(FusedBatchNorm, N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE));
+// NOLINTEND
+// clang-format on
 
 BM_FusedBatchNorm(64, 14, 14, 256, fp32, false, NHWC, cpu);
 BM_FusedBatchNorm(64, 14, 14, 256, fp16, false, NHWC, cpu);
@@ -320,17 +325,19 @@ BM_FusedBatchNorm(64, 14, 14, 256, fp16, true, NCHW, gpu);
 // FusedBatchNorm gradient
 // -------------------------------------------------------------------------- //
 
-#define BM_FusedBatchNormGrad(N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE)     \
-  static void BM_NAME(FusedBatchNormGrad, N, H, W, C, T, IS_TRAINING, FORMAT, \
-                      DEVICE)(int iters) {                                    \
-    testing::UseRealTime();                                                   \
-    testing::ItemsProcessed(static_cast<int64>(iters) * N * H * W * C);       \
-    test::Benchmark(#DEVICE, FusedBatchNormGrad<T>(N, H, W, C, IS_TRAINING,   \
-                                                   FORMAT_##FORMAT))          \
-        .Run(iters);                                                          \
-  }                                                                           \
-  BENCHMARK(BM_NAME(FusedBatchNormGrad, N, H, W, C, T, IS_TRAINING, FORMAT,   \
-                    DEVICE));
+#define BM_FusedBatchNormGrad(N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE)      \
+  static void BM_NAME(FusedBatchNormGrad, N, H, W, C, T, IS_TRAINING, FORMAT,  \
+                      DEVICE)(::testing::benchmark::State & state) {           \
+    test::Benchmark(                                                           \
+        #DEVICE,                                                               \
+        FusedBatchNormGrad<T>(N, H, W, C, IS_TRAINING, FORMAT_##FORMAT),       \
+        /*old_benchmark_api*/ false)                                           \
+        .Run(state);                                                           \
+    state.SetItemsProcessed(state.iterations() * N * H * W * C);               \
+  }                                                                            \
+  BENCHMARK(                                                                   \
+      BM_NAME(FusedBatchNormGrad, N, H, W, C, T, IS_TRAINING, FORMAT, DEVICE)) \
+      ->UseRealTime();
 
 #define BM_FusedBatchNormGradResnetShapes(T, IS_TRAINING, FORMAT, DEVICE)    \
   BM_FusedBatchNormGrad(64, 56, 56, 64, T, IS_TRAINING, FORMAT, DEVICE);     \
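Also worth noting in the FusedBatchNorm hunks: with the new API, per-benchmark options such as real-time measurement are no longer requested by calling `testing::UseRealTime()` inside the body; they are chained onto the `BENCHMARK(...)` registration, which is why these macros now end in `->UseRealTime();`. A minimal sketch of that registration pattern, illustrative only and written against the open-source Google Benchmark API:

    #include "benchmark/benchmark.h"

    static void BM_DeviceLaunch(benchmark::State& state) {
      for (auto _ : state) {
        // ... enqueue asynchronous/device work here ...
      }
    }
    // Measurement options hang off the registration object rather than being
    // toggled from inside the benchmark body.
    BENCHMARK(BM_DeviceLaunch)->UseRealTime();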
diff --git a/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc
index 7c20b88845f..f4b54fb1c6a 100644
--- a/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc
+++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc
@@ -98,14 +98,16 @@ static Graph* BandedTriangularSolve(int64 num_bands, int64 n, int64 m,
 // BS: boolean indicating whether to use the banded solver
 // T: C++ type of scalars (e.g. float, std::complex<double>)
 // TT: TensorFlow type of scalars (e.g. DT_FLOAT, DT_COMPLEX128)
-#define BM_BandedTriangularSolveDev(K, N, M, BS, T, TT, D)                   \
-  static void BM_BandedTriangularSolve##_##K##_##N##_##M##_##BS##_##TT(      \
-      int iters) {                                                           \
-    testing::UseRealTime();                                                  \
-    testing::ItemsProcessed(static_cast<int64>(iters) * K * N + N * M);      \
-    test::Benchmark(#D, BandedTriangularSolve(K, N, M, BS, TT)).Run(iters);  \
-  }                                                                          \
-  BENCHMARK(BM_BandedTriangularSolve##_##K##_##N##_##M##_##BS##_##TT);
+#define BM_BandedTriangularSolveDev(K, N, M, BS, T, TT, D)                  \
+  static void BM_BandedTriangularSolve##_##K##_##N##_##M##_##BS##_##TT(     \
+      ::testing::benchmark::State& state) {                                 \
+    test::Benchmark(#D, BandedTriangularSolve(K, N, M, BS, TT),             \
+                    /*old_benchmark_api*/ false)                            \
+        .Run(state);                                                        \
+    state.SetItemsProcessed(state.iterations() * K * N + N * M);            \
+  }                                                                         \
+  BENCHMARK(BM_BandedTriangularSolve##_##K##_##N##_##M##_##BS##_##TT)       \
+      ->UseRealTime();
 
 #define BM_BandedTriangularSolve(K, N, M, BS, D)                \
   BM_BandedTriangularSolveDev(K, N, M, BS, float, DT_FLOAT, D); \
diff --git a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc
index 7bb71ae8b68..e03f29340ae 100644
--- a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc
+++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc
@@ -101,18 +101,18 @@ static Graph* MatrixTriangularSolveWithBroadcast(int64 b0, int64 b1, int64 m,
 // T: C++ type of scalars (e.g. float, std::complex<double>)
 // TT: TensorFlow type of scalars (e.g. DT_FLOAT, DT_COMPLEX128)
 // D: Device (e.g. cpu, gpu)
-#define BM_MatrixTriangularSolveDev(B1, B2, M, N, MB, T, TT, D)               \
-  static void                                                                 \
-      BM_MatrixTriangularSolve##_##B1##_##B2##_##M##_##N##_##MB##_##TT##_##D( \
-          int iters) {                                                        \
-    testing::UseRealTime();                                                   \
-    testing::ItemsProcessed(static_cast<int64>(iters) * std::max(B1, B2) * M * \
-                            M * N * 2);                                       \
-    test::Benchmark(                                                          \
-        #D, MatrixTriangularSolveWithBroadcast(B1, B2, M, N, MB, TT))         \
-        .Run(iters);                                                          \
-  }                                                                           \
-  BENCHMARK(                                                                  \
+#define BM_MatrixTriangularSolveDev(B1, B2, M, N, MB, T, TT, D)               \
+  static void                                                                 \
+      BM_MatrixTriangularSolve##_##B1##_##B2##_##M##_##N##_##MB##_##TT##_##D( \
+          ::testing::benchmark::State& state) {                               \
+    state.SetItemsProcessed(state.iterations() * std::max(B1, B2) * M * M *   \
+                            N * 2);                                           \
+    test::Benchmark(                                                          \
+        #D, MatrixTriangularSolveWithBroadcast(B1, B2, M, N, MB, TT),         \
+        /*old_benchmark_api*/ false)                                          \
+        .Run(state);                                                          \
+  }                                                                           \
+  BENCHMARK(                                                                  \
       BM_MatrixTriangularSolve##_##B1##_##B2##_##M##_##N##_##MB##_##TT##_##D);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
index 92ddf8edbfb..a321f4739eb 100644
--- a/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
+++ b/tensorflow/core/kernels/spacetobatch_benchmark_test.cc
@@ -56,20 +56,25 @@ static Graph* ConstructSpaceToBatchGraph(
 
 // The BM_Expand macro is needed for this to build with VC++.
 #define BM_Expand(x) x
+
+// Macro is already longer than 80 chars.
+// NOLINTBEGIN
 #define BM_SpaceToBatchDev(OP, DEVICE, DTYPE, B, H, W, D, BS, P00, P01, P10,  \
                            P11)                                               \
   static void                                                                 \
       BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11( \
-          int iters) {                                                        \
-    testing::ItemsProcessed(static_cast<int64>(iters) * B * (H + P00 + P01) * \
+          ::testing::benchmark::State& state) {                               \
+    test::Benchmark(                                                          \
+        #DEVICE,                                                              \
+        ConstructSpaceToBatchGraph(#OP, TensorShape({B, H, W, D}), BS, DTYPE, \
+                                   {{P00, P01}, {P10, P11}}),                 \
+        /*old_benchmark_api*/ false)                                          \
+        .Run(state);                                                          \
+    state.SetItemsProcessed(state.iterations() * B * (H + P00 + P01) *        \
                             (W + P10 + P11) * D);                             \
-    test::Benchmark(#DEVICE, ConstructSpaceToBatchGraph(                      \
-                                 #OP, TensorShape({B, H, W, D}), BS, DTYPE,   \
-                                 {{P00, P01}, {P10, P11}}))                   \
-        .Run(iters);                                                          \
   }                                                                           \
   BENCHMARK(                                                                  \
       BM_##OP##_##DEVICE##_##DTYPE##_##B##_##H##_##W##_##D##_bs##BS##_pad##P00##_##P01##_##P10##_##P11);
+// NOLINTEND
 
 #define BM_SpaceToBatch(OP, ...)                                   \
   BM_Expand(BM_SpaceToBatchDev(OP, cpu, DT_FLOAT, __VA_ARGS__));   \
   BM_Expand(BM_SpaceToBatchDev(OP, gpu, DT_FLOAT, __VA_ARGS__));   \
diff --git a/tensorflow/core/kernels/sparse_matmul_op_test.cc b/tensorflow/core/kernels/sparse_matmul_op_test.cc
index 1dc51cd804c..a0f07d4c4de 100644
--- a/tensorflow/core/kernels/sparse_matmul_op_test.cc
+++ b/tensorflow/core/kernels/sparse_matmul_op_test.cc
@@ -107,36 +107,30 @@ static Graph* ReplicatedSparseMatMul(int m, int n, int d, float sparsity_1,
 #define BM_SPARSE(M, K, N, S1, S2, TRA, TRB, TA, TB)                          \
   static void                                                                 \
       BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB( \
-          int iters) {                                                        \
-    testing::StopTiming();                                                    \
-    testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2);       \
+          ::testing::benchmark::State& state) {                               \
     auto label = strings::Printf("tr_a: %d tr_b: %d sp_a: %0.2f sp_b: %0.2f", \
                                  TRA, TRB, S1 / 100.0, S2 / 100.0);           \
-    testing::SetLabel(label);                                                 \
-    testing::UseRealTime();                                                   \
+    state.SetLabel(label);                                                    \
     auto g = SparseMatMul<TA, TB>(M, N, K, S1 / 100.0, S2 / 100.0, TRA, TRB); \
-    testing::StartTiming();                                                   \
-    test::Benchmark("cpu", g).Run(iters);                                     \
+    test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);        \
   }                                                                           \
   BENCHMARK(                                                                  \
-      BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB);
+      BM_Sparse##_##M##_##K##_##N##_##S1##_##S2##_##TRA##_##TRB##_##TA##_##TB) \
+      ->UseRealTime();
 
 #define BM_SPARSE_REPLICATED(M, K, N, S1, S2, Copies)                         \
   static void BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies( \
-      int iters) {                                                            \
-    testing::StopTiming();                                                    \
-    testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * Copies *  \
-                            2);                                               \
+      ::testing::benchmark::State& state) {                                   \
     auto label = strings::Printf("copies: %d sp_a: %0.2f sp_b: %0.2f",        \
                                  (Copies), S1 / 100.0, S2 / 100.0);           \
-    testing::SetLabel(label);                                                 \
-    testing::UseRealTime();                                                   \
+    state.SetLabel(label);                                                    \
     auto g =                                                                  \
         ReplicatedSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, (Copies));    \
-    testing::StartTiming();                                                   \
-    test::Benchmark("cpu", g).Run(iters);                                     \
+    test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);        \
+    state.SetItemsProcessed(state.iterations() * M * K * N * Copies * 2);     \
   }                                                                           \
-  BENCHMARK(BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies);
+  BENCHMARK(BM_Sparse_replicated##_##M##_##K##_##N##_##S1##_##S2##_##Copies)  \
+      ->UseRealTime();
 
 #define BM_SPARSE_FLOAT(M, K, N, S1, S2, TRA, TRB) \
   BM_SPARSE(M, K, N, S1, S2, TRA, TRB, float, float)
@@ -219,22 +213,21 @@ static Graph* MultiSparseMatMul(int m, int n, int d, float sparsity_1,
   return g;
 }
 
-#define BM_SPARSE_MULTI(M, K, N, S1, S2, Copies)                              \
-  static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies(     \
-      int iters) {                                                            \
-    testing::StopTiming();                                                    \
-    testing::ItemsProcessed(static_cast<int64>(iters) * M * K * N * 2 * 2 *   \
-                            Copies);                                          \
-    auto label = strings::Printf("%d_%d_%d_%d_%0.2f_%0.2f", M, K, N, Copies,  \
-                                 S1 / 100.0, S2 / 100.0);                     \
-    testing::SetLabel(label);                                                 \
-    testing::UseRealTime();                                                   \
-    auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, Copies);      \
-    testing::StartTiming();                                                   \
-    test::Benchmark("cpu", g).Run(iters);                                     \
-  }                                                                           \
-  BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies);
-
+// clang-format off
+// NOLINTBEGIN
+#define BM_SPARSE_MULTI(M, K, N, S1, S2, Copies)                              \
+  static void BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies(::testing::benchmark::State& state) { \
+    auto label = strings::Printf("%d_%d_%d_%d_%0.2f_%0.2f", M, K, N, Copies,  \
+                                 S1 / 100.0, S2 / 100.0);                     \
+    state.SetLabel(label);                                                    \
+    auto g = MultiSparseMatMul(M, N, K, S1 / 100.0, S2 / 100.0, Copies);      \
+    test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);        \
+    state.SetItemsProcessed(state.iterations() * M * K * N * 2 * 2 * Copies); \
+  }                                                                           \
+  BENCHMARK(BM_Sparse_Multi##_##M##_##K##_##N##_##S1##_##S2##_##Copies)       \
+      ->UseRealTime();
+// NOLINTEND
+// clang-format on
 BM_SPARSE_MULTI(1024, 2140, 4096, 0, 82, 1);
 BM_SPARSE_MULTI(1024, 4096, 2048, 83, 83, 1);
 BM_SPARSE_MULTI(400, 800, 2560, 85, 85, 1);
diff --git a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_test.cc b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_test.cc
index 249ddbe8e63..b06f72d42e0 100644
--- a/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_test.cc
+++ b/tensorflow/core/kernels/sparse_tensor_dense_matmul_op_test.cc
@@ -68,19 +68,22 @@ static Graph* SparseTensorDenseMatmul(int nnz, int m, int k, int n,
   return g;
 }
 
+// NOLINTBEGIN
 #define BM_SparseTensorDenseMatmulDev(NNZ, M, K, N, TA, TB, DEVICE)           \
   static void                                                                 \
       BM_SparseTensorDenseMatmul##_##NNZ##_##M##_##K##_##N##_##TA##_##TB##_##DEVICE( \
-          int iters) {                                                        \
+          ::testing::benchmark::State& state) {                               \
     int64 items_per_iter = (static_cast<int64>(NNZ) * (TB ? K : N));          \
-    testing::ItemsProcessed(static_cast<int64>(iters) * items_per_iter);      \
-    testing::BytesProcessed(static_cast<int64>(iters) * items_per_iter *      \
+    test::Benchmark(#DEVICE, SparseTensorDenseMatmul(NNZ, M, K, N, TA, TB),   \
+                    /*old_benchmark_api*/ false)                              \
+        .Run(state);                                                          \
+    state.SetItemsProcessed(state.iterations() * items_per_iter);             \
+    state.SetBytesProcessed(state.iterations() * items_per_iter *             \
                             sizeof(float));                                   \
-    test::Benchmark(#DEVICE, SparseTensorDenseMatmul(NNZ, M, K, N, TA, TB))   \
-        .Run(iters);                                                          \
   }                                                                           \
   BENCHMARK(                                                                  \
       BM_SparseTensorDenseMatmul##_##NNZ##_##M##_##K##_##N##_##TA##_##TB##_##DEVICE);
+// NOLINTEND
 
 #define BM_SparseTensorDenseMatmul(NNZ, M, K, N, TA, TB)     \
   BM_SparseTensorDenseMatmulDev(NNZ, M, K, N, TA, TB, cpu);  \
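Finally, the `NOLINTBEGIN`/`clang-format off` fences added in several of these files exist because the macros both define and register a benchmark whose name is token-pasted from every parameter, and with the long `::testing::benchmark::State&` parameter the resulting signature lines cannot be wrapped to 80 columns. A cut-down sketch of that pattern, with a hypothetical macro and sizes that are not from the patch:

    #include "benchmark/benchmark.h"

    // Defines BM_Matmul_<M>_<N> and registers it in one go; the signature has
    // to stay on a single line, hence the lint/format suppressions upstream.
    #define BM_MATMUL(M, N)                                            \
      static void BM_Matmul_##M##_##N(benchmark::State& state) {       \
        for (auto _ : state) {                                         \
          int work = (M) * (N);                                        \
          benchmark::DoNotOptimize(work);                              \
        }                                                              \
        state.SetItemsProcessed(state.iterations() * (M) * (N));       \
      }                                                                \
      BENCHMARK(BM_Matmul_##M##_##N)

    BM_MATMUL(128, 256);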