From 0505299448691c1ff51f91a62f118963ca1f790f Mon Sep 17 00:00:00 2001
From: Chao Mei <chaomei@google.com>
Date: Tue, 30 Jul 2019 19:19:09 -0700
Subject: [PATCH] Output a summary of all runs of different performance
 options.

PiperOrigin-RevId: 260844911
---
 tensorflow/lite/tools/benchmark/BUILD         |  1 +
 .../benchmark_performance_options.cc          | 72 +++++++++++++++++++
 .../benchmark/benchmark_performance_options.h | 47 +++++++++---
 3 files changed, 110 insertions(+), 10 deletions(-)
diff --git a/tensorflow/lite/tools/benchmark/BUILD b/tensorflow/lite/tools/benchmark/BUILD
index 0090705d44a..081b0fc87af 100644
--- a/tensorflow/lite/tools/benchmark/BUILD
+++ b/tensorflow/lite/tools/benchmark/BUILD
@@ -130,6 +130,7 @@ cc_library(
         ":benchmark_model_lib",
         ":benchmark_utils",
         ":logging",
+        "//tensorflow/core:stats_calculator_portable",
         "//tensorflow/lite/profiling:time",
         "//tensorflow/lite/tools:command_line_flags",
     ],
diff --git a/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc b/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc
index 3573671bbae..12aabbb28d1 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_performance_options.cc
@@ -16,7 +16,12 @@ limitations under the License.
 #include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"
 
 #include <algorithm>
+#include <iomanip>
+#include <memory>
+#include <sstream>
+#include <utility>
 
+#include "tensorflow/core/util/stats_calculator.h"
 #include "tensorflow/lite/profiling/time.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
 #include "tensorflow/lite/tools/benchmark/logging.h"
@@ -25,6 +30,62 @@ limitations under the License.
 namespace tflite {
 namespace benchmark {
 
+void MultiRunStatsRecorder::OnBenchmarkStart(const BenchmarkParams& params) {
+  current_run_name_.clear();
+
+  if (params.Get<bool>("use_nnapi")) {
+    current_run_name_ = "nnapi";
+    return;
+  }
+
+  if (params.Get<bool>("use_gpu")) {
+    current_run_name_ = "gpu";
+    return;
+  }
+
+  // Handle the case where the benchmark runs on CPU.
+  // Note: could use std::to_string to convert an integer to string but it
+  // requires C++11.
+  std::stringstream sstm;
+  sstm << "cpu w/ " << params.Get<int32_t>("num_threads") << " threads";
+  current_run_name_ = sstm.str();
+}
+
+void MultiRunStatsRecorder::OnBenchmarkEnd(const BenchmarkResults& results) {
+  each_run_stats_.emplace_back(std::make_pair(current_run_name_, results));
+}
+
+void MultiRunStatsRecorder::OutputStats() {
+  // Make an 80-character-long header.
+  TFLITE_LOG(INFO) << "\n==============Summary of All Runs w/ Different "
+                      "Performance Options==============";
+  std::sort(each_run_stats_.begin(), each_run_stats_.end(),
+            EachRunStatsEntryComparator());
+
+  for (const auto& run_stats : each_run_stats_) {
+    std::stringstream stream;
+    // Output the name of this run first.
+    stream << std::setw(26) << run_stats.first << ": ";
+    run_stats.second.inference_time_us().OutputToStream(&stream);
+    TFLITE_LOG(INFO) << stream.str();
+  }
+}
+
+BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
+    BenchmarkModel* single_option_run)
+    : BenchmarkPerformanceOptions(DefaultParams(), single_option_run,
+                                  DefaultRunStatsRecorder()) {}
+
+BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
+    BenchmarkParams params, BenchmarkModel* single_option_run,
+    std::unique_ptr<MultiRunStatsRecorder> all_run_stats)
+    : params_(std::move(params)),
+      single_option_run_(single_option_run),
+      single_option_run_params_(single_option_run->mutable_params()),
+      all_run_stats_(std::move(all_run_stats)) {
+  single_option_run_->AddListener(all_run_stats_.get());
+}
+
 BenchmarkParams BenchmarkPerformanceOptions::DefaultParams() {
   BenchmarkParams params;
   params.AddParam("perf_options_list",
@@ -34,6 +95,11 @@ BenchmarkParams BenchmarkPerformanceOptions::DefaultParams() {
   return params;
 }
 
+std::unique_ptr<MultiRunStatsRecorder>
+BenchmarkPerformanceOptions::DefaultRunStatsRecorder() {
+  return std::unique_ptr<MultiRunStatsRecorder>(new MultiRunStatsRecorder());
+}
+
 std::vector<Flag> BenchmarkPerformanceOptions::GetFlags() {
   return {
       CreateFlag<std::string>(
@@ -154,6 +220,12 @@ void BenchmarkPerformanceOptions::Run(int argc, char** argv) {
     return;
   }
 
+  Run();
+
+  all_run_stats_->OutputStats();
+}
+
+void BenchmarkPerformanceOptions::Run() {
   TFLITE_LOG(INFO) << "The list of TFLite runtime options to be benchmarked: ["
                    << params_.Get<std::string>("perf_options_list") << "]";
 
diff --git a/tensorflow/lite/tools/benchmark/benchmark_performance_options.h b/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
index 38bcd9fa168..bf5262a19e2 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
@@ -16,31 +16,55 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_TOOLS_BENCHMARK_BENCHMARK_PERFORMANCE_OPTIONS_H_
 #define TENSORFLOW_LITE_TOOLS_BENCHMARK_BENCHMARK_PERFORMANCE_OPTIONS_H_
 
+#include <memory>
+#include <vector>
+
 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"
 
 namespace tflite {
 namespace benchmark {
 
+class MultiRunStatsRecorder : public BenchmarkListener {
+ public:
+  void OnBenchmarkStart(const BenchmarkParams& params) override;
+  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+
+  virtual void OutputStats();
+
+ protected:
+  using EachRunStatsEntry = std::pair<std::string, BenchmarkResults>;
+
+  // Use this to order the runs by the average inference time in increasing
+  // order (i.e. the fastest run ranks first).
+  struct EachRunStatsEntryComparator {
+    bool operator()(const EachRunStatsEntry& i, const EachRunStatsEntry& j) {
+      return (i.second.inference_time_us().avg() <
+              j.second.inference_time_us().avg());
+    }
+  };
+
+  std::string current_run_name_;
+  std::vector<EachRunStatsEntry> each_run_stats_;
+};
+
 // Benchmarks all performance options on a model by repeatedly invoking the
 // single-performance-option run on a passed-in 'BenchmarkModel' object.
 class BenchmarkPerformanceOptions {
  public:
   // Doesn't own the memory of 'single_option_run'.
-  explicit BenchmarkPerformanceOptions(BenchmarkModel* single_option_run)
-      : BenchmarkPerformanceOptions(DefaultParams(), single_option_run) {}
-
-  BenchmarkPerformanceOptions(BenchmarkParams params,
-                              BenchmarkModel* single_option_run)
-      : params_(std::move(params)),
-        single_option_run_(single_option_run),
-        single_option_run_params_(single_option_run->mutable_params()) {}
+  explicit BenchmarkPerformanceOptions(BenchmarkModel* single_option_run);
 
   virtual ~BenchmarkPerformanceOptions() {}
 
-  virtual void Run(int argc, char** argv);
+  void Run(int argc, char** argv);
 
  protected:
   static BenchmarkParams DefaultParams();
+  static std::unique_ptr<MultiRunStatsRecorder> DefaultRunStatsRecorder();
+
+  BenchmarkPerformanceOptions(
+      BenchmarkParams params, BenchmarkModel* single_option_run,
+      std::unique_ptr<MultiRunStatsRecorder> all_run_stats);
 
   // Unparsable flags will remain in 'argv' in the original order and 'argc'
   // will be updated accordingly.
@@ -50,8 +74,9 @@ class BenchmarkPerformanceOptions {
   bool ParsePerfOptions();
   virtual std::vector<std::string> GetValidPerfOptions() const;
   bool HasOption(const std::string& option) const;
-  virtual void ResetPerformanceOptions();
 
+  virtual void Run();
+  virtual void ResetPerformanceOptions();
   virtual void BenchmarkCPUOptions();
   virtual void BenchmarkGPUOptions();
   virtual void BenchmarkNnapiOptions();
@@ -62,6 +87,8 @@ class BenchmarkPerformanceOptions {
   // The object that drives a single-performance-option run.
   BenchmarkModel* const single_option_run_;          // Doesn't own the memory.
   BenchmarkParams* const single_option_run_params_;  // Doesn't own the memory.
+
+  std::unique_ptr<MultiRunStatsRecorder> all_run_stats_;
 };
 
 }  // namespace benchmark