From 19a41461e24112a9fde929afdc5be36fd9febf2a Mon Sep 17 00:00:00 2001
From: Chao Mei
Date: Mon, 6 Apr 2020 21:50:47 -0700
Subject: [PATCH] 1. Record the BenchmarkParam of each run in the
 multi-performance-option benchmark tool.

2. Also record if a particular performance option could result in a failure.

PiperOrigin-RevId: 305182635
Change-Id: I2305c6b030c6f9c2eec257353956d22ab51135b2
---
 tensorflow/lite/tools/benchmark/BUILD          |  2 +
 .../lite/tools/benchmark/benchmark_model.h     | 12 ++--
 .../lite/tools/benchmark/benchmark_params.cc   | 11 ++++
 .../lite/tools/benchmark/benchmark_params.h    | 10 ++++
 .../benchmark_performance_options.cc           | 60 ++++++++-----------
 .../benchmark/benchmark_performance_options.h  | 49 +++++++++++----
 .../lite/tools/benchmark/benchmark_test.cc     | 27 ++++++++-
 7 files changed, 120 insertions(+), 51 deletions(-)

diff --git a/tensorflow/lite/tools/benchmark/BUILD b/tensorflow/lite/tools/benchmark/BUILD
index d10c1acb95d..a979a8a55ef 100644
--- a/tensorflow/lite/tools/benchmark/BUILD
+++ b/tensorflow/lite/tools/benchmark/BUILD
@@ -112,6 +112,7 @@ cc_test(
         "//tensorflow/lite/testing:util",
         "//tensorflow/lite/tools:command_line_flags",
         "@com_google_absl//absl/algorithm",
+        "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_googletest//:gtest",
     ],
@@ -180,6 +181,7 @@ cc_library(
         ":benchmark_params",
         ":benchmark_utils",
         ":logging",
+        "@com_google_absl//absl/memory",
         "//tensorflow/core/util:stats_calculator_portable",
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/nnapi:nnapi_util",
diff --git a/tensorflow/lite/tools/benchmark/benchmark_model.h b/tensorflow/lite/tools/benchmark/benchmark_model.h
index 8a207a6fd45..0aca42dc200 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_model.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_model.h
@@ -40,6 +40,7 @@ enum RunType {
 
 class BenchmarkResults {
  public:
+  BenchmarkResults() {}
   BenchmarkResults(double model_size_mb, int64_t startup_latency_us,
                    uint64_t input_bytes,
                    tensorflow::Stat<int64_t> warmup_time_us,
@@ -75,9 +76,9 @@ class BenchmarkResults {
   }
 
  private:
-  double model_size_mb_;
-  int64_t startup_latency_us_;
-  uint64_t input_bytes_;
+  double model_size_mb_ = 0.0;
+  int64_t startup_latency_us_ = 0;
+  uint64_t input_bytes_ = 0;
   tensorflow::Stat<int64_t> warmup_time_us_;
   tensorflow::Stat<int64_t> inference_time_us_;
   profiling::memory::MemoryUsage init_mem_usage_;
@@ -142,7 +143,7 @@ class BenchmarkListeners : public BenchmarkListener {
     }
   }
 
-  ~BenchmarkListeners() {}
+  ~BenchmarkListeners() override {}
 
  private:
   // Use vector so listeners are invoked in the order they are added.
@@ -171,7 +172,8 @@ class BenchmarkModel {
  public:
   static BenchmarkParams DefaultParams();
   BenchmarkModel();
-  BenchmarkModel(BenchmarkParams params) : params_(std::move(params)) {}
+  explicit BenchmarkModel(BenchmarkParams params)
+      : params_(std::move(params)) {}
   virtual ~BenchmarkModel() {}
   virtual TfLiteStatus Init() = 0;
   TfLiteStatus Run(int argc, char** argv);
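A note on the benchmark_model.h change above: the new default constructor and the
zero-initialized members give BenchmarkResults a well-defined empty state, which the
multi-run recorder further below stores for runs that never complete. A minimal sketch of
what this guarantees (illustrative only, not part of the patch; the function name is made
up):

  #include <cassert>

  #include "tensorflow/lite/tools/benchmark/benchmark_model.h"

  // A default-constructed result carries no recorded samples, so a failed run
  // can be reported without reading uninitialized fields.
  void SketchEmptyResults() {
    tflite::benchmark::BenchmarkResults empty;
    assert(empty.inference_time_us().count() == 0);
  }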
diff --git a/tensorflow/lite/tools/benchmark/benchmark_params.cc b/tensorflow/lite/tools/benchmark/benchmark_params.cc
index caff9714d47..1dd6a8d519a 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_params.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_params.cc
@@ -61,5 +61,16 @@ void BenchmarkParams::Set(const BenchmarkParams& other) {
   }
 }
 
+void BenchmarkParams::Merge(const BenchmarkParams& other, bool overwrite) {
+  for (const auto& one : other.params_) {
+    auto it = params_.find(one.first);
+    if (it == params_.end()) {
+      AddParam(one.first, one.second->Clone());
+    } else if (overwrite) {
+      it->second->Set(*one.second);
+    }
+  }
+}
+
 }  // namespace benchmark
 }  // namespace tflite
diff --git a/tensorflow/lite/tools/benchmark/benchmark_params.h b/tensorflow/lite/tools/benchmark/benchmark_params.h
index 1be66dd3ca2..1b3dabf3f7b 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_params.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_params.h
@@ -59,6 +59,8 @@ class BenchmarkParam {
 
   virtual void Set(const BenchmarkParam&) {}
 
+  virtual std::unique_ptr<BenchmarkParam> Clone() const = 0;
+
  private:
   static void AssertHasSameType(ParamType a, ParamType b);
 
@@ -79,6 +81,10 @@ class TypedBenchmarkParam : public BenchmarkParam {
     Set(other.AsConstTyped<T>()->Get());
   }
 
+  std::unique_ptr<BenchmarkParam> Clone() const override {
+    return std::unique_ptr<BenchmarkParam>(new TypedBenchmarkParam<T>(value_));
+  }
+
  private:
   T value_;
 };
@@ -117,6 +123,10 @@ class BenchmarkParams {
   // Set the value of all same parameters from 'other'.
   void Set(const BenchmarkParams& other);
 
+  // Merge the value of all parameters from 'other'. 'overwrite' indicates
+  // whether the value of the same parameter is overwritten or not.
+  void Merge(const BenchmarkParams& other, bool overwrite = false);
+
  private:
   void AssertParamExists(const std::string& name) const;
   std::unordered_map<std::string, std::unique_ptr<BenchmarkParam>> params_;
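The new BenchmarkParams::Merge() above copies parameters that are missing from the
destination and, only when 'overwrite' is true, also replaces values of parameters present
on both sides. A minimal usage sketch; the parameter names and values are illustrative and
not taken from this patch:

  #include <cstdint>
  #include <string>

  #include "tensorflow/lite/tools/benchmark/benchmark_params.h"

  void SketchMerge() {
    using tflite::benchmark::BenchmarkParam;
    using tflite::benchmark::BenchmarkParams;

    BenchmarkParams dst;
    dst.AddParam("graph", BenchmarkParam::Create<std::string>("old.tflite"));

    BenchmarkParams src;
    src.AddParam("graph", BenchmarkParam::Create<std::string>("new.tflite"));
    src.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));

    // Default: only the missing "num_runs" is cloned; "graph" keeps "old.tflite".
    dst.Merge(src);
    // With overwrite=true, "graph" is replaced with "new.tflite" as well.
    dst.Merge(src, true /* overwrite */);
  }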
"nnapi(w/o accel name)" + : "nnapi(" + accelerator + ")"; } #endif if (params.Get("use_gpu")) { #if defined(__ANDROID__) if (params.Get("gpu_precision_loss_allowed")) { - current_run_name_ = "gpu-fp16"; + return "gpu-fp16"; } else { - current_run_name_ = "gpu-default"; + return "gpu-default"; } #else - current_run_name_ = "gpu-default"; + return "gpu-default"; #endif - return; } #if defined(TFLITE_ENABLE_HEXAGON) if (params.Get("use_hexagon")) { - current_run_name_ = "dsp w/ hexagon"; - return; + return "dsp w/ hexagon"; } #endif @@ -85,37 +81,37 @@ void MultiRunStatsRecorder::OnBenchmarkStart(const BenchmarkParams& params) { sstm << " (xnnpack)"; } - current_run_name_ = sstm.str(); -} - -void MultiRunStatsRecorder::OnBenchmarkEnd(const BenchmarkResults& results) { - each_run_stats_.emplace_back(std::make_pair(current_run_name_, results)); + return sstm.str(); } void MultiRunStatsRecorder::OutputStats() { // Make a 80-character-long header. TFLITE_LOG(INFO) << "\n==============Summary of All Runs w/ Different " "Performance Options=============="; - std::sort(each_run_stats_.begin(), each_run_stats_.end(), - EachRunStatsEntryComparator()); + std::sort(results_.begin(), results_.end(), EachRunStatsEntryComparator()); - for (const auto& run_stats : each_run_stats_) { + for (const auto& run_stats : results_) { + const auto perf_option_name = PerfOptionName(*run_stats.params); std::stringstream stream; - // Output the name of this run first. - stream << std::setw(26) << run_stats.first << ": "; - run_stats.second.inference_time_us().OutputToStream(&stream); - // NOTE: As of 2019/11/07, the memory usage is collected in an - // OS-process-wide way and this program performs multiple runs in a single - // OS process, therefore, the memory usage information of each run becomes - // incorrect, hence no output here. + stream << std::setw(26) << perf_option_name << ": "; + if (!run_stats.completed) { + stream << " failed!"; + } else { + run_stats.metrics.inference_time_us().OutputToStream(&stream); + // NOTE: As of 2019/11/07, the memory usage is collected in an + // OS-process-wide way and this program performs multiple runs in a single + // OS process, therefore, the memory usage information of each run becomes + // incorrect, hence no output here. + } TFLITE_LOG(INFO) << stream.str(); } } BenchmarkPerformanceOptions::BenchmarkPerformanceOptions( - BenchmarkModel* single_option_run) + BenchmarkModel* single_option_run, + std::unique_ptr all_run_stats) : BenchmarkPerformanceOptions(DefaultParams(), single_option_run, - DefaultRunStatsRecorder()) {} + std::move(all_run_stats)) {} BenchmarkPerformanceOptions::BenchmarkPerformanceOptions( BenchmarkParams params, BenchmarkModel* single_option_run, @@ -138,11 +134,6 @@ BenchmarkParams BenchmarkPerformanceOptions::DefaultParams() { return params; } -std::unique_ptr -BenchmarkPerformanceOptions::DefaultRunStatsRecorder() { - return std::unique_ptr(new MultiRunStatsRecorder()); -} - std::vector BenchmarkPerformanceOptions::GetFlags() { return { CreateFlag( @@ -360,6 +351,7 @@ void BenchmarkPerformanceOptions::Run() { // created ones. 
diff --git a/tensorflow/lite/tools/benchmark/benchmark_performance_options.h b/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
index b7ce59d994f..d9ab71f8b74 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
@@ -17,34 +17,59 @@ limitations under the License.
 #define TENSORFLOW_LITE_TOOLS_BENCHMARK_BENCHMARK_PERFORMANCE_OPTIONS_H_
 
 #include <memory>
+#include <string>
 #include <vector>
 
+#include "absl/memory/memory.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"
+#include "tensorflow/lite/tools/benchmark/benchmark_params.h"
 
 namespace tflite {
 namespace benchmark {
 
 class MultiRunStatsRecorder : public BenchmarkListener {
  public:
-  void OnBenchmarkStart(const BenchmarkParams& params) override;
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+  // BenchmarkListener::OnBenchmarkStart is invoked after each run's
+  // BenchmarkModel::Init. However, a run could fail during Init, e.g. when
+  // the delegate fails to be created. To still record such a run, this
+  // function is called right before a run starts.
+  void MarkBenchmarkStart(const BenchmarkParams& params) {
+    results_.emplace_back(EachRunResult());
+    auto& current = results_.back();
+    current.completed = false;
+    current.params = absl::make_unique<BenchmarkParams>();
+    current.params->Merge(params, true /* overwrite */);
+  }
+
+  void OnBenchmarkEnd(const BenchmarkResults& results) final {
+    auto& current = results_.back();
+    current.completed = true;
+    current.metrics = results;
+  }
 
   virtual void OutputStats();
 
  protected:
-  using EachRunStatsEntry = std::pair<std::string, BenchmarkResults>;
+  struct EachRunResult {
+    bool completed = false;
+    std::unique_ptr<BenchmarkParams> params;
+    BenchmarkResults metrics;
+  };
+  std::vector<EachRunResult> results_;
 
   // Use this to order the runs by the average inference time in increasing
-  // order (i.e. the fastest run ranks first.)
+  // order (i.e. the fastest run ranks first). If a run didn't complete, we
+  // consider it to be the slowest.
   struct EachRunStatsEntryComparator {
-    bool operator()(const EachRunStatsEntry& i, const EachRunStatsEntry& j) {
-      return (i.second.inference_time_us().avg() <
-              j.second.inference_time_us().avg());
+    bool operator()(const EachRunResult& i, const EachRunResult& j) {
+      if (!i.completed) return false;
+      if (!j.completed) return true;
+      return i.metrics.inference_time_us().avg() <
+             j.metrics.inference_time_us().avg();
     }
   };
 
-  std::string current_run_name_;
-  std::vector<EachRunStatsEntry> each_run_stats_;
+  virtual std::string PerfOptionName(const BenchmarkParams& params) const;
 };
 
 // Benchmarks all performance options on a model by repeatedly invoking the
@@ -52,7 +77,10 @@ class MultiRunStatsRecorder : public BenchmarkListener {
 class BenchmarkPerformanceOptions {
  public:
   // Doesn't own the memory of 'single_option_run'.
-  explicit BenchmarkPerformanceOptions(BenchmarkModel* single_option_run);
+  explicit BenchmarkPerformanceOptions(
+      BenchmarkModel* single_option_run,
+      std::unique_ptr<MultiRunStatsRecorder> all_run_stats =
+          absl::make_unique<MultiRunStatsRecorder>());
 
   virtual ~BenchmarkPerformanceOptions() {}
 
@@ -62,7 +90,6 @@ class BenchmarkPerformanceOptions {
 
  protected:
   static BenchmarkParams DefaultParams();
-  static std::unique_ptr<MultiRunStatsRecorder> DefaultRunStatsRecorder();
 
   BenchmarkPerformanceOptions(
       BenchmarkParams params, BenchmarkModel* single_option_run,
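The widened constructor above lets callers inject their own recorder while existing
single-argument call sites keep working through the default argument. A usage sketch along
the lines of the updated test below; the model path is a placeholder and
ThreadCountStatsRecorder refers to the hypothetical subclass sketched earlier:

  #include <string>
  #include <utility>

  #include "absl/memory/memory.h"
  #include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"
  #include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

  void SketchRunAllOptions() {
    tflite::benchmark::BenchmarkParams params =
        tflite::benchmark::BenchmarkTfLiteModel::DefaultParams();
    params.Set<std::string>("graph", "/tmp/model.tflite");  // placeholder path

    tflite::benchmark::BenchmarkTfLiteModel single_option_run(std::move(params));
    // A custom recorder is passed explicitly; omitting the second argument
    // falls back to the default MultiRunStatsRecorder.
    tflite::benchmark::BenchmarkPerformanceOptions all_options(
        &single_option_run,
        absl::make_unique<tflite::benchmark::ThreadCountStatsRecorder>());
    all_options.Run();
  }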
diff --git a/tensorflow/lite/tools/benchmark/benchmark_test.cc b/tensorflow/lite/tools/benchmark/benchmark_test.cc
index 38f8905fcc4..da4082926a2 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_test.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include
 #include
 #include "absl/algorithm/algorithm.h"
+#include "absl/memory/memory.h"
 #include "absl/strings/str_format.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/string_util.h"
@@ -185,11 +186,35 @@ TEST(BenchmarkTest, DoesntCrashStringModel) {
   benchmark.Run();
 }
 
+class TestMultiRunStatsRecorder : public MultiRunStatsRecorder {
+ public:
+  void OutputStats() override {
+    MultiRunStatsRecorder::OutputStats();
+
+    // Check that results have been sorted by average latency in increasing
+    // order, and that the incomplete runs are at the back of the results.
+    double pre_avg_latency = -1e6;
+    bool has_incomplete = false;  // ensure complete/incomplete are not mixed.
+    for (const auto& result : results_) {
+      const auto current_avg_latency = result.metrics.inference_time_us().avg();
+      if (result.completed) {
+        EXPECT_GE(current_avg_latency, pre_avg_latency);
+        EXPECT_FALSE(has_incomplete);
+      } else {
+        EXPECT_EQ(0, result.metrics.inference_time_us().count());
+        has_incomplete = true;
+      }
+      pre_avg_latency = current_avg_latency;
+    }
+  }
+};
+
 TEST(BenchmarkTest, DoesntCrashMultiPerfOptions) {
   ASSERT_THAT(g_fp32_model_path, testing::NotNull());
 
   TestBenchmark benchmark(CreateFp32Params());
-  BenchmarkPerformanceOptions all_options_benchmark(&benchmark);
+  BenchmarkPerformanceOptions all_options_benchmark(
+      &benchmark, absl::make_unique<TestMultiRunStatsRecorder>());
   all_options_benchmark.Run();
 }
 