From 19a41461e24112a9fde929afdc5be36fd9febf2a Mon Sep 17 00:00:00 2001
From: Chao Mei
Date: Mon, 6 Apr 2020 21:50:47 -0700
Subject: [PATCH] 1. Record the BenchmarkParam of each run in the
 multi-performance-option benchmark tool.

2. Also record if a particular performance option could result in a failure.

PiperOrigin-RevId: 305182635
Change-Id: I2305c6b030c6f9c2eec257353956d22ab51135b2
---
 tensorflow/lite/tools/benchmark/BUILD          |  2 +
 .../lite/tools/benchmark/benchmark_model.h     | 12 ++--
 .../lite/tools/benchmark/benchmark_params.cc   | 11 ++++
 .../lite/tools/benchmark/benchmark_params.h    | 10 ++++
 .../benchmark_performance_options.cc           | 60 ++++++++-----------
 .../benchmark/benchmark_performance_options.h  | 49 +++++++++++----
 .../lite/tools/benchmark/benchmark_test.cc     | 27 ++++++++-
 7 files changed, 120 insertions(+), 51 deletions(-)

diff --git a/tensorflow/lite/tools/benchmark/BUILD b/tensorflow/lite/tools/benchmark/BUILD
index d10c1acb95d..a979a8a55ef 100644
--- a/tensorflow/lite/tools/benchmark/BUILD
+++ b/tensorflow/lite/tools/benchmark/BUILD
@@ -112,6 +112,7 @@ cc_test(
         "//tensorflow/lite/testing:util",
         "//tensorflow/lite/tools:command_line_flags",
         "@com_google_absl//absl/algorithm",
+        "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_googletest//:gtest",
     ],
@@ -180,6 +181,7 @@ cc_library(
         ":benchmark_params",
         ":benchmark_utils",
         ":logging",
+        "@com_google_absl//absl/memory",
         "//tensorflow/core/util:stats_calculator_portable",
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/nnapi:nnapi_util",
diff --git a/tensorflow/lite/tools/benchmark/benchmark_model.h b/tensorflow/lite/tools/benchmark/benchmark_model.h
index 8a207a6fd45..0aca42dc200 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_model.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_model.h
@@ -40,6 +40,7 @@ enum RunType {
 
 class BenchmarkResults {
  public:
+  BenchmarkResults() {}
   BenchmarkResults(double model_size_mb, int64_t startup_latency_us,
                    uint64_t input_bytes,
                    tensorflow::Stat<int64_t> warmup_time_us,
@@ -75,9 +76,9 @@ class BenchmarkResults {
   }
 
  private:
-  double model_size_mb_;
-  int64_t startup_latency_us_;
-  uint64_t input_bytes_;
+  double model_size_mb_ = 0.0;
+  int64_t startup_latency_us_ = 0;
+  uint64_t input_bytes_ = 0;
   tensorflow::Stat<int64_t> warmup_time_us_;
   tensorflow::Stat<int64_t> inference_time_us_;
   profiling::memory::MemoryUsage init_mem_usage_;
@@ -142,7 +143,7 @@ class BenchmarkListeners : public BenchmarkListener {
     }
   }
 
-  ~BenchmarkListeners() {}
+  ~BenchmarkListeners() override {}
 
  private:
   // Use vector so listeners are invoked in the order they are added.
@@ -171,7 +172,8 @@ class BenchmarkModel {
  public:
   static BenchmarkParams DefaultParams();
   BenchmarkModel();
-  BenchmarkModel(BenchmarkParams params) : params_(std::move(params)) {}
+  explicit BenchmarkModel(BenchmarkParams params)
+      : params_(std::move(params)) {}
   virtual ~BenchmarkModel() {}
   virtual TfLiteStatus Init() = 0;
   TfLiteStatus Run(int argc, char** argv);
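A note on the benchmark_model.h change above: the new default constructor and the
zero-initialized members give BenchmarkResults a well-defined empty state, which the
multi-run recorder further below stores for runs that never complete. A minimal sketch of
what this guarantees (illustrative only, not part of the patch; the function name is made
up):

  #include <cassert>

  #include "tensorflow/lite/tools/benchmark/benchmark_model.h"

  // A default-constructed result carries no recorded samples, so a failed run
  // can be reported without reading uninitialized fields.
  void SketchEmptyResults() {
    tflite::benchmark::BenchmarkResults empty;
    assert(empty.inference_time_us().count() == 0);
  }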
diff --git a/tensorflow/lite/tools/benchmark/benchmark_params.cc b/tensorflow/lite/tools/benchmark/benchmark_params.cc
index caff9714d47..1dd6a8d519a 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_params.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_params.cc
@@ -61,5 +61,16 @@ void BenchmarkParams::Set(const BenchmarkParams& other) {
   }
 }
 
+void BenchmarkParams::Merge(const BenchmarkParams& other, bool overwrite) {
+  for (const auto& one : other.params_) {
+    auto it = params_.find(one.first);
+    if (it == params_.end()) {
+      AddParam(one.first, one.second->Clone());
+    } else if (overwrite) {
+      it->second->Set(*one.second);
+    }
+  }
+}
+
 }  // namespace benchmark
 }  // namespace tflite
diff --git a/tensorflow/lite/tools/benchmark/benchmark_params.h b/tensorflow/lite/tools/benchmark/benchmark_params.h
index 1be66dd3ca2..1b3dabf3f7b 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_params.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_params.h
@@ -59,6 +59,8 @@ class BenchmarkParam {
 
   virtual void Set(const BenchmarkParam&) {}
 
+  virtual std::unique_ptr<BenchmarkParam> Clone() const = 0;
+
  private:
   static void AssertHasSameType(ParamType a, ParamType b);
 
@@ -79,6 +81,10 @@ class TypedBenchmarkParam : public BenchmarkParam {
     Set(other.AsConstTyped<T>()->Get());
   }
 
+  std::unique_ptr<BenchmarkParam> Clone() const override {
+    return std::unique_ptr<BenchmarkParam>(new TypedBenchmarkParam<T>(value_));
+  }
+
  private:
   T value_;
 };
@@ -117,6 +123,10 @@ class BenchmarkParams {
   // Set the value of all same parameters from 'other'.
   void Set(const BenchmarkParams& other);
 
+  // Merge the value of all parameters from 'other'. 'overwrite' indicates
+  // whether the value of the same parameter is overwritten or not.
+  void Merge(const BenchmarkParams& other, bool overwrite = false);
+
  private:
   void AssertParamExists(const std::string& name) const;
   std::unordered_map<std::string, std::unique_ptr<BenchmarkParam>> params_;
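The new BenchmarkParams::Merge() above copies parameters that are missing from the
destination and, only when 'overwrite' is true, also replaces values of parameters present
on both sides. A minimal usage sketch; the parameter names and values are illustrative and
not taken from this patch:

  #include <cstdint>
  #include <string>

  #include "tensorflow/lite/tools/benchmark/benchmark_params.h"

  void SketchMerge() {
    using tflite::benchmark::BenchmarkParam;
    using tflite::benchmark::BenchmarkParams;

    BenchmarkParams dst;
    dst.AddParam("graph", BenchmarkParam::Create<std::string>("old.tflite"));

    BenchmarkParams src;
    src.AddParam("graph", BenchmarkParam::Create<std::string>("new.tflite"));
    src.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));

    // Default: only the missing "num_runs" is cloned; "graph" keeps "old.tflite".
    dst.Merge(src);
    // With overwrite=true, "graph" is replaced with "new.tflite" as well.
    dst.Merge(src, true /* overwrite */);
  }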
"nnapi(w/o accel name)" + : "nnapi(" + accelerator + ")"; } #endif if (params.Get("use_gpu")) { #if defined(__ANDROID__) if (params.Get("gpu_precision_loss_allowed")) { - current_run_name_ = "gpu-fp16"; + return "gpu-fp16"; } else { - current_run_name_ = "gpu-default"; + return "gpu-default"; } #else - current_run_name_ = "gpu-default"; + return "gpu-default"; #endif - return; } #if defined(TFLITE_ENABLE_HEXAGON) if (params.Get("use_hexagon")) { - current_run_name_ = "dsp w/ hexagon"; - return; + return "dsp w/ hexagon"; } #endif @@ -85,37 +81,37 @@ void MultiRunStatsRecorder::OnBenchmarkStart(const BenchmarkParams& params) { sstm << " (xnnpack)"; } - current_run_name_ = sstm.str(); -} - -void MultiRunStatsRecorder::OnBenchmarkEnd(const BenchmarkResults& results) { - each_run_stats_.emplace_back(std::make_pair(current_run_name_, results)); + return sstm.str(); } void MultiRunStatsRecorder::OutputStats() { // Make a 80-character-long header. TFLITE_LOG(INFO) << "\n==============Summary of All Runs w/ Different " "Performance Options=============="; - std::sort(each_run_stats_.begin(), each_run_stats_.end(), - EachRunStatsEntryComparator()); + std::sort(results_.begin(), results_.end(), EachRunStatsEntryComparator()); - for (const auto& run_stats : each_run_stats_) { + for (const auto& run_stats : results_) { + const auto perf_option_name = PerfOptionName(*run_stats.params); std::stringstream stream; - // Output the name of this run first. - stream << std::setw(26) << run_stats.first << ": "; - run_stats.second.inference_time_us().OutputToStream(&stream); - // NOTE: As of 2019/11/07, the memory usage is collected in an - // OS-process-wide way and this program performs multiple runs in a single - // OS process, therefore, the memory usage information of each run becomes - // incorrect, hence no output here. + stream << std::setw(26) << perf_option_name << ": "; + if (!run_stats.completed) { + stream << " failed!"; + } else { + run_stats.metrics.inference_time_us().OutputToStream(&stream); + // NOTE: As of 2019/11/07, the memory usage is collected in an + // OS-process-wide way and this program performs multiple runs in a single + // OS process, therefore, the memory usage information of each run becomes + // incorrect, hence no output here. + } TFLITE_LOG(INFO) << stream.str(); } } BenchmarkPerformanceOptions::BenchmarkPerformanceOptions( - BenchmarkModel* single_option_run) + BenchmarkModel* single_option_run, + std::unique_ptr all_run_stats) : BenchmarkPerformanceOptions(DefaultParams(), single_option_run, - DefaultRunStatsRecorder()) {} + std::move(all_run_stats)) {} BenchmarkPerformanceOptions::BenchmarkPerformanceOptions( BenchmarkParams params, BenchmarkModel* single_option_run, @@ -138,11 +134,6 @@ BenchmarkParams BenchmarkPerformanceOptions::DefaultParams() { return params; } -std::unique_ptr -BenchmarkPerformanceOptions::DefaultRunStatsRecorder() { - return std::unique_ptr(new MultiRunStatsRecorder()); -} - std::vector BenchmarkPerformanceOptions::GetFlags() { return { CreateFlag( @@ -360,6 +351,7 @@ void BenchmarkPerformanceOptions::Run() { // created ones. 
diff --git a/tensorflow/lite/tools/benchmark/benchmark_performance_options.h b/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
index b7ce59d994f..d9ab71f8b74 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_performance_options.h
@@ -17,34 +17,59 @@ limitations under the License.
 #define TENSORFLOW_LITE_TOOLS_BENCHMARK_BENCHMARK_PERFORMANCE_OPTIONS_H_
 
 #include <memory>
+#include <string>
 #include <vector>
 
+#include "absl/memory/memory.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"
+#include "tensorflow/lite/tools/benchmark/benchmark_params.h"
 
 namespace tflite {
 namespace benchmark {
 
 class MultiRunStatsRecorder : public BenchmarkListener {
  public:
-  void OnBenchmarkStart(const BenchmarkParams& params) override;
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+  // BenchmarkListener::OnBenchmarkStart is invoked after each run's
+  // BenchmarkModel::Init. However, a run could fail during Init, e.g. when
+  // the delegate fails to be created. To still record such a run, this
+  // function is called right before a run starts.
+  void MarkBenchmarkStart(const BenchmarkParams& params) {
+    results_.emplace_back(EachRunResult());
+    auto& current = results_.back();
+    current.completed = false;
+    current.params = absl::make_unique<BenchmarkParams>();
+    current.params->Merge(params, true /* overwrite */);
+  }
+
+  void OnBenchmarkEnd(const BenchmarkResults& results) final {
+    auto& current = results_.back();
+    current.completed = true;
+    current.metrics = results;
+  }
 
   virtual void OutputStats();
 
  protected:
-  using EachRunStatsEntry = std::pair<std::string, BenchmarkResults>;
+  struct EachRunResult {
+    bool completed = false;
+    std::unique_ptr<BenchmarkParams> params;
+    BenchmarkResults metrics;
+  };
+  std::vector<EachRunResult> results_;
 
   // Use this to order the runs by the average inference time in increasing
-  // order (i.e. the fastest run ranks first.)
+  // order (i.e. the fastest run ranks first). If a run didn't complete, we
+  // consider it to be the slowest.
   struct EachRunStatsEntryComparator {
-    bool operator()(const EachRunStatsEntry& i, const EachRunStatsEntry& j) {
-      return (i.second.inference_time_us().avg() <
-              j.second.inference_time_us().avg());
+    bool operator()(const EachRunResult& i, const EachRunResult& j) {
+      if (!i.completed) return false;
+      if (!j.completed) return true;
+      return i.metrics.inference_time_us().avg() <
+             j.metrics.inference_time_us().avg();
     }
   };
 
-  std::string current_run_name_;
-  std::vector<EachRunStatsEntry> each_run_stats_;
+  virtual std::string PerfOptionName(const BenchmarkParams& params) const;
 };
 
 // Benchmarks all performance options on a model by repeatedly invoking the
@@ -52,7 +77,10 @@ class MultiRunStatsRecorder : public BenchmarkListener {
 class BenchmarkPerformanceOptions {
  public:
   // Doesn't own the memory of 'single_option_run'.
-  explicit BenchmarkPerformanceOptions(BenchmarkModel* single_option_run);
+  explicit BenchmarkPerformanceOptions(
+      BenchmarkModel* single_option_run,
+      std::unique_ptr<MultiRunStatsRecorder> all_run_stats =
+          absl::make_unique<MultiRunStatsRecorder>());
 
   virtual ~BenchmarkPerformanceOptions() {}
 
@@ -62,7 +90,6 @@ class BenchmarkPerformanceOptions {
 
  protected:
   static BenchmarkParams DefaultParams();
-  static std::unique_ptr<MultiRunStatsRecorder> DefaultRunStatsRecorder();
 
   BenchmarkPerformanceOptions(
       BenchmarkParams params, BenchmarkModel* single_option_run,
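The widened constructor above lets callers inject their own recorder while existing
single-argument call sites keep working through the default argument. A usage sketch along
the lines of the updated test below; the model path is a placeholder and
ThreadCountStatsRecorder refers to the hypothetical subclass sketched earlier:

  #include <string>
  #include <utility>

  #include "absl/memory/memory.h"
  #include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"
  #include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

  void SketchRunAllOptions() {
    tflite::benchmark::BenchmarkParams params =
        tflite::benchmark::BenchmarkTfLiteModel::DefaultParams();
    params.Set<std::string>("graph", "/tmp/model.tflite");  // placeholder path

    tflite::benchmark::BenchmarkTfLiteModel single_option_run(std::move(params));
    // A custom recorder is passed explicitly; omitting the second argument
    // falls back to the default MultiRunStatsRecorder.
    tflite::benchmark::BenchmarkPerformanceOptions all_options(
        &single_option_run,
        absl::make_unique<tflite::benchmark::ThreadCountStatsRecorder>());
    all_options.Run();
  }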
diff --git a/tensorflow/lite/tools/benchmark/benchmark_test.cc b/tensorflow/lite/tools/benchmark/benchmark_test.cc
index 38f8905fcc4..da4082926a2 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_test.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include
 #include
 #include "absl/algorithm/algorithm.h"
+#include "absl/memory/memory.h"
 #include "absl/strings/str_format.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/string_util.h"
@@ -185,11 +186,35 @@ TEST(BenchmarkTest, DoesntCrashStringModel) {
   benchmark.Run();
 }
 
+class TestMultiRunStatsRecorder : public MultiRunStatsRecorder {
+ public:
+  void OutputStats() override {
+    MultiRunStatsRecorder::OutputStats();
+
+    // Check that results have been sorted by average latency in increasing
+    // order, and that the incomplete runs are at the back of the results.
+    double pre_avg_latency = -1e6;
+    bool has_incomplete = false;  // ensure complete/incomplete are not mixed.
+    for (const auto& result : results_) {
+      const auto current_avg_latency = result.metrics.inference_time_us().avg();
+      if (result.completed) {
+        EXPECT_GE(current_avg_latency, pre_avg_latency);
+        EXPECT_FALSE(has_incomplete);
+      } else {
+        EXPECT_EQ(0, result.metrics.inference_time_us().count());
+        has_incomplete = true;
+      }
+      pre_avg_latency = current_avg_latency;
+    }
+  }
+};
+
 TEST(BenchmarkTest, DoesntCrashMultiPerfOptions) {
   ASSERT_THAT(g_fp32_model_path, testing::NotNull());
 
   TestBenchmark benchmark(CreateFp32Params());
-  BenchmarkPerformanceOptions all_options_benchmark(&benchmark);
+  BenchmarkPerformanceOptions all_options_benchmark(
+      &benchmark, absl::make_unique<TestMultiRunStatsRecorder>());
   all_options_benchmark.Run();
 }
 