1. Record the BenchmarkParam of each run in the multi-performance-option benchmark tool.
2. Also record whether a particular performance option results in a failure.

PiperOrigin-RevId: 305182635
Change-Id: I2305c6b030c6f9c2eec257353956d22ab51135b2
commit 19a41461e2 (parent 418d82803a)
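At a high level, the multi-option runner now keeps one record per run: the BenchmarkParams the run used, whether it completed, and its metrics. Below is a minimal sketch of how the pieces introduced in this commit fit together (this is not the tool's actual driver loop; RunAllOptionsSketch is a made-up free function, and applying each option's params to the model is elided):

#include <vector>

#include "tensorflow/lite/tools/benchmark/benchmark_model.h"
#include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"

namespace tflite {
namespace benchmark {

void RunAllOptionsSketch(BenchmarkModel* single_option_run,
                         const std::vector<BenchmarkParams>& per_option_params,
                         MultiRunStatsRecorder* all_run_stats) {
  // The recorder is a BenchmarkListener, so OnBenchmarkEnd fires only for
  // runs that actually finish.
  single_option_run->AddListener(all_run_stats);
  for (const BenchmarkParams& params : per_option_params) {
    // Record the run (and a snapshot of its params) *before* it starts, so a
    // failure during Init/Run still leaves an entry with completed == false.
    all_run_stats->MarkBenchmarkStart(params);
    single_option_run->Run();
  }
  // Failed runs are printed as "failed!" and ordered after completed ones.
  all_run_stats->OutputStats();
}

}  // namespace benchmark
}  // namespace tflite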
tensorflow/lite/tools/benchmark/BUILD

@@ -112,6 +112,7 @@ cc_test(
         "//tensorflow/lite/testing:util",
         "//tensorflow/lite/tools:command_line_flags",
         "@com_google_absl//absl/algorithm",
+        "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_googletest//:gtest",
     ],
@@ -180,6 +181,7 @@ cc_library(
         ":benchmark_params",
         ":benchmark_utils",
         ":logging",
+        "@com_google_absl//absl/memory",
         "//tensorflow/core/util:stats_calculator_portable",
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/nnapi:nnapi_util",
tensorflow/lite/tools/benchmark/benchmark_model.h

@@ -40,6 +40,7 @@ enum RunType {

 class BenchmarkResults {
  public:
+  BenchmarkResults() {}
   BenchmarkResults(double model_size_mb, int64_t startup_latency_us,
                    uint64_t input_bytes,
                    tensorflow::Stat<int64_t> warmup_time_us,
@@ -75,9 +76,9 @@ class BenchmarkResults {
   }

  private:
-  double model_size_mb_;
-  int64_t startup_latency_us_;
-  uint64_t input_bytes_;
+  double model_size_mb_ = 0.0;
+  int64_t startup_latency_us_ = 0;
+  uint64_t input_bytes_ = 0;
   tensorflow::Stat<int64_t> warmup_time_us_;
   tensorflow::Stat<int64_t> inference_time_us_;
   profiling::memory::MemoryUsage init_mem_usage_;
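(Presumably, the default constructor and zero-valued initializers above are what let a BenchmarkResults be held by value and default-constructed in the per-run record added later in this commit, where a failed run never produces real metrics.)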
@@ -142,7 +143,7 @@ class BenchmarkListeners : public BenchmarkListener {
     }
   }

-  ~BenchmarkListeners() {}
+  ~BenchmarkListeners() override {}

  private:
   // Use vector so listeners are invoked in the order they are added.
@@ -171,7 +172,8 @@ class BenchmarkModel {
  public:
   static BenchmarkParams DefaultParams();
   BenchmarkModel();
-  BenchmarkModel(BenchmarkParams params) : params_(std::move(params)) {}
+  explicit BenchmarkModel(BenchmarkParams params)
+      : params_(std::move(params)) {}
   virtual ~BenchmarkModel() {}
   virtual TfLiteStatus Init() = 0;
   TfLiteStatus Run(int argc, char** argv);
tensorflow/lite/tools/benchmark/benchmark_params.cc

@@ -61,5 +61,16 @@ void BenchmarkParams::Set(const BenchmarkParams& other) {
   }
 }

+void BenchmarkParams::Merge(const BenchmarkParams& other, bool overwrite) {
+  for (const auto& one : other.params_) {
+    auto it = params_.find(one.first);
+    if (it == params_.end()) {
+      AddParam(one.first, one.second->Clone());
+    } else if (overwrite) {
+      it->second->Set(*one.second);
+    }
+  }
+}
+
 }  // namespace benchmark
 }  // namespace tflite
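The new Merge complements Set: parameters missing from this object are cloned in from 'other', while parameters that already exist are overwritten only when asked. A small usage sketch, assuming the existing BenchmarkParam::Create and BenchmarkParams::AddParam helpers from benchmark_params.h (the parameter names are illustrative):

#include <cstdint>

#include "tensorflow/lite/tools/benchmark/benchmark_params.h"

namespace tflite {
namespace benchmark {

void MergeSketch() {
  BenchmarkParams base;
  base.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));
  base.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));

  BenchmarkParams overrides;
  overrides.AddParam("use_gpu", BenchmarkParam::Create<bool>(true));
  overrides.AddParam("warmup_runs", BenchmarkParam::Create<int32_t>(1));

  // Default (overwrite = false): "warmup_runs" is cloned in, but the existing
  // "use_gpu" keeps its value of false.
  base.Merge(overrides);

  // overwrite = true: "use_gpu" now takes the value from 'overrides'.
  // MultiRunStatsRecorder::MarkBenchmarkStart uses this mode to snapshot a
  // run's full parameter set.
  base.Merge(overrides, true);
}

}  // namespace benchmark
}  // namespace tflite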
tensorflow/lite/tools/benchmark/benchmark_params.h

@@ -59,6 +59,8 @@ class BenchmarkParam {

   virtual void Set(const BenchmarkParam&) {}

+  virtual std::unique_ptr<BenchmarkParam> Clone() const = 0;
+
  private:
   static void AssertHasSameType(ParamType a, ParamType b);

@@ -79,6 +81,10 @@ class TypedBenchmarkParam : public BenchmarkParam {
     Set(other.AsConstTyped<T>()->Get());
   }

+  std::unique_ptr<BenchmarkParam> Clone() const override {
+    return std::unique_ptr<BenchmarkParam>(new TypedBenchmarkParam<T>(value_));
+  }
+
  private:
   T value_;
 };
@@ -117,6 +123,10 @@ class BenchmarkParams {
   // Set the value of all same parameters from 'other'.
   void Set(const BenchmarkParams& other);

+  // Merge the value of all parameters from 'other'. 'overwrite' indicates
+  // whether the value of the same parameter is overwritten or not.
+  void Merge(const BenchmarkParams& other, bool overwrite = false);
+
  private:
   void AssertParamExists(const std::string& name) const;
   std::unordered_map<std::string, std::unique_ptr<BenchmarkParam>> params_;
tensorflow/lite/tools/benchmark/benchmark_performance_options.cc

@@ -41,36 +41,32 @@ limitations under the License.
 namespace tflite {
 namespace benchmark {

-void MultiRunStatsRecorder::OnBenchmarkStart(const BenchmarkParams& params) {
-  current_run_name_.clear();
-
+std::string MultiRunStatsRecorder::PerfOptionName(
+    const BenchmarkParams& params) const {
 #if defined(__ANDROID__)
   if (params.Get<bool>("use_nnapi")) {
     const std::string accelerator =
         params.Get<std::string>("nnapi_accelerator_name");
-    current_run_name_ = accelerator.empty() ? "nnapi(w/o accel name)"
-                                            : "nnapi(" + accelerator + ")";
-    return;
+    return accelerator.empty() ? "nnapi(w/o accel name)"
+                               : "nnapi(" + accelerator + ")";
   }
 #endif

   if (params.Get<bool>("use_gpu")) {
 #if defined(__ANDROID__)
     if (params.Get<bool>("gpu_precision_loss_allowed")) {
-      current_run_name_ = "gpu-fp16";
+      return "gpu-fp16";
     } else {
-      current_run_name_ = "gpu-default";
+      return "gpu-default";
     }
 #else
-    current_run_name_ = "gpu-default";
+    return "gpu-default";
 #endif
-    return;
   }

 #if defined(TFLITE_ENABLE_HEXAGON)
   if (params.Get<bool>("use_hexagon")) {
-    current_run_name_ = "dsp w/ hexagon";
-    return;
+    return "dsp w/ hexagon";
   }
 #endif

@@ -85,37 +81,37 @@ void MultiRunStatsRecorder::OnBenchmarkStart(const BenchmarkParams& params) {
     sstm << " (xnnpack)";
   }

-  current_run_name_ = sstm.str();
-}
-
-void MultiRunStatsRecorder::OnBenchmarkEnd(const BenchmarkResults& results) {
-  each_run_stats_.emplace_back(std::make_pair(current_run_name_, results));
+  return sstm.str();
 }

 void MultiRunStatsRecorder::OutputStats() {
   // Make a 80-character-long header.
   TFLITE_LOG(INFO) << "\n==============Summary of All Runs w/ Different "
                       "Performance Options==============";
-  std::sort(each_run_stats_.begin(), each_run_stats_.end(),
-            EachRunStatsEntryComparator());
+  std::sort(results_.begin(), results_.end(), EachRunStatsEntryComparator());

-  for (const auto& run_stats : each_run_stats_) {
+  for (const auto& run_stats : results_) {
+    const auto perf_option_name = PerfOptionName(*run_stats.params);
     std::stringstream stream;
-    // Output the name of this run first.
-    stream << std::setw(26) << run_stats.first << ": ";
-    run_stats.second.inference_time_us().OutputToStream(&stream);
+    stream << std::setw(26) << perf_option_name << ": ";
+    if (!run_stats.completed) {
+      stream << " failed!";
+    } else {
+      run_stats.metrics.inference_time_us().OutputToStream(&stream);
       // NOTE: As of 2019/11/07, the memory usage is collected in an
       // OS-process-wide way and this program performs multiple runs in a single
       // OS process, therefore, the memory usage information of each run becomes
       // incorrect, hence no output here.
+    }
     TFLITE_LOG(INFO) << stream.str();
   }
 }

 BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
-    BenchmarkModel* single_option_run)
+    BenchmarkModel* single_option_run,
+    std::unique_ptr<MultiRunStatsRecorder> all_run_stats)
     : BenchmarkPerformanceOptions(DefaultParams(), single_option_run,
-                                  DefaultRunStatsRecorder()) {}
+                                  std::move(all_run_stats)) {}

 BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
     BenchmarkParams params, BenchmarkModel* single_option_run,
@@ -138,11 +134,6 @@ BenchmarkParams BenchmarkPerformanceOptions::DefaultParams() {
   return params;
 }

-std::unique_ptr<MultiRunStatsRecorder>
-BenchmarkPerformanceOptions::DefaultRunStatsRecorder() {
-  return std::unique_ptr<MultiRunStatsRecorder>(new MultiRunStatsRecorder());
-}
-
 std::vector<Flag> BenchmarkPerformanceOptions::GetFlags() {
   return {
       CreateFlag<std::string>(
@@ -360,6 +351,7 @@ void BenchmarkPerformanceOptions::Run() {
     // created ones.
     single_option_run_->RemoveListeners(num_external_listners);

+    all_run_stats_->MarkBenchmarkStart(*single_option_run_params_);
     single_option_run_->Run();
   }

tensorflow/lite/tools/benchmark/benchmark_performance_options.h

@@ -17,34 +17,59 @@ limitations under the License.
 #define TENSORFLOW_LITE_TOOLS_BENCHMARK_BENCHMARK_PERFORMANCE_OPTIONS_H_

 #include <memory>
+#include <string>
 #include <vector>

+#include "absl/memory/memory.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"
+#include "tensorflow/lite/tools/benchmark/benchmark_params.h"

 namespace tflite {
 namespace benchmark {

 class MultiRunStatsRecorder : public BenchmarkListener {
  public:
-  void OnBenchmarkStart(const BenchmarkParams& params) override;
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+  // BenchmarkListener::OnBenchmarkStart is invoked after each run's
+  // BenchmarkModel::Init. However, a run could fail during Init, e.g. when a
+  // delegate fails to be created. To still record such a run, this function
+  // is called right before a run starts.
+  void MarkBenchmarkStart(const BenchmarkParams& params) {
+    results_.emplace_back(EachRunResult());
+    auto& current = results_.back();
+    current.completed = false;
+    current.params = absl::make_unique<BenchmarkParams>();
+    current.params->Merge(params, true /* overwrite */);
+  }
+
+  void OnBenchmarkEnd(const BenchmarkResults& results) final {
+    auto& current = results_.back();
+    current.completed = true;
+    current.metrics = results;
+  }

   virtual void OutputStats();

  protected:
-  using EachRunStatsEntry = std::pair<std::string, BenchmarkResults>;
+  struct EachRunResult {
+    bool completed = false;
+    std::unique_ptr<BenchmarkParams> params;
+    BenchmarkResults metrics;
+  };
+  std::vector<EachRunResult> results_;

   // Use this to order the runs by the average inference time in increasing
-  // order (i.e. the fastest run ranks first.)
+  // order (i.e. the fastest run ranks first). If a run didn't complete, we
+  // consider it the slowest.
   struct EachRunStatsEntryComparator {
-    bool operator()(const EachRunStatsEntry& i, const EachRunStatsEntry& j) {
-      return (i.second.inference_time_us().avg() <
-              j.second.inference_time_us().avg());
+    bool operator()(const EachRunResult& i, const EachRunResult& j) {
+      if (!i.completed) return false;
+      if (!j.completed) return true;
+      return i.metrics.inference_time_us().avg() <
+             j.metrics.inference_time_us().avg();
     }
   };

-  std::string current_run_name_;
-  std::vector<EachRunStatsEntry> each_run_stats_;
+  virtual std::string PerfOptionName(const BenchmarkParams& params) const;
 };

 // Benchmarks all performance options on a model by repeatedly invoking the
@@ -52,7 +77,10 @@ class MultiRunStatsRecorder : public BenchmarkListener {
 class BenchmarkPerformanceOptions {
  public:
   // Doesn't own the memory of 'single_option_run'.
-  explicit BenchmarkPerformanceOptions(BenchmarkModel* single_option_run);
+  explicit BenchmarkPerformanceOptions(
+      BenchmarkModel* single_option_run,
+      std::unique_ptr<MultiRunStatsRecorder> all_run_stats =
+          absl::make_unique<MultiRunStatsRecorder>());

   virtual ~BenchmarkPerformanceOptions() {}

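Because the new all_run_stats argument defaults to absl::make_unique<MultiRunStatsRecorder>(), existing call sites that pass only a BenchmarkModel* keep compiling; the test at the end of this commit uses the second argument to inject a TestMultiRunStatsRecorder that checks the output ordering.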
@@ -62,7 +90,6 @@ class BenchmarkPerformanceOptions {

  protected:
   static BenchmarkParams DefaultParams();
-  static std::unique_ptr<MultiRunStatsRecorder> DefaultRunStatsRecorder();

   BenchmarkPerformanceOptions(
       BenchmarkParams params, BenchmarkModel* single_option_run,
tensorflow/lite/tools/benchmark/benchmark_test.cc

@@ -20,6 +20,7 @@ limitations under the License.
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include "absl/algorithm/algorithm.h"
+#include "absl/memory/memory.h"
 #include "absl/strings/str_format.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/string_util.h"
@@ -185,11 +186,35 @@ TEST(BenchmarkTest, DoesntCrashStringModel) {
   benchmark.Run();
 }

+class TestMultiRunStatsRecorder : public MultiRunStatsRecorder {
+ public:
+  void OutputStats() override {
+    MultiRunStatsRecorder::OutputStats();
+
+    // Check results have been sorted according to avg. latency in increasing
+    // order, and the incomplete runs are at the back of the results.
+    double pre_avg_latency = -1e6;
+    bool has_incomplete = false;  // ensure complete/incomplete are not mixed.
+    for (const auto& result : results_) {
+      const auto current_avg_latency = result.metrics.inference_time_us().avg();
+      if (result.completed) {
+        EXPECT_GE(current_avg_latency, pre_avg_latency);
+        EXPECT_FALSE(has_incomplete);
+      } else {
+        EXPECT_EQ(0, result.metrics.inference_time_us().count());
+        has_incomplete = true;
+      }
+      pre_avg_latency = current_avg_latency;
+    }
+  }
+};
+
 TEST(BenchmarkTest, DoesntCrashMultiPerfOptions) {
   ASSERT_THAT(g_fp32_model_path, testing::NotNull());

   TestBenchmark benchmark(CreateFp32Params());
-  BenchmarkPerformanceOptions all_options_benchmark(&benchmark);
+  BenchmarkPerformanceOptions all_options_benchmark(
+      &benchmark, absl::make_unique<TestMultiRunStatsRecorder>());
   all_options_benchmark.Run();
 }
