Add a --enable_op_profiling flag for TFLite's benchmark_model
Use of `--copt=-DTFLITE_PROFILING_ENABLED` is no longer required to enable per-op profiling when running TFLite's benchmark_model utility. Simply use `--enable_op_profiling=true` to get this information. PiperOrigin-RevId: 247236386
This commit is contained in:
parent
e84d1e517d
commit
bba56b4444
@ -89,9 +89,9 @@ cc_library(
|
|||||||
":logging",
|
":logging",
|
||||||
"//tensorflow/lite:framework",
|
"//tensorflow/lite:framework",
|
||||||
"//tensorflow/lite:string_util",
|
"//tensorflow/lite:string_util",
|
||||||
"//tensorflow/lite/delegates/nnapi:nnapi_delegate",
|
|
||||||
"//tensorflow/lite/kernels:builtin_ops",
|
"//tensorflow/lite/kernels:builtin_ops",
|
||||||
"//tensorflow/lite/profiling:profile_summarizer",
|
"//tensorflow/lite/profiling:profile_summarizer",
|
||||||
|
"//tensorflow/lite/profiling:profiler",
|
||||||
"//tensorflow/lite/tools/evaluation:utils",
|
"//tensorflow/lite/tools/evaluation:utils",
|
||||||
"@gemmlowp",
|
"@gemmlowp",
|
||||||
],
|
],
|
||||||
|
@ -45,6 +45,8 @@ and the following optional parameters:
|
|||||||
* `use_gpu`: `bool` (default=false) \
|
* `use_gpu`: `bool` (default=false) \
|
||||||
Whether to use the [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
|
Whether to use the [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
|
||||||
This option is currently only available on Android devices.
|
This option is currently only available on Android devices.
|
||||||
|
* `enable_op_profiling`: `bool` (default=false) \
|
||||||
|
Whether to enable per-operator profiling measurement.
|
||||||
|
|
||||||
## To build/install/run
|
## To build/install/run
|
||||||
|
|
||||||
@ -129,19 +131,18 @@ where `f0` is the affinity mask for big cores on Pixel 2.
|
|||||||
Note: The affinity mask varies with the device.
|
Note: The affinity mask varies with the device.
|
||||||
|
|
||||||
## Profiling model operators
|
## Profiling model operators
|
||||||
The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this,
|
The benchmark model binary can also profile operators and report the
|
||||||
compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED**
|
execution times of each operator. To do this, pass the flag
|
||||||
to compile benchmark with profiling support.
|
`--enable_op_profiling=true` to `benchmark_model` during invocation, e.g.,
|
||||||
For example, to compile with profiling support on Android, add this flag to the previous command:
|
|
||||||
|
|
||||||
```
|
```
|
||||||
bazel build -c opt \
|
adb shell taskset f0 /data/local/tmp/benchmark_model \
|
||||||
--config=android_arm \
|
--graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
|
||||||
--cxxopt='--std=c++11' \
|
--enable_op_profiling=true
|
||||||
--copt=-DTFLITE_PROFILING_ENABLED \
|
|
||||||
tensorflow/lite/tools/benchmark:benchmark_model
|
|
||||||
```
|
```
|
||||||
This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. The binary will produce detailed statistics for each operation similar to those shown below:
|
|
||||||
|
When enabled, the `benchmark_model` binary will produce detailed statistics for
|
||||||
|
each operation similar to those shown below:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -48,6 +48,7 @@ BenchmarkParams CreateParams() {
|
|||||||
params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
|
params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
|
||||||
params.AddParam("use_legacy_nnapi", BenchmarkParam::Create<bool>(false));
|
params.AddParam("use_legacy_nnapi", BenchmarkParam::Create<bool>(false));
|
||||||
params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
|
params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
|
||||||
|
params.AddParam("enable_op_profiling", BenchmarkParam::Create<bool>(false));
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,6 +26,8 @@ limitations under the License.
|
|||||||
#include "tensorflow/lite/kernels/register.h"
|
#include "tensorflow/lite/kernels/register.h"
|
||||||
#include "tensorflow/lite/model.h"
|
#include "tensorflow/lite/model.h"
|
||||||
#include "tensorflow/lite/op_resolver.h"
|
#include "tensorflow/lite/op_resolver.h"
|
||||||
|
#include "tensorflow/lite/profiling/buffered_profiler.h"
|
||||||
|
#include "tensorflow/lite/profiling/profile_summarizer.h"
|
||||||
#include "tensorflow/lite/string_util.h"
|
#include "tensorflow/lite/string_util.h"
|
||||||
#include "tensorflow/lite/tools/benchmark/logging.h"
|
#include "tensorflow/lite/tools/benchmark/logging.h"
|
||||||
#include "tensorflow/lite/tools/evaluation/utils.h"
|
#include "tensorflow/lite/tools/evaluation/utils.h"
|
||||||
@ -40,12 +42,44 @@ void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
|
|||||||
|
|
||||||
namespace tflite {
|
namespace tflite {
|
||||||
namespace benchmark {
|
namespace benchmark {
|
||||||
|
namespace {
|
||||||
|
|
||||||
void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
|
// Backward compat with previous approach to enabling op profiling.
|
||||||
|
#if defined(TFLITE_PROFILING_ENABLED)
|
||||||
|
// Legacy compile-time define present: op profiling defaults to on.
constexpr bool kOpProfilingEnabledDefault = true;
|
||||||
|
#else
|
||||||
|
// Legacy define absent: op profiling defaults to off.
constexpr bool kOpProfilingEnabledDefault = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Dumps profiling events if profiling is enabled.
|
||||||
|
class ProfilingListener : public BenchmarkListener {
|
||||||
|
public:
|
||||||
|
explicit ProfilingListener(Interpreter* interpreter)
|
||||||
|
: interpreter_(interpreter), has_profiles_(false) {
|
||||||
TFLITE_BENCHMARK_CHECK(interpreter);
|
TFLITE_BENCHMARK_CHECK(interpreter);
|
||||||
interpreter_ = interpreter;
|
|
||||||
interpreter_->SetProfiler(&profiler_);
|
interpreter_->SetProfiler(&profiler_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OnSingleRunStart(RunType run_type) override;
|
||||||
|
|
||||||
|
void OnSingleRunEnd() override;
|
||||||
|
|
||||||
|
void OnBenchmarkEnd(const BenchmarkResults& results) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Interpreter* interpreter_;
|
||||||
|
profiling::BufferedProfiler profiler_;
|
||||||
|
profiling::ProfileSummarizer summarizer_;
|
||||||
|
bool has_profiles_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
|
||||||
|
class GemmlowpProfilingListener : public BenchmarkListener {
|
||||||
|
public:
|
||||||
|
void OnBenchmarkStart(const BenchmarkParams& params) override;
|
||||||
|
|
||||||
|
void OnBenchmarkEnd(const BenchmarkResults& results) override;
|
||||||
|
};
|
||||||
|
|
||||||
void ProfilingListener::OnSingleRunStart(RunType run_type) {
|
void ProfilingListener::OnSingleRunStart(RunType run_type) {
|
||||||
if (run_type == REGULAR) {
|
if (run_type == REGULAR) {
|
||||||
@ -82,8 +116,6 @@ void GemmlowpProfilingListener::OnBenchmarkEnd(
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
std::vector<std::string> Split(const std::string& str, const char delim) {
|
std::vector<std::string> Split(const std::string& str, const char delim) {
|
||||||
std::istringstream input(str);
|
std::istringstream input(str);
|
||||||
std::vector<std::string> results;
|
std::vector<std::string> results;
|
||||||
@ -201,6 +233,9 @@ BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
|
|||||||
BenchmarkParam::Create<bool>(false));
|
BenchmarkParam::Create<bool>(false));
|
||||||
default_params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
|
default_params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
|
||||||
default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
|
default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
|
||||||
|
default_params.AddParam(
|
||||||
|
"enable_op_profiling",
|
||||||
|
BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
|
||||||
return default_params;
|
return default_params;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,8 +244,6 @@ BenchmarkTfLiteModel::BenchmarkTfLiteModel()
|
|||||||
|
|
||||||
BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
|
BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
|
||||||
: BenchmarkModel(std::move(params)) {
|
: BenchmarkModel(std::move(params)) {
|
||||||
AddListener(&profiling_listener_);
|
|
||||||
AddListener(&gemmlowp_profiling_listener_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkTfLiteModel::CleanUp() {
|
void BenchmarkTfLiteModel::CleanUp() {
|
||||||
@ -236,7 +269,8 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
|
|||||||
CreateFlag<bool>("use_nnapi", ¶ms_, "use nnapi delegate api"),
|
CreateFlag<bool>("use_nnapi", ¶ms_, "use nnapi delegate api"),
|
||||||
CreateFlag<bool>("use_legacy_nnapi", ¶ms_, "use legacy nnapi api"),
|
CreateFlag<bool>("use_legacy_nnapi", ¶ms_, "use legacy nnapi api"),
|
||||||
CreateFlag<bool>("use_gpu", ¶ms_, "use gpu"),
|
CreateFlag<bool>("use_gpu", ¶ms_, "use gpu"),
|
||||||
CreateFlag<bool>("allow_fp16", ¶ms_, "allow fp16")};
|
CreateFlag<bool>("allow_fp16", ¶ms_, "allow fp16"),
|
||||||
|
CreateFlag<bool>("enable_op_profiling", ¶ms_, "enable op profiling")};
|
||||||
|
|
||||||
flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
|
flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
|
||||||
return flags;
|
return flags;
|
||||||
@ -255,6 +289,8 @@ void BenchmarkTfLiteModel::LogParams() {
|
|||||||
TFLITE_LOG(INFO) << "Use gpu : [" << params_.Get<bool>("use_gpu") << "]";
|
TFLITE_LOG(INFO) << "Use gpu : [" << params_.Get<bool>("use_gpu") << "]";
|
||||||
TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
|
TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
|
||||||
<< "]";
|
<< "]";
|
||||||
|
TFLITE_LOG(INFO) << "Enable op profiling: ["
|
||||||
|
<< params_.Get<bool>("enable_op_profiling") << "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BenchmarkTfLiteModel::ValidateParams() {
|
bool BenchmarkTfLiteModel::ValidateParams() {
|
||||||
@ -382,7 +418,6 @@ void BenchmarkTfLiteModel::Init() {
|
|||||||
if (!interpreter) {
|
if (!interpreter) {
|
||||||
TFLITE_LOG(FATAL) << "Failed to construct interpreter";
|
TFLITE_LOG(FATAL) << "Failed to construct interpreter";
|
||||||
}
|
}
|
||||||
profiling_listener_.SetInterpreter(interpreter.get());
|
|
||||||
|
|
||||||
interpreter->UseNNAPI(params_.Get<bool>("use_legacy_nnapi"));
|
interpreter->UseNNAPI(params_.Get<bool>("use_legacy_nnapi"));
|
||||||
|
|
||||||
@ -433,6 +468,16 @@ void BenchmarkTfLiteModel::Init() {
|
|||||||
if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
|
if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
|
||||||
TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
|
TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Install profilers if necessary.
|
||||||
|
if (params_.Get<bool>("enable_op_profiling")) {
|
||||||
|
profiling_listener_.reset(new ProfilingListener(interpreter.get()));
|
||||||
|
AddListener(profiling_listener_.get());
|
||||||
|
}
|
||||||
|
#ifdef GEMMLOWP_PROFILING
|
||||||
|
gemmlowp_profiling_listener_.reset(new GemmlowpProfilingListener());
|
||||||
|
AddListener(gemmlowp_profiling_listener_.get());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
|
BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
|
||||||
|
@ -22,42 +22,11 @@ limitations under the License.
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/lite/model.h"
|
#include "tensorflow/lite/model.h"
|
||||||
#include "tensorflow/lite/profiling/profile_summarizer.h"
|
|
||||||
#include "tensorflow/lite/tools/benchmark/benchmark_model.h"
|
#include "tensorflow/lite/tools/benchmark/benchmark_model.h"
|
||||||
|
|
||||||
namespace tflite {
|
namespace tflite {
|
||||||
namespace benchmark {
|
namespace benchmark {
|
||||||
|
|
||||||
// Dumps profiling events if profiling is enabled.
|
|
||||||
class ProfilingListener : public BenchmarkListener {
|
|
||||||
public:
|
|
||||||
explicit ProfilingListener() : interpreter_(nullptr), has_profiles_(false) {}
|
|
||||||
|
|
||||||
void SetInterpreter(Interpreter* interpreter);
|
|
||||||
|
|
||||||
void OnSingleRunStart(RunType run_type) override;
|
|
||||||
|
|
||||||
void OnSingleRunEnd() override;
|
|
||||||
|
|
||||||
void OnBenchmarkEnd(const BenchmarkResults& results) override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
Interpreter* interpreter_;
|
|
||||||
profiling::Profiler profiler_;
|
|
||||||
profiling::ProfileSummarizer summarizer_;
|
|
||||||
bool has_profiles_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
|
|
||||||
class GemmlowpProfilingListener : public BenchmarkListener {
|
|
||||||
public:
|
|
||||||
virtual ~GemmlowpProfilingListener() {}
|
|
||||||
|
|
||||||
void OnBenchmarkStart(const BenchmarkParams& params) override;
|
|
||||||
|
|
||||||
void OnBenchmarkEnd(const BenchmarkResults& results) override;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Benchmarks a TFLite model by running tflite interpreter.
|
// Benchmarks a TFLite model by running tflite interpreter.
|
||||||
class BenchmarkTfLiteModel : public BenchmarkModel {
|
class BenchmarkTfLiteModel : public BenchmarkModel {
|
||||||
public:
|
public:
|
||||||
@ -99,8 +68,8 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
|
|||||||
};
|
};
|
||||||
std::vector<InputLayerInfo> inputs;
|
std::vector<InputLayerInfo> inputs;
|
||||||
std::vector<InputTensorData> inputs_data_;
|
std::vector<InputTensorData> inputs_data_;
|
||||||
ProfilingListener profiling_listener_;
|
std::unique_ptr<BenchmarkListener> profiling_listener_;
|
||||||
GemmlowpProfilingListener gemmlowp_profiling_listener_;
|
std::unique_ptr<BenchmarkListener> gemmlowp_profiling_listener_;
|
||||||
TfLiteDelegatePtrMap delegates_;
|
TfLiteDelegatePtrMap delegates_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ BUILD_ARCHS="x86_64 armv7 armv7s arm64"
|
|||||||
while getopts "a:p" opt_name; do
|
while getopts "a:p" opt_name; do
|
||||||
case "$opt_name" in
|
case "$opt_name" in
|
||||||
a) BUILD_ARCHS="${OPTARG}";;
|
a) BUILD_ARCHS="${OPTARG}";;
|
||||||
p) profiling_args='-DGEMMLOWP_PROFILING,-DTFLITE_PROFILING_ENABLED';;
|
p) profiling_args='-DGEMMLOWP_PROFILING';;
|
||||||
*) usage;;
|
*) usage;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
Loading…
Reference in New Issue
Block a user