Add a --enable_op_profiling flag for TFLite's benchmark_model

Use of `--copt=-DTFLITE_PROFILING_ENABLED` is no longer required to
enable per-op profiling when running TFLite's benchmark_model utility.
Simply use `--enable_op_profiling=true` to get this information.

PiperOrigin-RevId: 247236386
Jared Duke 2019-05-08 10:16:35 -07:00 committed by TensorFlower Gardener
parent e84d1e517d
commit bba56b4444
6 changed files with 72 additions and 56 deletions
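For context, a minimal host-side build-and-run sketch using the new flag is shown below; the model path is illustrative, and the commands are adapted from the README changes in this commit (no `-DTFLITE_PROFILING_ENABLED` copt is required anymore):

```
# Build the benchmark tool without any profiling-specific copt.
bazel build -c opt tensorflow/lite/tools/benchmark:benchmark_model

# Enable per-op profiling at run time (model path is illustrative).
bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model \
  --graph=mobilenet_quant_v1_224.tflite \
  --enable_op_profiling=true
```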


@@ -89,9 +89,9 @@ cc_library(
":logging",
"//tensorflow/lite:framework",
"//tensorflow/lite:string_util",
"//tensorflow/lite/delegates/nnapi:nnapi_delegate",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/profiling:profile_summarizer",
"//tensorflow/lite/profiling:profiler",
"//tensorflow/lite/tools/evaluation:utils",
"@gemmlowp",
],


@@ -45,6 +45,8 @@ and the following optional parameters:
* `use_gpu`: `bool` (default=false) \
Whether to use the [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
This option is currently only available on Android devices.
* `enable_op_profiling`: `bool` (default=false) \
Whether to enable per-operator profiling measurement.
## To build/install/run
@@ -129,19 +131,18 @@ where `f0` is the affinity mask for big cores on Pixel 2.
Note: The affinity mask varies with the device.
## Profiling model operators
The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this,
compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED**
to compile benchmark with profiling support.
For example, to compile with profiling support on Android, add this flag to the previous command:
The benchmark model binary also allows you to profile operators and give
execution times of each operator. To do this, pass the flag
`--enable_op_profiling=true` to `benchmark_model` during invocation, e.g.,
```
bazel build -c opt \
--config=android_arm \
--cxxopt='--std=c++11' \
--copt=-DTFLITE_PROFILING_ENABLED \
tensorflow/lite/tools/benchmark:benchmark_model
adb shell taskset f0 /data/local/tmp/benchmark_model \
--graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
--enable_op_profiling=true
```
This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. The binary will produce detailed statistics for each operation similar to those shown below:
When enabled, the `benchmark_model` binary will produce detailed statistics for
each operation similar to those shown below:
```
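With this change, the Android build command shown earlier in the README needs no profiling-specific option; a sketch of the simplified command, adapted from the lines removed above:

```
bazel build -c opt \
  --config=android_arm \
  --cxxopt='--std=c++11' \
  tensorflow/lite/tools/benchmark:benchmark_model
```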


@@ -48,6 +48,7 @@ BenchmarkParams CreateParams() {
params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
params.AddParam("use_legacy_nnapi", BenchmarkParam::Create<bool>(false));
params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
params.AddParam("enable_op_profiling", BenchmarkParam::Create<bool>(false));
return params;
}


@@ -26,6 +26,8 @@ limitations under the License.
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/profiling/buffered_profiler.h"
#include "tensorflow/lite/profiling/profile_summarizer.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/logging.h"
#include "tensorflow/lite/tools/evaluation/utils.h"
@@ -40,13 +42,45 @@ void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
namespace tflite {
namespace benchmark {
namespace {
void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
// Backward compat with previous approach to enabling op profiling.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr int kOpProfilingEnabledDefault = true;
#else
constexpr int kOpProfilingEnabledDefault = false;
#endif
// Dumps profiling events if profiling is enabled.
class ProfilingListener : public BenchmarkListener {
public:
explicit ProfilingListener(Interpreter* interpreter)
: interpreter_(interpreter), has_profiles_(false) {
TFLITE_BENCHMARK_CHECK(interpreter);
interpreter_ = interpreter;
interpreter_->SetProfiler(&profiler_);
}
void OnSingleRunStart(RunType run_type) override;
void OnSingleRunEnd() override;
void OnBenchmarkEnd(const BenchmarkResults& results) override;
private:
Interpreter* interpreter_;
profiling::BufferedProfiler profiler_;
profiling::ProfileSummarizer summarizer_;
bool has_profiles_;
};
// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
class GemmlowpProfilingListener : public BenchmarkListener {
public:
void OnBenchmarkStart(const BenchmarkParams& params) override;
void OnBenchmarkEnd(const BenchmarkResults& results) override;
};
void ProfilingListener::OnSingleRunStart(RunType run_type) {
if (run_type == REGULAR) {
profiler_.Reset();
@@ -82,8 +116,6 @@ void GemmlowpProfilingListener::OnBenchmarkEnd(
#endif
}
namespace {
std::vector<std::string> Split(const std::string& str, const char delim) {
std::istringstream input(str);
std::vector<std::string> results;
@@ -201,6 +233,9 @@ BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
BenchmarkParam::Create<bool>(false));
default_params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
default_params.AddParam(
"enable_op_profiling",
BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
return default_params;
}
@@ -209,8 +244,6 @@ BenchmarkTfLiteModel::BenchmarkTfLiteModel()
BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
: BenchmarkModel(std::move(params)) {
AddListener(&profiling_listener_);
AddListener(&gemmlowp_profiling_listener_);
}
void BenchmarkTfLiteModel::CleanUp() {
@@ -236,7 +269,8 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
CreateFlag<bool>("use_nnapi", &params_, "use nnapi delegate api"),
CreateFlag<bool>("use_legacy_nnapi", &params_, "use legacy nnapi api"),
CreateFlag<bool>("use_gpu", &params_, "use gpu"),
CreateFlag<bool>("allow_fp16", &params_, "allow fp16")};
CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling")};
flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
return flags;
@@ -255,6 +289,8 @@ void BenchmarkTfLiteModel::LogParams() {
TFLITE_LOG(INFO) << "Use gpu : [" << params_.Get<bool>("use_gpu") << "]";
TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
<< "]";
TFLITE_LOG(INFO) << "Enable op profiling: ["
<< params_.Get<bool>("enable_op_profiling") << "]";
}
bool BenchmarkTfLiteModel::ValidateParams() {
@@ -382,7 +418,6 @@ void BenchmarkTfLiteModel::Init() {
if (!interpreter) {
TFLITE_LOG(FATAL) << "Failed to construct interpreter";
}
profiling_listener_.SetInterpreter(interpreter.get());
interpreter->UseNNAPI(params_.Get<bool>("use_legacy_nnapi"));
@@ -433,6 +468,16 @@ void BenchmarkTfLiteModel::Init() {
if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
}
// Install profilers if necessary.
if (params_.Get<bool>("enable_op_profiling")) {
profiling_listener_.reset(new ProfilingListener(interpreter.get()));
AddListener(profiling_listener_.get());
}
#ifdef GEMMLOWP_PROFILING
gemmlowp_profiling_listener_.reset(new GemmlowpProfilingListener());
AddListener(gemmlowp_profiling_listener_.get());
#endif
}
BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
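Note the distinction preserved above: gemmlowp profiling is still gated by a compile-time define (`GEMMLOWP_PROFILING`), while op profiling is now purely a runtime flag. A minimal sketch of the build-time side:

```
# Gemmlowp profiling still requires a compile-time define:
bazel build -c opt --copt=-DGEMMLOWP_PROFILING \
  tensorflow/lite/tools/benchmark:benchmark_model
# Per-op profiling needs no special build; pass --enable_op_profiling=true at run time.
```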


@@ -22,42 +22,11 @@ limitations under the License.
#include <vector>
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/profiling/profile_summarizer.h"
#include "tensorflow/lite/tools/benchmark/benchmark_model.h"
namespace tflite {
namespace benchmark {
// Dumps profiling events if profiling is enabled.
class ProfilingListener : public BenchmarkListener {
public:
explicit ProfilingListener() : interpreter_(nullptr), has_profiles_(false) {}
void SetInterpreter(Interpreter* interpreter);
void OnSingleRunStart(RunType run_type) override;
void OnSingleRunEnd() override;
void OnBenchmarkEnd(const BenchmarkResults& results) override;
private:
Interpreter* interpreter_;
profiling::Profiler profiler_;
profiling::ProfileSummarizer summarizer_;
bool has_profiles_;
};
// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
class GemmlowpProfilingListener : public BenchmarkListener {
public:
virtual ~GemmlowpProfilingListener() {}
void OnBenchmarkStart(const BenchmarkParams& params) override;
void OnBenchmarkEnd(const BenchmarkResults& results) override;
};
// Benchmarks a TFLite model by running tflite interpreter.
class BenchmarkTfLiteModel : public BenchmarkModel {
public:
@@ -99,8 +68,8 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
};
std::vector<InputLayerInfo> inputs;
std::vector<InputTensorData> inputs_data_;
ProfilingListener profiling_listener_;
GemmlowpProfilingListener gemmlowp_profiling_listener_;
std::unique_ptr<BenchmarkListener> profiling_listener_;
std::unique_ptr<BenchmarkListener> gemmlowp_profiling_listener_;
TfLiteDelegatePtrMap delegates_;
};


@@ -32,7 +32,7 @@ BUILD_ARCHS="x86_64 armv7 armv7s arm64"
while getopts "a:p" opt_name; do
case "$opt_name" in
a) BUILD_ARCHS="${OPTARG}";;
p) profiling_args='-DGEMMLOWP_PROFILING,-DTFLITE_PROFILING_ENABLED';;
p) profiling_args='-DGEMMLOWP_PROFILING';;
*) usage;;
esac
done
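For the iOS build script above, the `-p` option now only adds the gemmlowp define. A hypothetical invocation (the script's file name is not shown in this diff, so the name below is assumed):

```
# Hypothetical invocation; the actual script name is not shown in this diff.
# -p adds -DGEMMLOWP_PROFILING; -a restricts the architectures to build.
sh build_benchmark_framework.sh -p -a "arm64"
```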