Add a --enable_op_profiling flag for TFLite's benchmark_model

Use of `--copt=-DTFLITE_PROFILING_ENABLED` is no longer required to
enable per-op profiling when running TFLite's benchmark_model utility.
Simply use `--enable_op_profiling=true` to get this information.
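
For example (invocation taken from the README update in this change; the device and model paths are illustrative):

```
adb shell taskset f0 /data/local/tmp/benchmark_model \
  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
  --enable_op_profiling=true
```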

PiperOrigin-RevId: 247236386
Jared Duke 2019-05-08 10:16:35 -07:00 committed by TensorFlower Gardener
parent e84d1e517d
commit bba56b4444
6 changed files with 72 additions and 56 deletions


@@ -89,9 +89,9 @@ cc_library(
         ":logging",
         "//tensorflow/lite:framework",
         "//tensorflow/lite:string_util",
-        "//tensorflow/lite/delegates/nnapi:nnapi_delegate",
         "//tensorflow/lite/kernels:builtin_ops",
         "//tensorflow/lite/profiling:profile_summarizer",
+        "//tensorflow/lite/profiling:profiler",
         "//tensorflow/lite/tools/evaluation:utils",
         "@gemmlowp",
     ],


@@ -45,6 +45,8 @@ and the following optional parameters:
 *   `use_gpu`: `bool` (default=false) \
     Whether to use the [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
     This option is currently only available on Android devices.
+*   `enable_op_profiling`: `bool` (default=false) \
+    Whether to enable per-operator profiling measurement.

 ## To build/install/run
@@ -129,19 +131,18 @@ where `f0` is the affinity mask for big cores on Pixel 2.
 Note: The affinity mask varies with the device.

 ## Profiling model operators
-The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this,
-compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED**
-to compile benchmark with profiling support.
-For example, to compile with profiling support on Android, add this flag to the previous command:
+The benchmark model binary also allows you to profile operators and give
+execution times of each operator. To do this, pass the flag
+`--enable_op_profiling=true` to `benchmark_model` during invocation, e.g.,

 ```
-bazel build -c opt \
-  --config=android_arm \
-  --cxxopt='--std=c++11' \
-  --copt=-DTFLITE_PROFILING_ENABLED \
-  tensorflow/lite/tools/benchmark:benchmark_model
+adb shell taskset f0 /data/local/tmp/benchmark_model \
+  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+  --enable_op_profiling=true
 ```

-This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. The binary will produce detailed statistics for each operation similar to those shown below:
+When enabled, the `benchmark_model` binary will produce detailed statistics for
+each operation similar to those shown below:

 ```


@@ -48,6 +48,7 @@ BenchmarkParams CreateParams() {
   params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
   params.AddParam("use_legacy_nnapi", BenchmarkParam::Create<bool>(false));
   params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
+  params.AddParam("enable_op_profiling", BenchmarkParam::Create<bool>(false));
   return params;
 }


@@ -26,6 +26,8 @@ limitations under the License.
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/op_resolver.h"
+#include "tensorflow/lite/profiling/buffered_profiler.h"
+#include "tensorflow/lite/profiling/profile_summarizer.h"
 #include "tensorflow/lite/string_util.h"
 #include "tensorflow/lite/tools/benchmark/logging.h"
 #include "tensorflow/lite/tools/evaluation/utils.h"

@@ -40,12 +42,44 @@ void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
 namespace tflite {
 namespace benchmark {
+namespace {

-void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
-  TFLITE_BENCHMARK_CHECK(interpreter);
-  interpreter_ = interpreter;
-  interpreter_->SetProfiler(&profiler_);
-}
+// Backward compat with previous approach to enabling op profiling.
+#if defined(TFLITE_PROFILING_ENABLED)
+constexpr int kOpProfilingEnabledDefault = true;
+#else
+constexpr int kOpProfilingEnabledDefault = false;
+#endif
+
+// Dumps profiling events if profiling is enabled.
+class ProfilingListener : public BenchmarkListener {
+ public:
+  explicit ProfilingListener(Interpreter* interpreter)
+      : interpreter_(interpreter), has_profiles_(false) {
+    TFLITE_BENCHMARK_CHECK(interpreter);
+    interpreter_->SetProfiler(&profiler_);
+  }
+
+  void OnSingleRunStart(RunType run_type) override;
+  void OnSingleRunEnd() override;
+  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+
+ private:
+  Interpreter* interpreter_;
+  profiling::BufferedProfiler profiler_;
+  profiling::ProfileSummarizer summarizer_;
+  bool has_profiles_;
+};
+
+// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
+class GemmlowpProfilingListener : public BenchmarkListener {
+ public:
+  void OnBenchmarkStart(const BenchmarkParams& params) override;
+  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+};

 void ProfilingListener::OnSingleRunStart(RunType run_type) {
   if (run_type == REGULAR) {

@@ -82,8 +116,6 @@ void GemmlowpProfilingListener::OnBenchmarkEnd(
 #endif
 }

-namespace {
-
 std::vector<std::string> Split(const std::string& str, const char delim) {
   std::istringstream input(str);
   std::vector<std::string> results;

@@ -201,6 +233,9 @@ BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
                           BenchmarkParam::Create<bool>(false));
   default_params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
   default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
+  default_params.AddParam(
+      "enable_op_profiling",
+      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
   return default_params;
 }

@@ -209,8 +244,6 @@ BenchmarkTfLiteModel::BenchmarkTfLiteModel()

 BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
     : BenchmarkModel(std::move(params)) {
-  AddListener(&profiling_listener_);
-  AddListener(&gemmlowp_profiling_listener_);
 }

 void BenchmarkTfLiteModel::CleanUp() {

@@ -236,7 +269,8 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
       CreateFlag<bool>("use_nnapi", &params_, "use nnapi delegate api"),
       CreateFlag<bool>("use_legacy_nnapi", &params_, "use legacy nnapi api"),
       CreateFlag<bool>("use_gpu", &params_, "use gpu"),
-      CreateFlag<bool>("allow_fp16", &params_, "allow fp16")};
+      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
+      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling")};

   flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
   return flags;

@@ -255,6 +289,8 @@ void BenchmarkTfLiteModel::LogParams() {
   TFLITE_LOG(INFO) << "Use gpu : [" << params_.Get<bool>("use_gpu") << "]";
   TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
                    << "]";
+  TFLITE_LOG(INFO) << "Enable op profiling: ["
+                   << params_.Get<bool>("enable_op_profiling") << "]";
 }

 bool BenchmarkTfLiteModel::ValidateParams() {

@@ -382,7 +418,6 @@ void BenchmarkTfLiteModel::Init() {
   if (!interpreter) {
     TFLITE_LOG(FATAL) << "Failed to construct interpreter";
   }
-  profiling_listener_.SetInterpreter(interpreter.get());

   interpreter->UseNNAPI(params_.Get<bool>("use_legacy_nnapi"));

@@ -433,6 +468,16 @@ void BenchmarkTfLiteModel::Init() {
   if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
     TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
   }
+
+  // Install profilers if necessary.
+  if (params_.Get<bool>("enable_op_profiling")) {
+    profiling_listener_.reset(new ProfilingListener(interpreter.get()));
+    AddListener(profiling_listener_.get());
+  }
+#ifdef GEMMLOWP_PROFILING
+  gemmlowp_profiling_listener_.reset(new GemmlowpProfilingListener());
+  AddListener(gemmlowp_profiling_listener_.get());
+#endif
 }

 BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
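
As the `Init()` hunk above shows, op profiling is now wired in through the benchmark's listener interface rather than a compile-time member. A minimal sketch of a custom listener under that same interface (the `LoggingListener` name is hypothetical; only the hooks and the `AddListener` registration visible in this diff are assumed):

```
// Hypothetical listener that logs when a measured (non-warmup) run starts.
// Assumes the declarations from benchmark_model.h, as in the diff above.
class LoggingListener : public BenchmarkListener {
 public:
  void OnSingleRunStart(RunType run_type) override {
    if (run_type == REGULAR) {
      TFLITE_LOG(INFO) << "Starting a measured run.";
    }
  }
};

// Registration mirrors what Init() does for ProfilingListener:
//   logging_listener_.reset(new LoggingListener());
//   AddListener(logging_listener_.get());
```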


@@ -22,42 +22,11 @@ limitations under the License.
 #include <vector>

 #include "tensorflow/lite/model.h"
-#include "tensorflow/lite/profiling/profile_summarizer.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"

 namespace tflite {
 namespace benchmark {

-// Dumps profiling events if profiling is enabled.
-class ProfilingListener : public BenchmarkListener {
- public:
-  explicit ProfilingListener() : interpreter_(nullptr), has_profiles_(false) {}
-
-  void SetInterpreter(Interpreter* interpreter);
-
-  void OnSingleRunStart(RunType run_type) override;
-  void OnSingleRunEnd() override;
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
-
- private:
-  Interpreter* interpreter_;
-  profiling::Profiler profiler_;
-  profiling::ProfileSummarizer summarizer_;
-  bool has_profiles_;
-};
-
-// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
-class GemmlowpProfilingListener : public BenchmarkListener {
- public:
-  virtual ~GemmlowpProfilingListener() {}
-
-  void OnBenchmarkStart(const BenchmarkParams& params) override;
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
-};
-
 // Benchmarks a TFLite model by running tflite interpreter.
 class BenchmarkTfLiteModel : public BenchmarkModel {
  public:

@@ -99,8 +68,8 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
   };
   std::vector<InputLayerInfo> inputs;
   std::vector<InputTensorData> inputs_data_;
-  ProfilingListener profiling_listener_;
-  GemmlowpProfilingListener gemmlowp_profiling_listener_;
+  std::unique_ptr<BenchmarkListener> profiling_listener_;
+  std::unique_ptr<BenchmarkListener> gemmlowp_profiling_listener_;
   TfLiteDelegatePtrMap delegates_;
 };


@@ -32,7 +32,7 @@ BUILD_ARCHS="x86_64 armv7 armv7s arm64"
 while getopts "a:p" opt_name; do
   case "$opt_name" in
     a) BUILD_ARCHS="${OPTARG}";;
-    p) profiling_args='-DGEMMLOWP_PROFILING,-DTFLITE_PROFILING_ENABLED';;
+    p) profiling_args='-DGEMMLOWP_PROFILING';;
    *) usage;;
  esac
done
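
Note that gemmlowp profiling remains a compile-time option (as the script change above shows, the iOS `-p` flag now sets only `-DGEMMLOWP_PROFILING`). A sketch of the equivalent Android build, adapting the bazel command the README previously used for `-DTFLITE_PROFILING_ENABLED` (exact config flags may vary by checkout):

```
bazel build -c opt \
  --config=android_arm \
  --cxxopt='--std=c++11' \
  --copt=-DGEMMLOWP_PROFILING \
  tensorflow/lite/tools/benchmark:benchmark_model
```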