diff --git a/tensorflow/lite/tools/benchmark/BUILD b/tensorflow/lite/tools/benchmark/BUILD
index d4428f4d498..c692b948692 100644
--- a/tensorflow/lite/tools/benchmark/BUILD
+++ b/tensorflow/lite/tools/benchmark/BUILD
@@ -89,9 +89,9 @@ cc_library(
         ":logging",
         "//tensorflow/lite:framework",
         "//tensorflow/lite:string_util",
-        "//tensorflow/lite/delegates/nnapi:nnapi_delegate",
         "//tensorflow/lite/kernels:builtin_ops",
         "//tensorflow/lite/profiling:profile_summarizer",
+        "//tensorflow/lite/profiling:profiler",
        "//tensorflow/lite/tools/evaluation:utils",
         "@gemmlowp",
     ],
diff --git a/tensorflow/lite/tools/benchmark/README.md b/tensorflow/lite/tools/benchmark/README.md
index e432d81f8ec..d5c89bd266b 100644
--- a/tensorflow/lite/tools/benchmark/README.md
+++ b/tensorflow/lite/tools/benchmark/README.md
@@ -45,6 +45,8 @@ and the following optional parameters:
 *   `use_gpu`: `bool` (default=false) \
     Whether to use the [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
     This option is currently only available on Android devices.
+*   `enable_op_profiling`: `bool` (default=false) \
+    Whether to enable per-operator profiling measurement.
 
 ## To build/install/run
@@ -129,19 +131,18 @@ where `f0` is the affinity mask for big cores on Pixel 2.
 Note: The affinity mask varies with the device.
 
 ## Profiling model operators
-The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this,
-compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED**
-to compile benchmark with profiling support.
-For example, to compile with profiling support on Android, add this flag to the previous command:
+The benchmark model binary also allows you to profile operators and give
+execution times of each operator. To do this, pass the flag
+`--enable_op_profiling=true` to `benchmark_model` during invocation, e.g.,
 
 ```
-bazel build -c opt \
-  --config=android_arm \
-  --cxxopt='--std=c++11' \
-  --copt=-DTFLITE_PROFILING_ENABLED \
-  tensorflow/lite/tools/benchmark:benchmark_model
+adb shell taskset f0 /data/local/tmp/benchmark_model \
+  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+  --enable_op_profiling=true
 ```
 
-This compiles TFLite with profiling enabled, now you can run the benchmark binary like before.
-The binary will produce detailed statistics for each operation similar to those shown below:
+
+When enabled, the `benchmark_model` binary will produce detailed statistics for
+each operation similar to those shown below:
 
 ```
diff --git a/tensorflow/lite/tools/benchmark/benchmark_test.cc b/tensorflow/lite/tools/benchmark/benchmark_test.cc
index c7fbc24a477..8fd625cf141 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_test.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_test.cc
@@ -48,6 +48,7 @@ BenchmarkParams CreateParams() {
   params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
   params.AddParam("use_legacy_nnapi", BenchmarkParam::Create<bool>(false));
   params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
+  params.AddParam("enable_op_profiling", BenchmarkParam::Create<bool>(false));
   return params;
 }
diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
index 161ae1df034..10c9643e9b6 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -26,6 +26,8 @@ limitations under the License.
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/op_resolver.h"
+#include "tensorflow/lite/profiling/buffered_profiler.h"
+#include "tensorflow/lite/profiling/profile_summarizer.h"
 #include "tensorflow/lite/string_util.h"
 #include "tensorflow/lite/tools/benchmark/logging.h"
 #include "tensorflow/lite/tools/evaluation/utils.h"
@@ -40,12 +42,44 @@ void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
 
 namespace tflite {
 namespace benchmark {
+namespace {
 
-void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
-  TFLITE_BENCHMARK_CHECK(interpreter);
-  interpreter_ = interpreter;
-  interpreter_->SetProfiler(&profiler_);
-}
+// Backward compat with previous approach to enabling op profiling.
+#if defined(TFLITE_PROFILING_ENABLED)
+constexpr int kOpProfilingEnabledDefault = true;
+#else
+constexpr int kOpProfilingEnabledDefault = false;
+#endif
+
+// Dumps profiling events if profiling is enabled.
+class ProfilingListener : public BenchmarkListener {
+ public:
+  explicit ProfilingListener(Interpreter* interpreter)
+      : interpreter_(interpreter), has_profiles_(false) {
+    TFLITE_BENCHMARK_CHECK(interpreter);
+    interpreter_->SetProfiler(&profiler_);
+  }
+
+  void OnSingleRunStart(RunType run_type) override;
+
+  void OnSingleRunEnd() override;
+
+  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+
+ private:
+  Interpreter* interpreter_;
+  profiling::BufferedProfiler profiler_;
+  profiling::ProfileSummarizer summarizer_;
+  bool has_profiles_;
+};
+
+// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
+class GemmlowpProfilingListener : public BenchmarkListener {
+ public:
+  void OnBenchmarkStart(const BenchmarkParams& params) override;
+
+  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+};
 
 void ProfilingListener::OnSingleRunStart(RunType run_type) {
   if (run_type == REGULAR) {
@@ -82,8 +116,6 @@ void GemmlowpProfilingListener::OnBenchmarkEnd(
 #endif
 }
 
-namespace {
-
 std::vector<std::string> Split(const std::string& str, const char delim) {
   std::istringstream input(str);
   std::vector<std::string> results;
@@ -201,6 +233,9 @@ BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
                           BenchmarkParam::Create<bool>(false));
   default_params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
   default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
+  default_params.AddParam(
+      "enable_op_profiling",
+      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
   return default_params;
 }
@@ -209,8 +244,6 @@ BenchmarkTfLiteModel::BenchmarkTfLiteModel()
 
 BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
     : BenchmarkModel(std::move(params)) {
-  AddListener(&profiling_listener_);
-  AddListener(&gemmlowp_profiling_listener_);
 }
 
 void BenchmarkTfLiteModel::CleanUp() {
@@ -236,7 +269,8 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
       CreateFlag<bool>("use_nnapi", &params_, "use nnapi delegate api"),
       CreateFlag<bool>("use_legacy_nnapi", &params_, "use legacy nnapi api"),
       CreateFlag<bool>("use_gpu", &params_, "use gpu"),
-      CreateFlag<bool>("allow_fp16", &params_, "allow fp16")};
+      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
+      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling")};
 
   flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
   return flags;
@@ -255,6 +289,8 @@ void BenchmarkTfLiteModel::LogParams() {
   TFLITE_LOG(INFO) << "Use gpu : [" << params_.Get<bool>("use_gpu") << "]";
   TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
                    << "]";
+  TFLITE_LOG(INFO) << "Enable op profiling: ["
+                   << params_.Get<bool>("enable_op_profiling") << "]";
 }
 
 bool BenchmarkTfLiteModel::ValidateParams() {
@@ -382,7 +418,6 @@ void BenchmarkTfLiteModel::Init() {
   if (!interpreter) {
     TFLITE_LOG(FATAL) << "Failed to construct interpreter";
   }
-  profiling_listener_.SetInterpreter(interpreter.get());
 
   interpreter->UseNNAPI(params_.Get<bool>("use_legacy_nnapi"));
 
@@ -433,6 +468,16 @@ void BenchmarkTfLiteModel::Init() {
   if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
     TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
   }
+
+  // Install profilers if necessary.
+  if (params_.Get<bool>("enable_op_profiling")) {
+    profiling_listener_.reset(new ProfilingListener(interpreter.get()));
+    AddListener(profiling_listener_.get());
+  }
+#ifdef GEMMLOWP_PROFILING
+  gemmlowp_profiling_listener_.reset(new GemmlowpProfilingListener());
+  AddListener(gemmlowp_profiling_listener_.get());
+#endif
 }
 
 BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
index 99b9ce35246..ab64effd0ec 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
@@ -22,42 +22,11 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/lite/model.h"
-#include "tensorflow/lite/profiling/profile_summarizer.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"
 
 namespace tflite {
 namespace benchmark {
 
-// Dumps profiling events if profiling is enabled.
-class ProfilingListener : public BenchmarkListener {
- public:
-  explicit ProfilingListener() : interpreter_(nullptr), has_profiles_(false) {}
-
-  void SetInterpreter(Interpreter* interpreter);
-
-  void OnSingleRunStart(RunType run_type) override;
-
-  void OnSingleRunEnd() override;
-
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
-
- private:
-  Interpreter* interpreter_;
-  profiling::Profiler profiler_;
-  profiling::ProfileSummarizer summarizer_;
-  bool has_profiles_;
-};
-
-// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
-class GemmlowpProfilingListener : public BenchmarkListener {
- public:
-  virtual ~GemmlowpProfilingListener() {}
-
-  void OnBenchmarkStart(const BenchmarkParams& params) override;
-
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
-};
-
 // Benchmarks a TFLite model by running tflite interpreter.
 class BenchmarkTfLiteModel : public BenchmarkModel {
  public:
@@ -99,8 +68,8 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
   };
   std::vector<InputLayerInfo> inputs;
   std::vector<InputTensorData> inputs_data_;
-  ProfilingListener profiling_listener_;
-  GemmlowpProfilingListener gemmlowp_profiling_listener_;
+  std::unique_ptr<BenchmarkListener> profiling_listener_;
+  std::unique_ptr<BenchmarkListener> gemmlowp_profiling_listener_;
   TfLiteDelegatePtrMap delegates_;
 };
diff --git a/tensorflow/lite/tools/make/build_ios_universal_lib.sh b/tensorflow/lite/tools/make/build_ios_universal_lib.sh
index 8b617ef5937..3678f554d08 100755
--- a/tensorflow/lite/tools/make/build_ios_universal_lib.sh
+++ b/tensorflow/lite/tools/make/build_ios_universal_lib.sh
@@ -32,7 +32,7 @@ BUILD_ARCHS="x86_64 armv7 armv7s arm64"
 while getopts "a:p" opt_name; do
   case "$opt_name" in
     a) BUILD_ARCHS="${OPTARG}";;
-    p) profiling_args='-DGEMMLOWP_PROFILING,-DTFLITE_PROFILING_ENABLED';;
+    p) profiling_args='-DGEMMLOWP_PROFILING';;
    *) usage;;
   esac
 done
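
As context for the listener-based design this patch moves to: a minimal sketch of a custom `BenchmarkListener`, using only hooks that appear in the patch itself (`OnSingleRunStart`, `OnBenchmarkEnd`, `RunType`/`REGULAR`, `TFLITE_LOG`). The class name `RunCountListener` is hypothetical, for illustration only — it is not part of this change:

```
#include "tensorflow/lite/tools/benchmark/benchmark_model.h"
#include "tensorflow/lite/tools/benchmark/logging.h"

namespace tflite {
namespace benchmark {

// Hypothetical listener for illustration: counts measured (non-warmup) runs
// and logs the total when the benchmark finishes. Overrides only the hooks
// it needs; BenchmarkListener provides empty defaults for the rest.
class RunCountListener : public BenchmarkListener {
 public:
  void OnSingleRunStart(RunType run_type) override {
    // REGULAR distinguishes measured runs from warmup runs, mirroring the
    // check in ProfilingListener::OnSingleRunStart above.
    if (run_type == REGULAR) ++regular_runs_;
  }

  void OnBenchmarkEnd(const BenchmarkResults& results) override {
    TFLITE_LOG(INFO) << "Observed " << regular_runs_ << " regular runs.";
  }

 private:
  int regular_runs_ = 0;
};

}  // namespace benchmark
}  // namespace tflite
```

Such a listener would be wired up the same way the patch now installs `ProfilingListener` in `BenchmarkTfLiteModel::Init()`: hold it in a `std::unique_ptr<BenchmarkListener>` member and register it with `AddListener(listener.get())`.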