Add a --enable_op_profiling flag for TFLite's benchmark_model

Use of `--copt=-DTFLITE_PROFILING_ENABLED` is no longer required to
enable per-op profiling when running TFLite's benchmark_model utility.
Simply use `--enable_op_profiling=true` to get this information.
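
For example (invocation taken from the README update in this change; the device and model paths are illustrative):

```
adb shell taskset f0 /data/local/tmp/benchmark_model \
  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
  --enable_op_profiling=true
```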

PiperOrigin-RevId: 247236386
Jared Duke 2019-05-08 10:16:35 -07:00 committed by TensorFlower Gardener
parent e84d1e517d
commit bba56b4444
6 changed files with 72 additions and 56 deletions


@@ -89,9 +89,9 @@ cc_library(
         ":logging",
         "//tensorflow/lite:framework",
         "//tensorflow/lite:string_util",
-        "//tensorflow/lite/delegates/nnapi:nnapi_delegate",
         "//tensorflow/lite/kernels:builtin_ops",
         "//tensorflow/lite/profiling:profile_summarizer",
+        "//tensorflow/lite/profiling:profiler",
         "//tensorflow/lite/tools/evaluation:utils",
         "@gemmlowp",
     ],


@@ -45,6 +45,8 @@ and the following optional parameters:
 *   `use_gpu`: `bool` (default=false) \
     Whether to use the [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
     This option is currently only available on Android devices.
+*   `enable_op_profiling`: `bool` (default=false) \
+    Whether to enable per-operator profiling measurement.

 ## To build/install/run
@@ -129,19 +131,18 @@ where `f0` is the affinity mask for big cores on Pixel 2.
 Note: The affinity mask varies with the device.

 ## Profiling model operators
-The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this,
-compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED**
-to compile benchmark with profiling support.
-For example, to compile with profiling support on Android, add this flag to the previous command:
+The benchmark model binary also allows you to profile operators and give
+execution times of each operator. To do this, pass the flag
+`--enable_op_profiling=true` to `benchmark_model` during invocation, e.g.,

 ```
-bazel build -c opt \
-  --config=android_arm \
-  --cxxopt='--std=c++11' \
-  --copt=-DTFLITE_PROFILING_ENABLED \
-  tensorflow/lite/tools/benchmark:benchmark_model
+adb shell taskset f0 /data/local/tmp/benchmark_model \
+  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+  --enable_op_profiling=true
 ```

-This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. The binary will produce detailed statistics for each operation similar to those shown below:
+When enabled, the `benchmark_model` binary will produce detailed statistics for
+each operation similar to those shown below:

 ```


@@ -48,6 +48,7 @@ BenchmarkParams CreateParams() {
   params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
   params.AddParam("use_legacy_nnapi", BenchmarkParam::Create<bool>(false));
   params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
+  params.AddParam("enable_op_profiling", BenchmarkParam::Create<bool>(false));
   return params;
 }


@@ -26,6 +26,8 @@ limitations under the License.
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/op_resolver.h"
+#include "tensorflow/lite/profiling/buffered_profiler.h"
+#include "tensorflow/lite/profiling/profile_summarizer.h"
 #include "tensorflow/lite/string_util.h"
 #include "tensorflow/lite/tools/benchmark/logging.h"
 #include "tensorflow/lite/tools/evaluation/utils.h"

@@ -40,12 +42,44 @@ void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
 namespace tflite {
 namespace benchmark {
+namespace {

-void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
-  TFLITE_BENCHMARK_CHECK(interpreter);
-  interpreter_ = interpreter;
-  interpreter_->SetProfiler(&profiler_);
-}
+// Backward compat with previous approach to enabling op profiling.
+#if defined(TFLITE_PROFILING_ENABLED)
+constexpr int kOpProfilingEnabledDefault = true;
+#else
+constexpr int kOpProfilingEnabledDefault = false;
+#endif
+
+// Dumps profiling events if profiling is enabled.
+class ProfilingListener : public BenchmarkListener {
+ public:
+  explicit ProfilingListener(Interpreter* interpreter)
+      : interpreter_(interpreter), has_profiles_(false) {
+    TFLITE_BENCHMARK_CHECK(interpreter);
+    interpreter_->SetProfiler(&profiler_);
+  }
+
+  void OnSingleRunStart(RunType run_type) override;
+  void OnSingleRunEnd() override;
+  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+
+ private:
+  Interpreter* interpreter_;
+  profiling::BufferedProfiler profiler_;
+  profiling::ProfileSummarizer summarizer_;
+  bool has_profiles_;
+};
+
+// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
+class GemmlowpProfilingListener : public BenchmarkListener {
+ public:
+  void OnBenchmarkStart(const BenchmarkParams& params) override;
+  void OnBenchmarkEnd(const BenchmarkResults& results) override;
+};

 void ProfilingListener::OnSingleRunStart(RunType run_type) {
   if (run_type == REGULAR) {

@@ -82,8 +116,6 @@ void GemmlowpProfilingListener::OnBenchmarkEnd(
 #endif
 }

-namespace {
-
 std::vector<std::string> Split(const std::string& str, const char delim) {
   std::istringstream input(str);
   std::vector<std::string> results;

@@ -201,6 +233,9 @@ BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
                           BenchmarkParam::Create<bool>(false));
   default_params.AddParam("use_gpu", BenchmarkParam::Create<bool>(false));
   default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
+  default_params.AddParam(
+      "enable_op_profiling",
+      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
   return default_params;
 }

@@ -209,8 +244,6 @@ BenchmarkTfLiteModel::BenchmarkTfLiteModel()

 BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
     : BenchmarkModel(std::move(params)) {
-  AddListener(&profiling_listener_);
-  AddListener(&gemmlowp_profiling_listener_);
 }

 void BenchmarkTfLiteModel::CleanUp() {

@@ -236,7 +269,8 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
       CreateFlag<bool>("use_nnapi", &params_, "use nnapi delegate api"),
       CreateFlag<bool>("use_legacy_nnapi", &params_, "use legacy nnapi api"),
       CreateFlag<bool>("use_gpu", &params_, "use gpu"),
-      CreateFlag<bool>("allow_fp16", &params_, "allow fp16")};
+      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
+      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling")};

   flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
   return flags;

@@ -255,6 +289,8 @@ void BenchmarkTfLiteModel::LogParams() {
   TFLITE_LOG(INFO) << "Use gpu : [" << params_.Get<bool>("use_gpu") << "]";
   TFLITE_LOG(INFO) << "Allow fp16 : [" << params_.Get<bool>("allow_fp16")
                    << "]";
+  TFLITE_LOG(INFO) << "Enable op profiling: ["
+                   << params_.Get<bool>("enable_op_profiling") << "]";
 }

 bool BenchmarkTfLiteModel::ValidateParams() {

@@ -382,7 +418,6 @@ void BenchmarkTfLiteModel::Init() {
   if (!interpreter) {
     TFLITE_LOG(FATAL) << "Failed to construct interpreter";
   }
-  profiling_listener_.SetInterpreter(interpreter.get());

   interpreter->UseNNAPI(params_.Get<bool>("use_legacy_nnapi"));

@@ -433,6 +468,16 @@ void BenchmarkTfLiteModel::Init() {
   if (delegates_.empty() && interpreter->AllocateTensors() != kTfLiteOk) {
     TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
   }
+
+  // Install profilers if necessary.
+  if (params_.Get<bool>("enable_op_profiling")) {
+    profiling_listener_.reset(new ProfilingListener(interpreter.get()));
+    AddListener(profiling_listener_.get());
+  }
+#ifdef GEMMLOWP_PROFILING
+  gemmlowp_profiling_listener_.reset(new GemmlowpProfilingListener());
+  AddListener(gemmlowp_profiling_listener_.get());
+#endif
 }

 BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
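
As the `Init()` hunk above shows, op profiling is now wired in through the benchmark's listener interface rather than a compile-time member. A minimal sketch of a custom listener under that same interface (the `LoggingListener` name is hypothetical; only the hooks and the `AddListener` registration visible in this diff are assumed):

```
// Hypothetical listener that logs when a measured (non-warmup) run starts.
// Assumes the declarations from benchmark_model.h, as in the diff above.
class LoggingListener : public BenchmarkListener {
 public:
  void OnSingleRunStart(RunType run_type) override {
    if (run_type == REGULAR) {
      TFLITE_LOG(INFO) << "Starting a measured run.";
    }
  }
};

// Registration mirrors what Init() does for ProfilingListener:
//   logging_listener_.reset(new LoggingListener());
//   AddListener(logging_listener_.get());
```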


@@ -22,42 +22,11 @@ limitations under the License.
 #include <vector>

 #include "tensorflow/lite/model.h"
-#include "tensorflow/lite/profiling/profile_summarizer.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"

 namespace tflite {
 namespace benchmark {

-// Dumps profiling events if profiling is enabled.
-class ProfilingListener : public BenchmarkListener {
- public:
-  explicit ProfilingListener() : interpreter_(nullptr), has_profiles_(false) {}
-
-  void SetInterpreter(Interpreter* interpreter);
-
-  void OnSingleRunStart(RunType run_type) override;
-  void OnSingleRunEnd() override;
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
-
- private:
-  Interpreter* interpreter_;
-  profiling::Profiler profiler_;
-  profiling::ProfileSummarizer summarizer_;
-  bool has_profiles_;
-};
-
-// Dumps gemmlowp profiling events if gemmlowp profiling is enabled.
-class GemmlowpProfilingListener : public BenchmarkListener {
- public:
-  virtual ~GemmlowpProfilingListener() {}
-
-  void OnBenchmarkStart(const BenchmarkParams& params) override;
-  void OnBenchmarkEnd(const BenchmarkResults& results) override;
-};
-
 // Benchmarks a TFLite model by running tflite interpreter.
 class BenchmarkTfLiteModel : public BenchmarkModel {
  public:

@@ -99,8 +68,8 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
   };
   std::vector<InputLayerInfo> inputs;
   std::vector<InputTensorData> inputs_data_;
-  ProfilingListener profiling_listener_;
-  GemmlowpProfilingListener gemmlowp_profiling_listener_;
+  std::unique_ptr<BenchmarkListener> profiling_listener_;
+  std::unique_ptr<BenchmarkListener> gemmlowp_profiling_listener_;
   TfLiteDelegatePtrMap delegates_;
 };


@@ -32,7 +32,7 @@ BUILD_ARCHS="x86_64 armv7 armv7s arm64"
 while getopts "a:p" opt_name; do
   case "$opt_name" in
     a) BUILD_ARCHS="${OPTARG}";;
-    p) profiling_args='-DGEMMLOWP_PROFILING,-DTFLITE_PROFILING_ENABLED';;
+    p) profiling_args='-DGEMMLOWP_PROFILING';;
    *) usage;;
  esac
done
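
Note that gemmlowp profiling remains a compile-time option (as the script change above shows, the iOS `-p` flag now sets only `-DGEMMLOWP_PROFILING`). A sketch of the equivalent Android build, adapting the bazel command the README previously used for `-DTFLITE_PROFILING_ENABLED` (exact config flags may vary by checkout):

```
bazel build -c opt \
  --config=android_arm \
  --cxxopt='--std=c++11' \
  --copt=-DGEMMLOWP_PROFILING \
  tensorflow/lite/tools/benchmark:benchmark_model
```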