From fb5f4a4a67fb2f4922d85b59923172da61f19729 Mon Sep 17 00:00:00 2001
From: Nat Jeffries <njeff@google.com>
Date: Fri, 12 Jun 2020 16:53:43 -0700
Subject: [PATCH] Create a micro profiler class and use it to implement per-op
 profiling in the micro interpreter.
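
A profiler implementing the tflite::Profiler interface can now be passed to
the MicroInterpreter constructor; when one is supplied, each operator
invocation is wrapped in a ScopedOperatorProfile so BeginEvent/EndEvent fire
around every op (debug builds only; the scope is compiled out under NDEBUG).

A minimal usage sketch with the new MicroProfiler class; the model, resolver,
and tensor arena setup are the usual TFLM boilerplate and are assumed to
already exist:

    tflite::MicroErrorReporter error_reporter;
    tflite::MicroProfiler profiler(&error_reporter);
    tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                         kTensorArenaSize, &error_reporter,
                                         &profiler);
    interpreter.AllocateTensors();
    interpreter.Invoke();  // Reports "<op> took <N> cycles" per operator.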

PiperOrigin-RevId: 316202980
Change-Id: Ifc7e238835a2458d17d8c0917ba2d33b05af8c96
---
 tensorflow/lite/micro/BUILD                   | 19 +++++
 tensorflow/lite/micro/micro_interpreter.cc    | 25 +++++--
 tensorflow/lite/micro/micro_interpreter.h     | 24 ++++---
 .../lite/micro/micro_interpreter_test.cc      | 66 +++++++++++++++++
 tensorflow/lite/micro/micro_profiler.cc       | 41 +++++++++++
 tensorflow/lite/micro/micro_profiler.h        | 71 +++++++++++++++++++
 6 files changed, 229 insertions(+), 17 deletions(-)
 create mode 100644 tensorflow/lite/micro/micro_profiler.cc
 create mode 100644 tensorflow/lite/micro/micro_profiler.h

diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD
index 135b756496c..32d7271734e 100644
--- a/tensorflow/lite/micro/BUILD
+++ b/tensorflow/lite/micro/BUILD
@@ -48,6 +48,7 @@ cc_library(
         "//tensorflow/lite/core/api",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:tensor",
+        "//tensorflow/lite/micro:micro_profiler",
         "//tensorflow/lite/micro/memory_planner",
         "//tensorflow/lite/micro/memory_planner:greedy_memory_planner",
         "//tensorflow/lite/schema:schema_fbs",
@@ -181,6 +182,24 @@ cc_library(
     deps = ["//tensorflow/lite/c:common"],
 )
 
+cc_library(
+    name = "micro_profiler",
+    srcs = [
+        "micro_profiler.cc",
+    ],
+    hdrs = [
+        "micro_profiler.h",
+    ],
+    build_for_embedded = True,
+    copts = micro_copts(),
+    deps = [
+        ":micro_compatibility",
+        ":micro_time",
+        "//tensorflow/lite/core/api",
+        "//tensorflow/lite/kernels/internal:compatibility",
+    ],
+)
+
 cc_library(
     name = "micro_utils",
     srcs = [
diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc
index 930badb14ab..c20eb1f0984 100644
--- a/tensorflow/lite/micro/micro_interpreter.cc
+++ b/tensorflow/lite/micro/micro_interpreter.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/core/api/tensor_utils.h"
 #include "tensorflow/lite/micro/micro_allocator.h"
 #include "tensorflow/lite/micro/micro_op_resolver.h"
+#include "tensorflow/lite/micro/micro_profiler.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
@@ -75,26 +76,28 @@ MicroInterpreter::MicroInterpreter(const Model* model,
                                    const MicroOpResolver& op_resolver,
                                    uint8_t* tensor_arena,
                                    size_t tensor_arena_size,
-                                   ErrorReporter* error_reporter)
+                                   ErrorReporter* error_reporter,
+                                   tflite::Profiler* profiler)
     : model_(model),
       op_resolver_(op_resolver),
       error_reporter_(error_reporter),
       allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
                                          error_reporter)),
       context_helper_(error_reporter_, &allocator_) {
-  Init();
+  Init(profiler);
 }
 
 MicroInterpreter::MicroInterpreter(const Model* model,
                                    const MicroOpResolver* op_resolver,
                                    MicroAllocator* allocator,
-                                   ErrorReporter* error_reporter)
+                                   ErrorReporter* error_reporter,
+                                   tflite::Profiler* profiler)
     : model_(model),
       op_resolver_(*op_resolver),
       error_reporter_(error_reporter),
       allocator_(*allocator),
       context_helper_(error_reporter_, &allocator_) {
-  Init();
+  Init(profiler);
 }
 
 MicroInterpreter::~MicroInterpreter() {
@@ -112,7 +115,7 @@ MicroInterpreter::~MicroInterpreter() {
   }
 }
 
-void MicroInterpreter::Init() {
+void MicroInterpreter::Init(tflite::Profiler* profiler) {
   const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
       model_->subgraphs();
   if (subgraphs->size() != 1) {
@@ -126,6 +129,7 @@ void MicroInterpreter::Init() {
   context_.impl_ = static_cast<void*>(&context_helper_);
   context_.ReportError = context_helper_.ReportOpError;
   context_.recommended_num_threads = 1;
+  context_.profiler = profiler;
 
   initialization_status_ = kTfLiteOk;
 }
@@ -266,7 +270,16 @@ TfLiteStatus MicroInterpreter::Invoke() {
     auto* registration = node_and_registrations_[i].registration;
 
     if (registration->invoke) {
-      TfLiteStatus invoke_status = registration->invoke(&context_, node);
+      TfLiteStatus invoke_status;
+#ifndef NDEBUG  // Omit profiler overhead from release builds.
+      // The case where profiler == nullptr is handled by ScopedOperatorProfile.
+      tflite::Profiler* profiler =
+          reinterpret_cast<tflite::Profiler*>(context_.profiler);
+      ScopedOperatorProfile scoped_profiler(
+          profiler, OpNameFromRegistration(registration), i);
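+      // The scoped profiler is destroyed at the end of the enclosing block,
+      // so the timed region brackets the invoke() call below.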
+#endif
+      invoke_status = registration->invoke(&context_, node);
+
       if (invoke_status == kTfLiteError) {
         TF_LITE_REPORT_ERROR(
             error_reporter_,
diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h
index 34358f051d3..bbe01fa2934 100644
--- a/tensorflow/lite/micro/micro_interpreter.h
+++ b/tensorflow/lite/micro/micro_interpreter.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/core/api/error_reporter.h"
+#include "tensorflow/lite/core/api/profiler.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/micro/micro_allocator.h"
 #include "tensorflow/lite/micro/micro_op_resolver.h"
@@ -64,17 +65,17 @@ class ContextHelper {
 
 class MicroInterpreter {
  public:
-  // The lifetime of the model, op resolver, tensor arena, and error reporter
-  // must be at least as long as that of the interpreter object, since the
-  // interpreter may need to access them at any time. This means that you should
-  // usually create them with the same scope as each other, for example having
-  // them all allocated on the stack as local variables through a top-level
-  // function.
-  // The interpreter doesn't do any deallocation of any of the pointed-to
-  // objects, ownership remains with the caller.
+  // The lifetime of the model, op resolver, tensor arena, error reporter, and
+  // profiler must be at least as long as that of the interpreter object, since
+  // the interpreter may need to access them at any time. This means that you
+  // should usually create them with the same scope as each other, for example
+  // having them all allocated on the stack as local variables through a
+  // top-level function. The interpreter doesn't deallocate any of the
+  // pointed-to objects; ownership remains with the caller.
   MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
                    uint8_t* tensor_arena, size_t tensor_arena_size,
-                   ErrorReporter* error_reporter);
+                   ErrorReporter* error_reporter,
+                   tflite::Profiler* profiler = nullptr);
 
   // Create an interpreter instance using an existing MicroAllocator instance.
   // This constructor should be used when creating an allocator that needs to
@@ -82,7 +83,8 @@ class MicroInterpreter {
   // allocations inside the interpreter. The lifetime of the allocator must be
   // as long as that of the interpreter object.
   MicroInterpreter(const Model* model, const MicroOpResolver* op_resolver,
-                   MicroAllocator* allocator, ErrorReporter* error_reporter);
+                   MicroAllocator* allocator, ErrorReporter* error_reporter,
+                   tflite::Profiler* profiler = nullptr);
 
   ~MicroInterpreter();
 
@@ -166,7 +168,7 @@ class MicroInterpreter {
  private:
   // TODO(b/158263161): Consider switching to Create() function to enable better
   // error reporting during initialization.
-  void Init();
+  void Init(tflite::Profiler* profiler);
 
   void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
 
diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc
index c6d034819c3..079e23d33eb 100644
--- a/tensorflow/lite/micro/micro_interpreter_test.cc
+++ b/tensorflow/lite/micro/micro_interpreter_test.cc
@@ -24,6 +24,45 @@ limitations under the License.
 #include "tensorflow/lite/micro/test_helpers.h"
 #include "tensorflow/lite/micro/testing/micro_test.h"
 
+namespace tflite {
+namespace {
+
+class MockProfiler : public tflite::Profiler {
+ public:
+  MockProfiler() : event_starts_(0), event_ends_(0) {}
+  ~MockProfiler() override = default;
+
+  // AddEvent is unused for TF Micro.
+  void AddEvent(const char* tag, EventType event_type, uint64_t start,
+                uint64_t end, int64_t event_metadata1,
+                int64_t event_metadata2) override {}
+
+  // BeginEvent followed by code followed by EndEvent will profile the code
+  // enclosed. Multiple concurrent events are unsupported, so the return value
+  // is always 0. The event_metadata1 and event_metadata2 arguments are unused.
+  // The tag pointer must be valid until EndEvent is called.
+  uint32_t BeginEvent(const char* tag, EventType event_type,
+                      int64_t event_metadata1,
+                      int64_t event_metadata2) override {
+    event_starts_++;
+    return 0;
+  }
+
+  // event_handle is ignored since TF Micro does not support concurrent events.
+  void EndEvent(uint32_t event_handle) override { event_ends_++; }
+
+  int event_starts() { return event_starts_; }
+  int event_ends() { return event_ends_; }
+
+ private:
+  int event_starts_;
+  int event_ends_;
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace
+}  // namespace tflite
+
 TF_LITE_MICRO_TESTS_BEGIN
 
 TF_LITE_MICRO_TEST(TestInterpreter) {
@@ -210,4 +249,31 @@ TF_LITE_MICRO_TEST(TestIncompleteInitialization) {
                                        micro_test::reporter);
 }
 
+// Test that an interpreter with a supplied profiler correctly calls the
+// profiler each time an operator is invoked.
+TF_LITE_MICRO_TEST(InterpreterWithProfilerShouldProfileOps) {
+  const tflite::Model* model = tflite::testing::GetComplexMockModel();
+  TF_LITE_MICRO_EXPECT_NE(nullptr, model);
+
+  tflite::testing::MockOpResolver mock_resolver;
+  constexpr size_t allocator_buffer_size = 2048;
+  uint8_t allocator_buffer[allocator_buffer_size];
+  tflite::MockProfiler profiler;
+  tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer,
+                                       allocator_buffer_size,
+                                       micro_test::reporter, &profiler);
+
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_starts(), 0);
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_ends(), 0);
+  TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+  TF_LITE_MICRO_EXPECT_EQ(interpreter.Invoke(), kTfLiteOk);
+#ifndef NDEBUG
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_starts(), 3);
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_ends(), 3);
+#else  // Profile events will not occur on release builds.
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_starts(), 0);
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_ends(), 0);
+#endif
+}
+
 TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/micro/micro_profiler.cc b/tensorflow/lite/micro/micro_profiler.cc
new file mode 100644
index 00000000000..a765b918108
--- /dev/null
+++ b/tensorflow/lite/micro/micro_profiler.cc
@@ -0,0 +1,41 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/micro/micro_profiler.h"
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/micro/micro_time.h"
+
+namespace tflite {
+
+MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter)
+    : reporter_(reporter) {}
+
+uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type,
+                                   int64_t event_metadata1,
+                                   int64_t event_metadata2) {
+  TFLITE_DCHECK(tag != nullptr);
+  event_tag_ = tag;
+  // Record the start time last so the bookkeeping above is not counted in the
+  // measured interval.
+  start_time_ = GetCurrentTimeTicks();
+  return 0;
+}
+
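+// Reports the duration of the event via the error reporter. The unit is
+// platform timer ticks (whatever GetCurrentTimeTicks() counts on the target),
+// which may or may not correspond to CPU cycles.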
+void MicroProfiler::EndEvent(uint32_t event_handle) {
+  int32_t end_time = GetCurrentTimeTicks();
+  TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_,
+                       end_time - start_time_);
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h
new file mode 100644
index 00000000000..a3144b3a173
--- /dev/null
+++ b/tensorflow/lite/micro/micro_profiler.h
@@ -0,0 +1,71 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
+
+#include "tensorflow/lite/core/api/error_reporter.h"
+#include "tensorflow/lite/core/api/profiler.h"
+#include "tensorflow/lite/micro/compatibility.h"
+
+namespace tflite {
+
+// MicroProfiler creates a common way to gain fine-grained insight into runtime
+// performance. Bottleneck operators can be identified, along with slow code
+// sections. This can be used in conjunction with running the relevant micro
+// benchmark to evaluate end-to-end performance.
+//
+// Usage example:
+// MicroProfiler profiler(error_reporter);
+// {
+//   ScopedProfile scoped_profile(&profiler, tag);
+//   work_to_profile();
+// }
+//
+// This will call the following methods in order:
+// uint32_t event_handle = profiler.BeginEvent(tag, EventType::DEFAULT, 0, 0);
+// work_to_profile();
+// profiler.EndEvent(event_handle);
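+//
+// To enable per-operator profiling in the interpreter, pass a pointer to the
+// profiler when constructing it (setup boilerplate elided):
+// MicroProfiler profiler(error_reporter);
+// MicroInterpreter interpreter(model, resolver, arena, arena_size,
+//                              error_reporter, &profiler);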
+class MicroProfiler : public tflite::Profiler {
+ public:
+  explicit MicroProfiler(tflite::ErrorReporter* reporter);
+  ~MicroProfiler() override = default;
+
+  // AddEvent is unused for TF Micro.
+  void AddEvent(const char* tag, EventType event_type, uint64_t start,
+                uint64_t end, int64_t event_metadata1,
+                int64_t event_metadata2) override {}
+
+  // BeginEvent followed by code followed by EndEvent will profile the code
+  // enclosed. Multiple concurrent events are unsupported, so the return value
+  // is always 0. The event_metadata1 and event_metadata2 arguments are unused.
+  // The tag pointer must be valid until EndEvent is called.
+  uint32_t BeginEvent(const char* tag, EventType event_type,
+                      int64_t event_metadata1,
+                      int64_t event_metadata2) override;
+
+  // event_handle is ignored since TF Micro does not support concurrent events.
+  void EndEvent(uint32_t event_handle) override;
+
+ private:
+  tflite::ErrorReporter* reporter_;
+  int32_t start_time_;
+  const char* event_tag_;
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_