From fb5f4a4a67fb2f4922d85b59923172da61f19729 Mon Sep 17 00:00:00 2001
From: Nat Jeffries <njeff@google.com>
Date: Fri, 12 Jun 2020 16:53:43 -0700
Subject: [PATCH] Create a micro profiler class and use it to implement per-op
 profiling in the micro interpreter.

PiperOrigin-RevId: 316202980
Change-Id: Ifc7e238835a2458d17d8c0917ba2d33b05af8c96
---
 tensorflow/lite/micro/BUILD                | 19 +++++
 tensorflow/lite/micro/micro_interpreter.cc | 25 +++++--
 tensorflow/lite/micro/micro_interpreter.h  | 24 ++++---
 .../lite/micro/micro_interpreter_test.cc   | 66 +++++++++++++++++
 tensorflow/lite/micro/micro_profiler.cc    | 41 +++++++++++
 tensorflow/lite/micro/micro_profiler.h     | 71 +++++++++++++++++++
 6 files changed, 229 insertions(+), 17 deletions(-)
 create mode 100644 tensorflow/lite/micro/micro_profiler.cc
 create mode 100644 tensorflow/lite/micro/micro_profiler.h

diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD
index 135b756496c..32d7271734e 100644
--- a/tensorflow/lite/micro/BUILD
+++ b/tensorflow/lite/micro/BUILD
@@ -48,6 +48,7 @@ cc_library(
         "//tensorflow/lite/core/api",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:tensor",
+        "//tensorflow/lite/micro:micro_profiler",
         "//tensorflow/lite/micro/memory_planner",
         "//tensorflow/lite/micro/memory_planner:greedy_memory_planner",
         "//tensorflow/lite/schema:schema_fbs",
@@ -181,6 +182,24 @@ cc_library(
     deps = ["//tensorflow/lite/c:common"],
 )
 
+cc_library(
+    name = "micro_profiler",
+    srcs = [
+        "micro_profiler.cc",
+    ],
+    hdrs = [
+        "micro_profiler.h",
+    ],
+    build_for_embedded = True,
+    copts = micro_copts(),
+    deps = [
+        ":micro_compatibility",
+        ":micro_time",
+        "//tensorflow/lite/core/api",
+        "//tensorflow/lite/kernels/internal:compatibility",
+    ],
+)
+
 cc_library(
     name = "micro_utils",
     srcs = [
diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc
index 930badb14ab..c20eb1f0984 100644
--- a/tensorflow/lite/micro/micro_interpreter.cc
+++ b/tensorflow/lite/micro/micro_interpreter.cc
@@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/tensor_utils.h" #include "tensorflow/lite/micro/micro_allocator.h" #include "tensorflow/lite/micro/micro_op_resolver.h" +#include "tensorflow/lite/micro/micro_profiler.h" #include "tensorflow/lite/schema/schema_generated.h" namespace tflite { @@ -75,26 +76,28 @@ MicroInterpreter::MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver, uint8_t* tensor_arena, size_t tensor_arena_size, - ErrorReporter* error_reporter) + ErrorReporter* error_reporter, + tflite::Profiler* profiler) : model_(model), op_resolver_(op_resolver), error_reporter_(error_reporter), allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size, error_reporter)), context_helper_(error_reporter_, &allocator_) { - Init(); + Init(profiler); } MicroInterpreter::MicroInterpreter(const Model* model, const MicroOpResolver* op_resolver, MicroAllocator* allocator, - ErrorReporter* error_reporter) + ErrorReporter* error_reporter, + tflite::Profiler* profiler) : model_(model), op_resolver_(*op_resolver), error_reporter_(error_reporter), allocator_(*allocator), context_helper_(error_reporter_, &allocator_) { - Init(); + Init(profiler); } MicroInterpreter::~MicroInterpreter() { @@ -112,7 +115,7 @@ MicroInterpreter::~MicroInterpreter() { } } -void MicroInterpreter::Init() { +void MicroInterpreter::Init(tflite::Profiler* profiler) { const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs = model_->subgraphs(); if (subgraphs->size() != 1) { @@ -126,6 +129,7 @@ void MicroInterpreter::Init() { context_.impl_ = static_cast<void*>(&context_helper_); context_.ReportError = context_helper_.ReportOpError; context_.recommended_num_threads = 1; + context_.profiler = profiler; initialization_status_ = kTfLiteOk; } @@ -266,7 +270,16 @@ TfLiteStatus MicroInterpreter::Invoke() { auto* registration = node_and_registrations_[i].registration; if (registration->invoke) { - TfLiteStatus invoke_status = registration->invoke(&context_, node); + TfLiteStatus invoke_status; +#ifndef NDEBUG // Omit profiler overhead from release builds. + // The case where profiler == nullptr is handled by ScopedOperatorProfile. + tflite::Profiler* profiler = + reinterpret_cast<tflite::Profiler*>(context_.profiler); + ScopedOperatorProfile scoped_profiler( + profiler, OpNameFromRegistration(registration), i); +#endif + invoke_status = registration->invoke(&context_, node); + if (invoke_status == kTfLiteError) { TF_LITE_REPORT_ERROR( error_reporter_, diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index 34358f051d3..bbe01fa2934 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -21,6 +21,7 @@ limitations under the License. #include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/api/error_reporter.h" +#include "tensorflow/lite/core/api/profiler.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/micro/micro_allocator.h" #include "tensorflow/lite/micro/micro_op_resolver.h" @@ -64,17 +65,17 @@ class ContextHelper { class MicroInterpreter { public: - // The lifetime of the model, op resolver, tensor arena, and error reporter - // must be at least as long as that of the interpreter object, since the - // interpreter may need to access them at any time. 
-  // usually create them with the same scope as each other, for example having
-  // them all allocated on the stack as local variables through a top-level
-  // function.
-  // The interpreter doesn't do any deallocation of any of the pointed-to
-  // objects, ownership remains with the caller.
+  // The lifetime of the model, op resolver, tensor arena, error reporter, and
+  // profiler must be at least as long as that of the interpreter object, since
+  // the interpreter may need to access them at any time. This means that you
+  // should usually create them with the same scope as each other, for example
+  // having them all allocated on the stack as local variables through a
+  // top-level function. The interpreter doesn't do any deallocation of any of
+  // the pointed-to objects; ownership remains with the caller.
   MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
                    uint8_t* tensor_arena, size_t tensor_arena_size,
-                   ErrorReporter* error_reporter);
+                   ErrorReporter* error_reporter,
+                   tflite::Profiler* profiler = nullptr);
 
   // Create an interpreter instance using an existing MicroAllocator instance.
   // This constructor should be used when creating an allocator that needs to
@@ -82,7 +83,8 @@ class MicroInterpreter {
   // allocations inside the interpreter. The lifetime of the allocator must be
   // as long as that of the interpreter object.
   MicroInterpreter(const Model* model, const MicroOpResolver* op_resolver,
-                   MicroAllocator* allocator, ErrorReporter* error_reporter);
+                   MicroAllocator* allocator, ErrorReporter* error_reporter,
+                   tflite::Profiler* profiler = nullptr);
 
   ~MicroInterpreter();
 
@@ -166,7 +168,7 @@ class MicroInterpreter {
  private:
   // TODO(b/158263161): Consider switching to Create() function to enable better
   // error reporting during initialization.
-  void Init();
+  void Init(tflite::Profiler* profiler);
 
   void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
 
diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc
index c6d034819c3..079e23d33eb 100644
--- a/tensorflow/lite/micro/micro_interpreter_test.cc
+++ b/tensorflow/lite/micro/micro_interpreter_test.cc
@@ -24,6 +24,45 @@ limitations under the License.
 #include "tensorflow/lite/micro/test_helpers.h"
 #include "tensorflow/lite/micro/testing/micro_test.h"
 
+namespace tflite {
+namespace {
+
+class MockProfiler : public tflite::Profiler {
+ public:
+  MockProfiler() : event_starts_(0), event_ends_(0) {}
+  ~MockProfiler() override = default;
+
+  // AddEvent is unused in TF Micro.
+  void AddEvent(const char* tag, EventType event_type, uint64_t start,
+                uint64_t end, int64_t event_metadata1,
+                int64_t event_metadata2) override {}
+
+  // BeginEvent followed by code followed by EndEvent will profile the code
+  // enclosed. Multiple concurrent events are unsupported, so the return value
+  // is always 0. The event_metadata1 and event_metadata2 arguments are
+  // unused. The tag pointer must be valid until EndEvent is called.
+  uint32_t BeginEvent(const char* tag, EventType event_type,
+                      int64_t event_metadata1,
+                      int64_t event_metadata2) override {
+    event_starts_++;
+    return 0;
+  }
+
+  // Event_handle is ignored since TF Micro does not support concurrent events.
+  void EndEvent(uint32_t event_handle) override { event_ends_++; }
+
+  int event_starts() const { return event_starts_; }
+  int event_ends() const { return event_ends_; }
+
+ private:
+  int event_starts_;
+  int event_ends_;
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace
+}  // namespace tflite
+
 TF_LITE_MICRO_TESTS_BEGIN
 
 TF_LITE_MICRO_TEST(TestInterpreter) {
@@ -210,4 +249,31 @@ TF_LITE_MICRO_TEST(TestIncompleteInitialization) {
                                        micro_test::reporter);
 }
 
+// Test that an interpreter with a supplied profiler correctly calls the
+// profiler each time an operator is invoked.
+TF_LITE_MICRO_TEST(InterpreterWithProfilerShouldProfileOps) {
+  const tflite::Model* model = tflite::testing::GetComplexMockModel();
+  TF_LITE_MICRO_EXPECT_NE(nullptr, model);
+
+  tflite::testing::MockOpResolver mock_resolver;
+  constexpr size_t allocator_buffer_size = 2048;
+  uint8_t allocator_buffer[allocator_buffer_size];
+  tflite::MockProfiler profiler;
+  tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer,
+                                       allocator_buffer_size,
+                                       micro_test::reporter, &profiler);
+
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_starts(), 0);
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_ends(), 0);
+  TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+  TF_LITE_MICRO_EXPECT_EQ(interpreter.Invoke(), kTfLiteOk);
+#ifndef NDEBUG
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_starts(), 3);
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_ends(), 3);
+#else  // Profile events will not occur in release builds.
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_starts(), 0);
+  TF_LITE_MICRO_EXPECT_EQ(profiler.event_ends(), 0);
+#endif
+}
+
 TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/micro/micro_profiler.cc b/tensorflow/lite/micro/micro_profiler.cc
new file mode 100644
index 00000000000..a765b918108
--- /dev/null
+++ b/tensorflow/lite/micro/micro_profiler.cc
@@ -0,0 +1,41 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/micro/micro_profiler.h"
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/micro/micro_time.h"
+
+namespace tflite {
+
+MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter)
+    : reporter_(reporter) {}
+
+uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type,
+                                   int64_t event_metadata1,
+                                   int64_t event_metadata2) {
+  start_time_ = GetCurrentTimeTicks();
+  TFLITE_DCHECK(tag != nullptr);
+  event_tag_ = tag;
+  return 0;
+}
+
+void MicroProfiler::EndEvent(uint32_t event_handle) {
+  int32_t end_time = GetCurrentTimeTicks();
+  TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_,
+                       end_time - start_time_);
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h
new file mode 100644
index 00000000000..a3144b3a173
--- /dev/null
+++ b/tensorflow/lite/micro/micro_profiler.h
@@ -0,0 +1,71 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
+
+#include "tensorflow/lite/core/api/error_reporter.h"
+#include "tensorflow/lite/core/api/profiler.h"
+#include "tensorflow/lite/micro/compatibility.h"
+
+namespace tflite {
+
+// MicroProfiler provides a common way to gain fine-grained insight into
+// runtime performance. Bottleneck operators can be identified, along with
+// slow code sections. This can be used in conjunction with running the
+// relevant micro benchmark to evaluate end-to-end performance.
+//
+// Usage example:
+// MicroProfiler profiler(error_reporter);
+// {
+//   ScopedProfile scoped_profile(&profiler, tag);
+//   work_to_profile();
+// }
+//
+// This will call the following methods in order:
+// uint32_t event_handle = profiler.BeginEvent(tag, EventType::DEFAULT, 0, 0);
+// work_to_profile();
+// profiler.EndEvent(event_handle);
+class MicroProfiler : public tflite::Profiler {
+ public:
+  explicit MicroProfiler(tflite::ErrorReporter* reporter);
+  ~MicroProfiler() override = default;
+
+  // AddEvent is unused in TF Micro.
+  void AddEvent(const char* tag, EventType event_type, uint64_t start,
+                uint64_t end, int64_t event_metadata1,
+                int64_t event_metadata2) override {}
+
+  // BeginEvent followed by code followed by EndEvent will profile the code
+  // enclosed. Multiple concurrent events are unsupported, so the return value
+  // is always 0. The event_metadata1 and event_metadata2 arguments are
+  // unused. The tag pointer must be valid until EndEvent is called.
+  uint32_t BeginEvent(const char* tag, EventType event_type,
+                      int64_t event_metadata1,
+                      int64_t event_metadata2) override;
+
+  // Event_handle is ignored since TF Micro does not support concurrent events.
+  void EndEvent(uint32_t event_handle) override;
+
+ private:
+  tflite::ErrorReporter* reporter_;
+  int32_t start_time_;
+  const char* event_tag_;
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
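
The per-op hook added to MicroInterpreter::Invoke() leans on
ScopedOperatorProfile from tensorflow/lite/core/api/profiler.h, which is why
the profiler == nullptr case needs no special handling at the call site. A
minimal sketch of that RAII pattern follows, assuming the real class behaves
equivalently; the Sketch-suffixed name is illustrative, not part of the
library:

#include <cstdint>

#include "tensorflow/lite/core/api/profiler.h"

// RAII sketch of the pattern ScopedOperatorProfile follows: begin an operator
// event on construction, end it on destruction, and do nothing at all when no
// profiler was supplied (the nullptr case noted in Invoke()).
class ScopedOperatorProfileSketch {
 public:
  ScopedOperatorProfileSketch(tflite::Profiler* profiler, const char* tag,
                              int node_index)
      : profiler_(profiler), event_handle_(0) {
    if (profiler_ != nullptr) {
      event_handle_ = profiler_->BeginEvent(
          tag, tflite::Profiler::EventType::OPERATOR_INVOKE_EVENT,
          /*event_metadata1=*/node_index, /*event_metadata2=*/0);
    }
  }

  ~ScopedOperatorProfileSketch() {
    if (profiler_ != nullptr) {
      profiler_->EndEvent(event_handle_);
    }
  }

 private:
  tflite::Profiler* profiler_;
  uint32_t event_handle_;
};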
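
Putting the pieces together, a minimal usage sketch distilled from the
InterpreterWithProfilerShouldProfileOps test above; model, resolver, and
error_reporter are assumed to be prepared as in the existing tests, and the
arena size and function name are illustrative:

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_profiler.h"

constexpr size_t kTensorArenaSize = 2048;
uint8_t tensor_arena[kTensorArenaSize];

TfLiteStatus RunProfiledInference(const tflite::Model* model,
                                  const tflite::MicroOpResolver& resolver,
                                  tflite::ErrorReporter* error_reporter) {
  // Passing the profiler to the constructor is the only wiring needed; the
  // interpreter forwards it to kernels through context_.profiler.
  tflite::MicroProfiler profiler(error_reporter);
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kTensorArenaSize, error_reporter,
                                       &profiler);
  TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());
  // In debug builds, MicroProfiler reports "<op name> took <n> cycles"
  // through the error reporter for every operator invocation.
  return interpreter.Invoke();
}

Because the ScopedOperatorProfile in Invoke() is guarded by #ifndef NDEBUG,
release builds pay no profiling overhead even when a profiler is supplied.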