From b885fb44dd2fca110cd086af3e53b0710d403a5c Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Fri, 29 Jan 2021 13:36:23 -0800 Subject: [PATCH] Add more detailed profiling. This change: * Improved the per-op profiling to the interpreter. * Maintains state within the MicroProfiler object such that all the logging can happen external to the interpreter. * Refactors the benchmarks to make use of this new functionality. --- tensorflow/lite/micro/BUILD | 3 +- .../micro/benchmarks/keyword_benchmark.cc | 58 +++++---- .../lite/micro/benchmarks/micro_benchmark.h | 58 ++------- .../benchmarks/person_detection_benchmark.cc | 83 ++++++++----- tensorflow/lite/micro/micro_interpreter.cc | 55 ++++----- tensorflow/lite/micro/micro_interpreter.h | 8 +- .../lite/micro/micro_interpreter_test.cc | 19 +-- tensorflow/lite/micro/micro_profiler.cc | 44 ++++--- tensorflow/lite/micro/micro_profiler.h | 113 ++++++++++++------ tensorflow/lite/micro/micro_time.h | 7 +- tensorflow/lite/micro/tools/make/Makefile | 1 - 11 files changed, 244 insertions(+), 205 deletions(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index 6ad30238227..9909f188123 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -179,9 +179,8 @@ cc_library( ], copts = micro_copts(), deps = [ - ":micro_compatibility", + ":micro_error_reporter", ":micro_time", - "//tensorflow/lite/core/api", "//tensorflow/lite/kernels/internal:compatibility", ], ) diff --git a/tensorflow/lite/micro/benchmarks/keyword_benchmark.cc b/tensorflow/lite/micro/benchmarks/keyword_benchmark.cc index 36b53de3535..ef6e876a086 100644 --- a/tensorflow/lite/micro/benchmarks/keyword_benchmark.cc +++ b/tensorflow/lite/micro/benchmarks/keyword_benchmark.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_error_reporter.h" #include "tensorflow/lite/micro/micro_interpreter.h" #include "tensorflow/lite/micro/micro_mutable_op_resolver.h" +#include "tensorflow/lite/micro/micro_profiler.h" /* * Keyword Spotting Benchmark for performance optimizations. The model used in @@ -30,12 +31,10 @@ limitations under the License. * weights and parameters are not representative of the original model. */ -namespace { +namespace tflite { using KeywordBenchmarkRunner = MicroBenchmarkRunner; -using KeywordOpResolver = tflite::MicroMutableOpResolver<6>; - -constexpr int kRandomSeed = 42; +using KeywordOpResolver = MicroMutableOpResolver<6>; // Create an area of memory to use for input, output, and intermediate arrays. // Align arena to 16 bytes to avoid alignment warnings on certain platforms. @@ -44,12 +43,11 @@ alignas(16) uint8_t tensor_arena[kTensorArenaSize]; uint8_t benchmark_runner_buffer[sizeof(KeywordBenchmarkRunner)]; uint8_t op_resolver_buffer[sizeof(KeywordOpResolver)]; -KeywordBenchmarkRunner* benchmark_runner = nullptr; // Initialize benchmark runner instance explicitly to avoid global init order // issues on Sparkfun. Use new since static variables within a method // are automatically surrounded by locking, which breaks bluepill and stm32f4. -void CreateBenchmarkRunner() { +KeywordBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) { // We allocate the KeywordOpResolver from a global buffer because the object's // lifetime must exceed that of the KeywordBenchmarkRunner object. 
KeywordOpResolver* op_resolver = new (op_resolver_buffer) KeywordOpResolver(); @@ -58,32 +56,42 @@ void CreateBenchmarkRunner() { op_resolver->AddSoftmax(); op_resolver->AddSvdf(); - benchmark_runner = new (benchmark_runner_buffer) + return new (benchmark_runner_buffer) KeywordBenchmarkRunner(g_keyword_scrambled_model_data, op_resolver, - tensor_arena, kTensorArenaSize); + tensor_arena, kTensorArenaSize, profiler); } -// Initializes keyword runner and sets random inputs. -void InitializeKeywordRunner() { - CreateBenchmarkRunner(); - benchmark_runner->SetRandomInput(kRandomSeed); -} - -// This method assumes InitializeKeywordRunner has already been run. -void KeywordRunNIerations(int iterations) { - for (int i = 0; i < iterations; i++) { - benchmark_runner->RunSingleIteration(); +void KeywordRunNIerations(int iterations, const char* tag, + KeywordBenchmarkRunner& benchmark_runner, + MicroProfiler& profiler) { + int32_t ticks = 0; + for (int i = 0; i < iterations; ++i) { + benchmark_runner.SetRandomInput(i); + profiler.ClearEvents(); + benchmark_runner.RunSingleIteration(); + ticks += profiler.GetTotalTicks(); } + MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks)); } -} // namespace +} // namespace tflite -TF_LITE_MICRO_BENCHMARKS_BEGIN +int main(int argc, char** argv) { + tflite::MicroProfiler profiler; -TF_LITE_MICRO_BENCHMARK(InitializeKeywordRunner()); + uint32_t event_handle = profiler.BeginEvent("InitializeKeywordRunner"); + tflite::KeywordBenchmarkRunner* benchmark_runner = + CreateBenchmarkRunner(&profiler); + profiler.EndEvent(event_handle); + profiler.Log(); + MicroPrintf(""); -TF_LITE_MICRO_BENCHMARK(KeywordRunNIerations(1)); + tflite::KeywordRunNIerations(1, "KeywordRunNIerations(1)", *benchmark_runner, + profiler); + profiler.Log(); + MicroPrintf(""); -TF_LITE_MICRO_BENCHMARK(KeywordRunNIerations(10)); - -TF_LITE_MICRO_BENCHMARKS_END + tflite::KeywordRunNIerations(10, "KeywordRunNIerations(10)", + *benchmark_runner, profiler); + 
MicroPrintf(""); +} diff --git a/tensorflow/lite/micro/benchmarks/micro_benchmark.h b/tensorflow/lite/micro/benchmarks/micro_benchmark.h index 83b5cbbdd5c..2eb3787e01d 100644 --- a/tensorflow/lite/micro/benchmarks/micro_benchmark.h +++ b/tensorflow/lite/micro/benchmarks/micro_benchmark.h @@ -21,57 +21,22 @@ limitations under the License. #include "tensorflow/lite/micro/micro_error_reporter.h" #include "tensorflow/lite/micro/micro_interpreter.h" #include "tensorflow/lite/micro/micro_op_resolver.h" +#include "tensorflow/lite/micro/micro_profiler.h" #include "tensorflow/lite/micro/micro_time.h" -namespace micro_benchmark { -extern tflite::ErrorReporter* reporter; -} // namespace micro_benchmark - -#define TF_LITE_MICRO_BENCHMARKS_BEGIN \ - namespace micro_benchmark { \ - tflite::ErrorReporter* reporter; \ - } \ - \ - int main(int argc, char** argv) { \ - tflite::MicroErrorReporter error_reporter; \ - micro_benchmark::reporter = &error_reporter; \ - int32_t start_ticks; \ - int32_t duration_ticks; \ - int32_t duration_ms; - -#define TF_LITE_MICRO_BENCHMARKS_END \ - return 0; \ - } - -#define TF_LITE_MICRO_BENCHMARK(func) \ - if (tflite::ticks_per_second() == 0) { \ - TF_LITE_REPORT_ERROR(micro_benchmark::reporter, \ - "no timer implementation found"); \ - return 0; \ - } \ - start_ticks = tflite::GetCurrentTimeTicks(); \ - func; \ - duration_ticks = tflite::GetCurrentTimeTicks() - start_ticks; \ - if (duration_ticks > INT_MAX / 1000) { \ - duration_ms = duration_ticks / (tflite::ticks_per_second() / 1000); \ - } else { \ - duration_ms = (duration_ticks * 1000) / tflite::ticks_per_second(); \ - } \ - micro_benchmark::reporter->Report("%s took %d ticks (%d ms)", #func, \ - duration_ticks, duration_ms); +namespace tflite { template class MicroBenchmarkRunner { public: - // The lifetimes of model, op_resolver and tensor_arena must exceed that of - // the created MicroBenchmarkRunner object. 
+ // The lifetimes of model, op_resolver, tensor_arena, profiler must exceed + // that of the created MicroBenchmarkRunner object. MicroBenchmarkRunner(const uint8_t* model, const tflite::MicroOpResolver* op_resolver, - uint8_t* tensor_arena, int tensor_arena_size) - : model_(tflite::GetModel(model)), - reporter_(µ_reporter_), - interpreter_(model_, *op_resolver, tensor_arena, tensor_arena_size, - reporter_) { + uint8_t* tensor_arena, int tensor_arena_size, + MicroProfiler* profiler) + : interpreter_(GetModel(model), *op_resolver, tensor_arena, + tensor_arena_size, GetMicroErrorReporter(), profiler) { interpreter_.AllocateTensors(); } @@ -79,7 +44,7 @@ class MicroBenchmarkRunner { // Run the model on this input and make sure it succeeds. TfLiteStatus invoke_status = interpreter_.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(reporter_, "Invoke failed."); + MicroPrintf("Invoke failed."); } } @@ -109,10 +74,9 @@ class MicroBenchmarkRunner { } private: - const tflite::Model* model_; - tflite::MicroErrorReporter micro_reporter_; - tflite::ErrorReporter* reporter_; tflite::MicroInterpreter interpreter_; }; +} // namespace tflite + #endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_MICRO_BENCHMARK_H_ diff --git a/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc b/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc index dc34059ee43..824b6d30853 100644 --- a/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc +++ b/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc @@ -31,7 +31,7 @@ limitations under the License. * exmaples/person_detection. 
*/ -namespace { +namespace tflite { using PersonDetectionOpResolver = tflite::AllOpsResolver; using PersonDetectionBenchmarkRunner = MicroBenchmarkRunner; @@ -43,48 +43,65 @@ alignas(16) uint8_t tensor_arena[kTensorArenaSize]; uint8_t op_resolver_buffer[sizeof(PersonDetectionOpResolver)]; uint8_t benchmark_runner_buffer[sizeof(PersonDetectionBenchmarkRunner)]; -PersonDetectionBenchmarkRunner* benchmark_runner = nullptr; // Initialize benchmark runner instance explicitly to avoid global init order // issues on Sparkfun. Use new since static variables within a method // are automatically surrounded by locking, which breaks bluepill and stm32f4. -void CreateBenchmarkRunner() { +PersonDetectionBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) { // We allocate PersonDetectionOpResolver from a global buffer // because the object's lifetime must exceed that of the // PersonDetectionBenchmarkRunner object. - benchmark_runner = new (benchmark_runner_buffer) - PersonDetectionBenchmarkRunner(g_person_detect_model_data, - new (op_resolver_buffer) - PersonDetectionOpResolver(), - tensor_arena, kTensorArenaSize); + return new (benchmark_runner_buffer) PersonDetectionBenchmarkRunner( + g_person_detect_model_data, + new (op_resolver_buffer) PersonDetectionOpResolver(), tensor_arena, + kTensorArenaSize, profiler); } -void InitializeBenchmarkRunner() { - CreateBenchmarkRunner(); - benchmark_runner->SetInput(reinterpret_cast(g_person_data)); -} - -void PersonDetectionTenIerationsWithPerson() { - benchmark_runner->SetInput(reinterpret_cast(g_person_data)); - for (int i = 0; i < 10; i++) { - benchmark_runner->RunSingleIteration(); +void PersonDetectionNIerations(const int8_t* input, int iterations, + const char* tag, + PersonDetectionBenchmarkRunner& benchmark_runner, + MicroProfiler& profiler) { + benchmark_runner.SetInput(input); + int32_t ticks = 0; + for (int i = 0; i < iterations; ++i) { + profiler.ClearEvents(); + benchmark_runner.RunSingleIteration(); + ticks += 
profiler.GetTotalTicks(); } + MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks)); } -void PersonDetectionTenIerationsWithoutPerson() { - benchmark_runner->SetInput(reinterpret_cast(g_no_person_data)); - for (int i = 0; i < 10; i++) { - benchmark_runner->RunSingleIteration(); - } +} // namespace tflite + +int main(int argc, char** argv) { + tflite::MicroProfiler profiler; + + uint32_t event_handle = profiler.BeginEvent("InitializeBenchmarkRunner"); + tflite::PersonDetectionBenchmarkRunner* benchmark_runner = + CreateBenchmarkRunner(&profiler); + profiler.EndEvent(event_handle); + profiler.Log(); + MicroPrintf(""); + + tflite::PersonDetectionNIerations( + reinterpret_cast(g_person_data), 1, + "WithPersonDataIterations(1)", *benchmark_runner, profiler); + profiler.Log(); + MicroPrintf(""); + + tflite::PersonDetectionNIerations( + reinterpret_cast(g_no_person_data), 1, + "NoPersonDataIterations(1)", *benchmark_runner, profiler); + profiler.Log(); + MicroPrintf(""); + + tflite::PersonDetectionNIerations( + reinterpret_cast(g_person_data), 10, + "WithPersonDataIterations(10)", *benchmark_runner, profiler); + MicroPrintf(""); + + tflite::PersonDetectionNIerations( + reinterpret_cast(g_no_person_data), 10, + "NoPersonDataIterations(10)", *benchmark_runner, profiler); + MicroPrintf(""); } - -} // namespace - -TF_LITE_MICRO_BENCHMARKS_BEGIN - -TF_LITE_MICRO_BENCHMARK(InitializeBenchmarkRunner()); -TF_LITE_MICRO_BENCHMARK(benchmark_runner->RunSingleIteration()); -TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithPerson()); -TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithoutPerson()); - -TF_LITE_MICRO_BENCHMARKS_END diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index cb59c4bb357..f01ed641478 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/lite/core/api/tensor_utils.h" #include "tensorflow/lite/micro/memory_helpers.h" #include "tensorflow/lite/micro/micro_allocator.h" +#include "tensorflow/lite/micro/micro_error_reporter.h" #include "tensorflow/lite/micro/micro_op_resolver.h" #include "tensorflow/lite/micro/micro_profiler.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -108,7 +109,7 @@ MicroInterpreter::MicroInterpreter(const Model* model, uint8_t* tensor_arena, size_t tensor_arena_size, ErrorReporter* error_reporter, - tflite::Profiler* profiler) + MicroProfiler* profiler) : model_(model), op_resolver_(op_resolver), error_reporter_(error_reporter), @@ -127,7 +128,7 @@ MicroInterpreter::MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver, MicroAllocator* allocator, ErrorReporter* error_reporter, - tflite::Profiler* profiler) + MicroProfiler* profiler) : model_(model), op_resolver_(op_resolver), error_reporter_(error_reporter), @@ -156,7 +157,7 @@ MicroInterpreter::~MicroInterpreter() { } } -void MicroInterpreter::Init(tflite::Profiler* profiler) { +void MicroInterpreter::Init(MicroProfiler* profiler) { const flatbuffers::Vector>* subgraphs = model_->subgraphs(); if (subgraphs->size() != 1) { @@ -320,35 +321,35 @@ TfLiteStatus MicroInterpreter::Invoke() { auto* node = &(node_and_registrations_[i].node); auto* registration = node_and_registrations_[i].registration; - if (registration->invoke) { - TfLiteStatus invoke_status; -#ifndef NDEBUG // Omit profiler overhead from release builds. - // The case where profiler == nullptr is handled by - // ScopedOperatorProfile. - tflite::Profiler* profiler = - reinterpret_cast(context_.profiler); - ScopedOperatorProfile scoped_profiler( - profiler, OpNameFromRegistration(registration), i); +// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with +// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is +// only defined for builds with the error strings. 
+#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + ScopedMicroProfiler scoped_profiler( + OpNameFromRegistration(registration), + reinterpret_cast(context_.profiler)); #endif - invoke_status = registration->invoke(&context_, node); - // All TfLiteTensor structs used in the kernel are allocated from temp - // memory in the allocator. This creates a chain of allocations in the - // temp section. The call below resets the chain of allocations to - // prepare for the next call. - allocator_.ResetTempAllocations(); + TFLITE_DCHECK(registration->invoke); + TfLiteStatus invoke_status = registration->invoke(&context_, node); - if (invoke_status == kTfLiteError) { - TF_LITE_REPORT_ERROR( - error_reporter_, - "Node %s (number %d) failed to invoke with status %d", - OpNameFromRegistration(registration), i, invoke_status); - return kTfLiteError; - } else if (invoke_status != kTfLiteOk) { - return invoke_status; - } + // All TfLiteTensor structs used in the kernel are allocated from temp + // memory in the allocator. This creates a chain of allocations in the + // temp section. The call below resets the chain of allocations to + // prepare for the next call. + allocator_.ResetTempAllocations(); + + if (invoke_status == kTfLiteError) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "Node %s (number %d) failed to invoke with status %d", + OpNameFromRegistration(registration), i, invoke_status); + return kTfLiteError; + } else if (invoke_status != kTfLiteOk) { + return invoke_status; } } + return kTfLiteOk; } diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index 88a2741dd3d..39fb09b2a26 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -21,10 +21,10 @@ limitations under the License. 
#include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/core/api/profiler.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/micro/micro_allocator.h" #include "tensorflow/lite/micro/micro_op_resolver.h" +#include "tensorflow/lite/micro/micro_profiler.h" #include "tensorflow/lite/portable_type_to_tflitetype.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -86,7 +86,7 @@ class MicroInterpreter { MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver, uint8_t* tensor_arena, size_t tensor_arena_size, ErrorReporter* error_reporter, - tflite::Profiler* profiler = nullptr); + MicroProfiler* profiler = nullptr); // Create an interpreter instance using an existing MicroAllocator instance. // This constructor should be used when creating an allocator that needs to @@ -95,7 +95,7 @@ class MicroInterpreter { // as long as that of the interpreter object. MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver, MicroAllocator* allocator, ErrorReporter* error_reporter, - tflite::Profiler* profiler = nullptr); + MicroProfiler* profiler = nullptr); ~MicroInterpreter(); @@ -179,7 +179,7 @@ class MicroInterpreter { private: // TODO(b/158263161): Consider switching to Create() function to enable better // error reporting during initialization. - void Init(tflite::Profiler* profiler); + void Init(MicroProfiler* profiler); NodeAndRegistration* node_and_registrations_ = nullptr; diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index fbc7a99912d..6336ec2271f 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/lite/core/api/flatbuffer_conversions.h" #include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/micro_error_reporter.h" +#include "tensorflow/lite/micro/micro_profiler.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/recording_micro_allocator.h" #include "tensorflow/lite/micro/test_helpers.h" @@ -28,28 +29,15 @@ limitations under the License. namespace tflite { namespace { -class MockProfiler : public tflite::Profiler { +class MockProfiler : public MicroProfiler { public: MockProfiler() : event_starts_(0), event_ends_(0) {} - ~MockProfiler() override = default; - // AddEvent is unused for Tf Micro. - void AddEvent(const char* tag, EventType event_type, uint64_t start, - uint64_t end, int64_t event_metadata1, - int64_t event_metadata2) override{}; - - // BeginEvent followed by code followed by EndEvent will profile the code - // enclosed. Multiple concurrent events are unsupported, so the return value - // is always 0. Event_metadata1 and event_metadata2 are unused. The tag - // pointer must be valid until EndEvent is called. - uint32_t BeginEvent(const char* tag, EventType event_type, - int64_t event_metadata1, - int64_t event_metadata2) override { + uint32_t BeginEvent(const char* tag) override { event_starts_++; return 0; } - // Event_handle is ignored since TF Micro does not support concurrent events. void EndEvent(uint32_t event_handle) override { event_ends_++; } int event_starts() { return event_starts_; } @@ -58,7 +46,6 @@ class MockProfiler : public tflite::Profiler { private: int event_starts_; int event_ends_; - TF_LITE_REMOVE_VIRTUAL_DELETE }; } // namespace diff --git a/tensorflow/lite/micro/micro_profiler.cc b/tensorflow/lite/micro/micro_profiler.cc index 83fb9f64713..792d8ae0b05 100644 --- a/tensorflow/lite/micro/micro_profiler.cc +++ b/tensorflow/lite/micro/micro_profiler.cc @@ -12,31 +12,47 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #include "tensorflow/lite/micro/micro_profiler.h" +#include + #include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/micro/micro_error_reporter.h" #include "tensorflow/lite/micro/micro_time.h" namespace tflite { -MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter) - : reporter_(reporter) {} +uint32_t MicroProfiler::BeginEvent(const char* tag) { + if (num_events_ == kMaxEvents) { + num_events_ = 0; + } -uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type, - int64_t event_metadata1, - int64_t event_metadata2) { - start_time_ = GetCurrentTimeTicks(); - TFLITE_DCHECK(tag != nullptr); - event_tag_ = tag; - return 0; + tags_[num_events_] = tag; + start_ticks_[num_events_] = GetCurrentTimeTicks(); + end_ticks_[num_events_] = start_ticks_[num_events_] - 1; + return num_events_++; } void MicroProfiler::EndEvent(uint32_t event_handle) { -#ifndef TF_LITE_STRIP_ERROR_STRINGS - int32_t end_time = GetCurrentTimeTicks(); - TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_, - end_time - start_time_); + TFLITE_DCHECK(event_handle < kMaxEvents); + end_ticks_[event_handle] = GetCurrentTimeTicks(); +} + +int32_t MicroProfiler::GetTotalTicks() const { + int32_t ticks = 0; + for (int i = 0; i < num_events_; ++i) { + ticks += end_ticks_[i] - start_ticks_[i]; + } + return ticks; +} + +void MicroProfiler::Log() const { +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + for (int i = 0; i < num_events_; ++i) { + int32_t ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%s took %d ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); + } #endif } + } // namespace tflite diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index a3144b3a173..62ea40b1772 100644 --- 
a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -16,9 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ #define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/core/api/profiler.h" -#include "tensorflow/lite/micro/compatibility.h" +#include namespace tflite { @@ -26,46 +24,91 @@ namespace tflite { // performance. Bottleck operators can be identified along with slow code // sections. This can be used in conjunction with running the relevant micro // benchmark to evaluate end-to-end performance. -// -// Usage example: -// MicroProfiler profiler(error_reporter); -// { -// ScopedProfile scoped_profile(profiler, tag); -// work_to_profile(); -// } -// -// This will call the following methods in order: -// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0) -// work_to_profile(); -// profiler->EndEvent(event_handle) -class MicroProfiler : public tflite::Profiler { +class MicroProfiler { public: - explicit MicroProfiler(tflite::ErrorReporter* reporter); - ~MicroProfiler() override = default; + MicroProfiler() = default; + ~MicroProfiler() = default; - // AddEvent is unused for Tf Micro. - void AddEvent(const char* tag, EventType event_type, uint64_t start, - uint64_t end, int64_t event_metadata1, - int64_t event_metadata2) override{}; + // Marks the start of a new event and returns an event handle that can be used + // to mark the end of the event via EndEvent. The lifetime of the tag + // parameter must exceed that of the MicroProfiler. + virtual uint32_t BeginEvent(const char* tag); - // BeginEvent followed by code followed by EndEvent will profile the code - // enclosed. Multiple concurrent events are unsupported, so the return value - // is always 0. Event_metadata1 and event_metadata2 are unused. The tag - // pointer must be valid until EndEvent is called. 
- uint32_t BeginEvent(const char* tag, EventType event_type, - int64_t event_metadata1, - int64_t event_metadata2) override; + // Marks the end of an event associated with event_handle. It is the + // responsibility of the caller to ensure that EndEvent is called once and + // only once per event_handle. + // + // If EndEvent is called more than once for the same event_handle, the last + // call will be used as the end of event marker. If EndEvent is called 0 times + // for a particular event_handle, the duration of that event will be 0 ticks. + virtual void EndEvent(uint32_t event_handle); - // Event_handle is ignored since TF Micro does not support concurrent events. - void EndEvent(uint32_t event_handle) override; + // Clears all the events that have been currently profiled. + void ClearEvents() { num_events_ = 0; } + + // Returns the sum of the ticks taken across all the events. This number + // is only meaningful if all of the events are disjoint (the end time of + // event[i] <= start time of event[i+1]). + int32_t GetTotalTicks() const; + + // Prints the profiling information of each of the events. + void Log() const; private: - tflite::ErrorReporter* reporter_; - int32_t start_time_; - const char* event_tag_; - TF_LITE_REMOVE_VIRTUAL_DELETE + // Maximum number of events that this class can keep track of. If we call + // BeginEvent more than kMaxEvents number of times, then the oldest event's + // profiling information will be overwritten. + static constexpr int kMaxEvents = 50; + + const char* tags_[kMaxEvents]; + int32_t start_ticks_[kMaxEvents]; + int32_t end_ticks_[kMaxEvents]; + int num_events_ = 0; }; +#if defined(NDEBUG) +// For release builds, the ScopedMicroProfiler is a noop. +// +// This is done because the ScopedMicroProfiler is used as part of the +// MicroInterpreter and we want to ensure zero overhead for the release builds.
+class ScopedMicroProfiler { + public: + explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler) {} +}; + +#else + +// This class can be used to add events to a MicroProfiler object that span the +// lifetime of the ScopedMicroProfiler object. +// Usage example: +// +// MicroProfiler profiler; +// ... +// { +// ScopedMicroProfiler scoped_profiler("custom_tag", &profiler); +// work_to_profile(); +// } +class ScopedMicroProfiler { + public: + explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler) + : profiler_(profiler) { + if (profiler_ != nullptr) { + event_handle_ = profiler_->BeginEvent(tag); + } + } + + ~ScopedMicroProfiler() { + if (profiler_ != nullptr) { + profiler_->EndEvent(event_handle_); + } + } + + private: + uint32_t event_handle_ = 0; + MicroProfiler* profiler_ = nullptr; +}; +#endif // defined(NDEBUG) + } // namespace tflite #endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ diff --git a/tensorflow/lite/micro/micro_time.h b/tensorflow/lite/micro/micro_time.h index 465490a8ed9..fac9069b1a7 100644 --- a/tensorflow/lite/micro/micro_time.h +++ b/tensorflow/lite/micro/micro_time.h @@ -15,7 +15,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ #define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ -#include +#include namespace tflite { @@ -26,6 +26,11 @@ int32_t ticks_per_second(); // Return time in ticks. The meaning of a tick varies per platform.
int32_t GetCurrentTimeTicks(); +inline int32_t TicksToMs(int32_t ticks) { + return static_cast(1000.0f * static_cast(ticks) / + static_cast(ticks_per_second())); +} + } // namespace tflite #endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index a44895281df..79e1b8a8181 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -393,7 +393,6 @@ tensorflow/lite/c/common.h \ tensorflow/lite/core/api/error_reporter.h \ tensorflow/lite/core/api/flatbuffer_conversions.h \ tensorflow/lite/core/api/op_resolver.h \ -tensorflow/lite/core/api/profiler.h \ tensorflow/lite/core/api/tensor_utils.h \ tensorflow/lite/kernels/internal/common.h \ tensorflow/lite/kernels/internal/compatibility.h \