Add more detailed profiling.

This change:
 * Improves the per-op profiling in the interpreter.
 * Maintains state within the MicroProfiler object such that all the logging can happen external to the interpreter.
 * Refactors the benchmarks to make use of this new functionality.
This commit is contained in:
Advait Jain 2021-01-29 13:36:23 -08:00
parent 06e2d90cc1
commit b885fb44dd
11 changed files with 244 additions and 205 deletions

View File

@ -179,9 +179,8 @@ cc_library(
],
copts = micro_copts(),
deps = [
":micro_compatibility",
":micro_error_reporter",
":micro_time",
"//tensorflow/lite/core/api",
"//tensorflow/lite/kernels/internal:compatibility",
],
)

View File

@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
/*
* Keyword Spotting Benchmark for performance optimizations. The model used in
@ -30,12 +31,10 @@ limitations under the License.
* weights and parameters are not representative of the original model.
*/
namespace {
namespace tflite {
using KeywordBenchmarkRunner = MicroBenchmarkRunner<int16_t>;
using KeywordOpResolver = tflite::MicroMutableOpResolver<6>;
constexpr int kRandomSeed = 42;
using KeywordOpResolver = MicroMutableOpResolver<6>;
// Create an area of memory to use for input, output, and intermediate arrays.
// Align arena to 16 bytes to avoid alignment warnings on certain platforms.
@ -44,12 +43,11 @@ alignas(16) uint8_t tensor_arena[kTensorArenaSize];
uint8_t benchmark_runner_buffer[sizeof(KeywordBenchmarkRunner)];
uint8_t op_resolver_buffer[sizeof(KeywordOpResolver)];
KeywordBenchmarkRunner* benchmark_runner = nullptr;
// Initialize benchmark runner instance explicitly to avoid global init order
// issues on Sparkfun. Use new since static variables within a method
// are automatically surrounded by locking, which breaks bluepill and stm32f4.
void CreateBenchmarkRunner() {
KeywordBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
// We allocate the KeywordOpResolver from a global buffer because the object's
// lifetime must exceed that of the KeywordBenchmarkRunner object.
KeywordOpResolver* op_resolver = new (op_resolver_buffer) KeywordOpResolver();
@ -58,32 +56,42 @@ void CreateBenchmarkRunner() {
op_resolver->AddSoftmax();
op_resolver->AddSvdf();
benchmark_runner = new (benchmark_runner_buffer)
return new (benchmark_runner_buffer)
KeywordBenchmarkRunner(g_keyword_scrambled_model_data, op_resolver,
tensor_arena, kTensorArenaSize);
tensor_arena, kTensorArenaSize, profiler);
}
// Initializes keyword runner and sets random inputs.
void InitializeKeywordRunner() {
CreateBenchmarkRunner();
benchmark_runner->SetRandomInput(kRandomSeed);
}
// This method assumes InitializeKeywordRunner has already been run.
void KeywordRunNIerations(int iterations) {
for (int i = 0; i < iterations; i++) {
benchmark_runner->RunSingleIteration();
void KeywordRunNIerations(int iterations, const char* tag,
KeywordBenchmarkRunner& benchmark_runner,
MicroProfiler& profiler) {
int32_t ticks = 0;
for (int i = 0; i < iterations; ++i) {
benchmark_runner.SetRandomInput(i);
profiler.ClearEvents();
benchmark_runner.RunSingleIteration();
ticks += profiler.GetTotalTicks();
}
MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks));
}
} // namespace
} // namespace tflite
TF_LITE_MICRO_BENCHMARKS_BEGIN
int main(int argc, char** argv) {
tflite::MicroProfiler profiler;
TF_LITE_MICRO_BENCHMARK(InitializeKeywordRunner());
uint32_t event_handle = profiler.BeginEvent("InitializeKeywordRunner");
tflite::KeywordBenchmarkRunner* benchmark_runner =
CreateBenchmarkRunner(&profiler);
profiler.EndEvent(event_handle);
profiler.Log();
MicroPrintf("");
TF_LITE_MICRO_BENCHMARK(KeywordRunNIerations(1));
tflite::KeywordRunNIerations(1, "KeywordRunNIerations(1)", *benchmark_runner,
profiler);
profiler.Log();
MicroPrintf("");
TF_LITE_MICRO_BENCHMARK(KeywordRunNIerations(10));
TF_LITE_MICRO_BENCHMARKS_END
tflite::KeywordRunNIerations(10, "KeywordRunNIerations(10)",
*benchmark_runner, profiler);
MicroPrintf("");
}

View File

@ -21,57 +21,22 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/micro/micro_time.h"
namespace micro_benchmark {
extern tflite::ErrorReporter* reporter;
} // namespace micro_benchmark
#define TF_LITE_MICRO_BENCHMARKS_BEGIN \
namespace micro_benchmark { \
tflite::ErrorReporter* reporter; \
} \
\
int main(int argc, char** argv) { \
tflite::MicroErrorReporter error_reporter; \
micro_benchmark::reporter = &error_reporter; \
int32_t start_ticks; \
int32_t duration_ticks; \
int32_t duration_ms;
#define TF_LITE_MICRO_BENCHMARKS_END \
return 0; \
}
#define TF_LITE_MICRO_BENCHMARK(func) \
if (tflite::ticks_per_second() == 0) { \
TF_LITE_REPORT_ERROR(micro_benchmark::reporter, \
"no timer implementation found"); \
return 0; \
} \
start_ticks = tflite::GetCurrentTimeTicks(); \
func; \
duration_ticks = tflite::GetCurrentTimeTicks() - start_ticks; \
if (duration_ticks > INT_MAX / 1000) { \
duration_ms = duration_ticks / (tflite::ticks_per_second() / 1000); \
} else { \
duration_ms = (duration_ticks * 1000) / tflite::ticks_per_second(); \
} \
micro_benchmark::reporter->Report("%s took %d ticks (%d ms)", #func, \
duration_ticks, duration_ms);
namespace tflite {
template <typename inputT>
class MicroBenchmarkRunner {
public:
// The lifetimes of model, op_resolver and tensor_arena must exceed that of
// the created MicroBenchmarkRunner object.
// The lifetimes of model, op_resolver, tensor_arena, profiler must exceed
// that of the created MicroBenchmarkRunner object.
MicroBenchmarkRunner(const uint8_t* model,
const tflite::MicroOpResolver* op_resolver,
uint8_t* tensor_arena, int tensor_arena_size)
: model_(tflite::GetModel(model)),
reporter_(&micro_reporter_),
interpreter_(model_, *op_resolver, tensor_arena, tensor_arena_size,
reporter_) {
uint8_t* tensor_arena, int tensor_arena_size,
MicroProfiler* profiler)
: interpreter_(GetModel(model), *op_resolver, tensor_arena,
tensor_arena_size, GetMicroErrorReporter(), profiler) {
interpreter_.AllocateTensors();
}
@ -79,7 +44,7 @@ class MicroBenchmarkRunner {
// Run the model on this input and make sure it succeeds.
TfLiteStatus invoke_status = interpreter_.Invoke();
if (invoke_status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(reporter_, "Invoke failed.");
MicroPrintf("Invoke failed.");
}
}
@ -109,10 +74,9 @@ class MicroBenchmarkRunner {
}
private:
const tflite::Model* model_;
tflite::MicroErrorReporter micro_reporter_;
tflite::ErrorReporter* reporter_;
tflite::MicroInterpreter interpreter_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_MICRO_BENCHMARK_H_

View File

@ -31,7 +31,7 @@ limitations under the License.
 * examples/person_detection.
*/
namespace {
namespace tflite {
using PersonDetectionOpResolver = tflite::AllOpsResolver;
using PersonDetectionBenchmarkRunner = MicroBenchmarkRunner<int8_t>;
@ -43,48 +43,65 @@ alignas(16) uint8_t tensor_arena[kTensorArenaSize];
uint8_t op_resolver_buffer[sizeof(PersonDetectionOpResolver)];
uint8_t benchmark_runner_buffer[sizeof(PersonDetectionBenchmarkRunner)];
PersonDetectionBenchmarkRunner* benchmark_runner = nullptr;
// Initialize benchmark runner instance explicitly to avoid global init order
// issues on Sparkfun. Use new since static variables within a method
// are automatically surrounded by locking, which breaks bluepill and stm32f4.
void CreateBenchmarkRunner() {
PersonDetectionBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
// We allocate PersonDetectionOpResolver from a global buffer
// because the object's lifetime must exceed that of the
// PersonDetectionBenchmarkRunner object.
benchmark_runner = new (benchmark_runner_buffer)
PersonDetectionBenchmarkRunner(g_person_detect_model_data,
new (op_resolver_buffer)
PersonDetectionOpResolver(),
tensor_arena, kTensorArenaSize);
return new (benchmark_runner_buffer) PersonDetectionBenchmarkRunner(
g_person_detect_model_data,
new (op_resolver_buffer) PersonDetectionOpResolver(), tensor_arena,
kTensorArenaSize, profiler);
}
void InitializeBenchmarkRunner() {
CreateBenchmarkRunner();
benchmark_runner->SetInput(reinterpret_cast<const int8_t*>(g_person_data));
}
void PersonDetectionTenIerationsWithPerson() {
benchmark_runner->SetInput(reinterpret_cast<const int8_t*>(g_person_data));
for (int i = 0; i < 10; i++) {
benchmark_runner->RunSingleIteration();
void PersonDetectionNIerations(const int8_t* input, int iterations,
const char* tag,
PersonDetectionBenchmarkRunner& benchmark_runner,
MicroProfiler& profiler) {
benchmark_runner.SetInput(input);
int32_t ticks = 0;
for (int i = 0; i < iterations; ++i) {
profiler.ClearEvents();
benchmark_runner.RunSingleIteration();
ticks += profiler.GetTotalTicks();
}
MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks));
}
void PersonDetectionTenIerationsWithoutPerson() {
benchmark_runner->SetInput(reinterpret_cast<const int8_t*>(g_no_person_data));
for (int i = 0; i < 10; i++) {
benchmark_runner->RunSingleIteration();
}
} // namespace tflite
int main(int argc, char** argv) {
tflite::MicroProfiler profiler;
uint32_t event_handle = profiler.BeginEvent("InitializeBenchmarkRunner");
tflite::PersonDetectionBenchmarkRunner* benchmark_runner =
CreateBenchmarkRunner(&profiler);
profiler.EndEvent(event_handle);
profiler.Log();
MicroPrintf("");
tflite::PersonDetectionNIerations(
reinterpret_cast<const int8_t*>(g_person_data), 1,
"WithPersonDataIterations(1)", *benchmark_runner, profiler);
profiler.Log();
MicroPrintf("");
tflite::PersonDetectionNIerations(
reinterpret_cast<const int8_t*>(g_no_person_data), 1,
"NoPersonDataIterations(1)", *benchmark_runner, profiler);
profiler.Log();
MicroPrintf("");
tflite::PersonDetectionNIerations(
reinterpret_cast<const int8_t*>(g_person_data), 10,
"WithPersonDataIterations(10)", *benchmark_runner, profiler);
MicroPrintf("");
tflite::PersonDetectionNIerations(
reinterpret_cast<const int8_t*>(g_no_person_data), 10,
"NoPersonDataIterations(10)", *benchmark_runner, profiler);
MicroPrintf("");
}
} // namespace
TF_LITE_MICRO_BENCHMARKS_BEGIN
TF_LITE_MICRO_BENCHMARK(InitializeBenchmarkRunner());
TF_LITE_MICRO_BENCHMARK(benchmark_runner->RunSingleIteration());
TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithPerson());
TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithoutPerson());
TF_LITE_MICRO_BENCHMARKS_END

View File

@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -108,7 +109,7 @@ MicroInterpreter::MicroInterpreter(const Model* model,
uint8_t* tensor_arena,
size_t tensor_arena_size,
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
MicroProfiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
@ -127,7 +128,7 @@ MicroInterpreter::MicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
MicroAllocator* allocator,
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
MicroProfiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
@ -156,7 +157,7 @@ MicroInterpreter::~MicroInterpreter() {
}
}
void MicroInterpreter::Init(tflite::Profiler* profiler) {
void MicroInterpreter::Init(MicroProfiler* profiler) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model_->subgraphs();
if (subgraphs->size() != 1) {
@ -320,35 +321,35 @@ TfLiteStatus MicroInterpreter::Invoke() {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->invoke) {
TfLiteStatus invoke_status;
#ifndef NDEBUG // Omit profiler overhead from release builds.
// The case where profiler == nullptr is handled by
// ScopedOperatorProfile.
tflite::Profiler* profiler =
reinterpret_cast<tflite::Profiler*>(context_.profiler);
ScopedOperatorProfile scoped_profiler(
profiler, OpNameFromRegistration(registration), i);
// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
// only defined for builds with the error strings.
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
ScopedMicroProfiler scoped_profiler(
OpNameFromRegistration(registration),
reinterpret_cast<MicroProfiler*>(context_.profiler));
#endif
invoke_status = registration->invoke(&context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
TFLITE_DCHECK(registration->invoke);
TfLiteStatus invoke_status = registration->invoke(&context_, node);
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
}
return kTfLiteOk;
}

View File

@ -21,10 +21,10 @@ limitations under the License.
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -86,7 +86,7 @@ class MicroInterpreter {
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
MicroProfiler* profiler = nullptr);
// Create an interpreter instance using an existing MicroAllocator instance.
// This constructor should be used when creating an allocator that needs to
@ -95,7 +95,7 @@ class MicroInterpreter {
// as long as that of the interpreter object.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
MicroAllocator* allocator, ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
MicroProfiler* profiler = nullptr);
~MicroInterpreter();
@ -179,7 +179,7 @@ class MicroInterpreter {
private:
// TODO(b/158263161): Consider switching to Create() function to enable better
// error reporting during initialization.
void Init(tflite::Profiler* profiler);
void Init(MicroProfiler* profiler);
NodeAndRegistration* node_and_registrations_ = nullptr;

View File

@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"
#include "tensorflow/lite/micro/test_helpers.h"
@ -28,28 +29,15 @@ limitations under the License.
namespace tflite {
namespace {
class MockProfiler : public tflite::Profiler {
class MockProfiler : public MicroProfiler {
public:
MockProfiler() : event_starts_(0), event_ends_(0) {}
~MockProfiler() override = default;
// AddEvent is unused for Tf Micro.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) override{};
// BeginEvent followed by code followed by EndEvent will profile the code
// enclosed. Multiple concurrent events are unsupported, so the return value
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
// pointer must be valid until EndEvent is called.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) override {
uint32_t BeginEvent(const char* tag) override {
event_starts_++;
return 0;
}
// Event_handle is ignored since TF Micro does not support concurrent events.
void EndEvent(uint32_t event_handle) override { event_ends_++; }
int event_starts() { return event_starts_; }
@ -58,7 +46,6 @@ class MockProfiler : public tflite::Profiler {
private:
int event_starts_;
int event_ends_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace

View File

@ -12,31 +12,47 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_profiler.h"
#include <cstdint>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_time.h"
namespace tflite {
MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter)
: reporter_(reporter) {}
uint32_t MicroProfiler::BeginEvent(const char* tag) {
if (num_events_ == kMaxEvents) {
num_events_ = 0;
}
uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) {
start_time_ = GetCurrentTimeTicks();
TFLITE_DCHECK(tag != nullptr);
event_tag_ = tag;
return 0;
tags_[num_events_] = tag;
start_ticks_[num_events_] = GetCurrentTimeTicks();
end_ticks_[num_events_] = start_ticks_[num_events_] - 1;
return num_events_++;
}
void MicroProfiler::EndEvent(uint32_t event_handle) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
int32_t end_time = GetCurrentTimeTicks();
TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_,
end_time - start_time_);
TFLITE_DCHECK(event_handle < kMaxEvents);
end_ticks_[event_handle] = GetCurrentTimeTicks();
}
int32_t MicroProfiler::GetTotalTicks() const {
int32_t ticks = 0;
for (int i = 0; i < num_events_; ++i) {
ticks += end_ticks_[i] - start_ticks_[i];
}
return ticks;
}
void MicroProfiler::Log() const {
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
for (int i = 0; i < num_events_; ++i) {
int32_t ticks = end_ticks_[i] - start_ticks_[i];
MicroPrintf("%s took %d ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks));
}
#endif
}
} // namespace tflite

View File

@ -16,9 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/micro/compatibility.h"
#include <cstdint>
namespace tflite {
@ -26,46 +24,91 @@ namespace tflite {
// performance. Bottleneck operators can be identified along with slow code
// sections. This can be used in conjunction with running the relevant micro
// benchmark to evaluate end-to-end performance.
//
// Usage example:
// MicroProfiler profiler(error_reporter);
// {
// ScopedProfile scoped_profile(profiler, tag);
// work_to_profile();
// }
//
// This will call the following methods in order:
// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0)
// work_to_profile();
// profiler->EndEvent(event_handle)
class MicroProfiler : public tflite::Profiler {
class MicroProfiler {
public:
explicit MicroProfiler(tflite::ErrorReporter* reporter);
~MicroProfiler() override = default;
MicroProfiler() = default;
~MicroProfiler() = default;
// AddEvent is unused for Tf Micro.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) override{};
// Marks the start of a new event and returns an event handle that can be used
// to mark the end of the event via EndEvent. The lifetime of the tag
// parameter must exceed that of the MicroProfiler.
virtual uint32_t BeginEvent(const char* tag);
// BeginEvent followed by code followed by EndEvent will profile the code
// enclosed. Multiple concurrent events are unsupported, so the return value
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
// pointer must be valid until EndEvent is called.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) override;
// Marks the end of an event associated with event_handle. It is the
// responsibility of the caller to ensure than EndEvent is called once and
// only once per event_handle.
//
// If EndEvent is called more than once for the same event_handle, the last
// call will be used as the end of event marker. If EndEvent is called 0 times
// for a particular event_handle, the duration of that event will be 0 ticks.
virtual void EndEvent(uint32_t event_handle);
// Event_handle is ignored since TF Micro does not support concurrent events.
void EndEvent(uint32_t event_handle) override;
// Clears all the events that have been currently profiled.
void ClearEvents() { num_events_ = 0; }
// Returns the sum of the ticks taken across all the events. This number
// is only meaningful if all of the events are disjoint (the end time of
// event[i] <= start time of event[i+1]).
int32_t GetTotalTicks() const;
// Prints the profiling information of each of the events.
void Log() const;
private:
tflite::ErrorReporter* reporter_;
int32_t start_time_;
const char* event_tag_;
TF_LITE_REMOVE_VIRTUAL_DELETE
// Maximum number of events that this class can keep track of. If we call
// AddEvent more than kMaxEvents number of times, then the oldest event's
// profiling information will be overwritten.
static constexpr int kMaxEvents = 50;
const char* tags_[kMaxEvents];
int32_t start_ticks_[kMaxEvents];
int32_t end_ticks_[kMaxEvents];
int num_events_ = 0;
};
#if defined(NDEBUG)
// For release builds, the ScopedMicroProfiler is a noop.
//
// This is done because the ScopedMicroProfiler is used as part of the
// MicroInterpreter and we want to ensure zero overhead for the release builds.
class ScopedMicroProfiler {
public:
explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler) {}
};
#else
// This class can be used to add events to a MicroProfiler object that span the
// lifetime of the ScopedMicroProfiler object.
// Usage example:
//
// MicroProfiler profiler();
// ...
// {
// ScopedMicroProfiler scoped_profiler("custom_tag", profiler);
// work_to_profile();
// }
class ScopedMicroProfiler {
public:
explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler)
: profiler_(profiler) {
if (profiler_ != nullptr) {
event_handle_ = profiler_->BeginEvent(tag);
}
}
~ScopedMicroProfiler() {
if (profiler_ != nullptr) {
profiler_->EndEvent(event_handle_);
}
}
private:
uint32_t event_handle_ = 0;
MicroProfiler* profiler_ = nullptr;
};
#endif // !defined(NDEBUG)
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_

View File

@ -15,7 +15,7 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#include <stdint.h>
#include <cstdint>
namespace tflite {
@ -26,6 +26,11 @@ int32_t ticks_per_second();
// Return time in ticks. The meaning of a tick varies per platform.
int32_t GetCurrentTimeTicks();
inline int32_t TicksToMs(int32_t ticks) {
return static_cast<int32_t>(1000.0f * static_cast<float>(ticks) /
static_cast<float>(ticks_per_second()));
}
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_

View File

@ -393,7 +393,6 @@ tensorflow/lite/c/common.h \
tensorflow/lite/core/api/error_reporter.h \
tensorflow/lite/core/api/flatbuffer_conversions.h \
tensorflow/lite/core/api/op_resolver.h \
tensorflow/lite/core/api/profiler.h \
tensorflow/lite/core/api/tensor_utils.h \
tensorflow/lite/kernels/internal/common.h \
tensorflow/lite/kernels/internal/compatibility.h \