Add more detailed profiling.
This change: * Improves the per-op profiling in the interpreter. * Maintains state within the MicroProfiler object so that all the logging can happen external to the interpreter. * Refactors the benchmarks to make use of this new functionality.
This commit is contained in:
parent
06e2d90cc1
commit
b885fb44dd
@ -179,9 +179,8 @@ cc_library(
|
||||
],
|
||||
copts = micro_copts(),
|
||||
deps = [
|
||||
":micro_compatibility",
|
||||
":micro_error_reporter",
|
||||
":micro_time",
|
||||
"//tensorflow/lite/core/api",
|
||||
"//tensorflow/lite/kernels/internal:compatibility",
|
||||
],
|
||||
)
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/micro_interpreter.h"
|
||||
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
|
||||
/*
|
||||
* Keyword Spotting Benchmark for performance optimizations. The model used in
|
||||
@ -30,12 +31,10 @@ limitations under the License.
|
||||
* weights and parameters are not representative of the original model.
|
||||
*/
|
||||
|
||||
namespace {
|
||||
namespace tflite {
|
||||
|
||||
using KeywordBenchmarkRunner = MicroBenchmarkRunner<int16_t>;
|
||||
using KeywordOpResolver = tflite::MicroMutableOpResolver<6>;
|
||||
|
||||
constexpr int kRandomSeed = 42;
|
||||
using KeywordOpResolver = MicroMutableOpResolver<6>;
|
||||
|
||||
// Create an area of memory to use for input, output, and intermediate arrays.
|
||||
// Align arena to 16 bytes to avoid alignment warnings on certain platforms.
|
||||
@ -44,12 +43,11 @@ alignas(16) uint8_t tensor_arena[kTensorArenaSize];
|
||||
|
||||
uint8_t benchmark_runner_buffer[sizeof(KeywordBenchmarkRunner)];
|
||||
uint8_t op_resolver_buffer[sizeof(KeywordOpResolver)];
|
||||
KeywordBenchmarkRunner* benchmark_runner = nullptr;
|
||||
|
||||
// Initialize benchmark runner instance explicitly to avoid global init order
|
||||
// issues on Sparkfun. Use new since static variables within a method
|
||||
// are automatically surrounded by locking, which breaks bluepill and stm32f4.
|
||||
void CreateBenchmarkRunner() {
|
||||
KeywordBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
|
||||
// We allocate the KeywordOpResolver from a global buffer because the object's
|
||||
// lifetime must exceed that of the KeywordBenchmarkRunner object.
|
||||
KeywordOpResolver* op_resolver = new (op_resolver_buffer) KeywordOpResolver();
|
||||
@ -58,32 +56,42 @@ void CreateBenchmarkRunner() {
|
||||
op_resolver->AddSoftmax();
|
||||
op_resolver->AddSvdf();
|
||||
|
||||
benchmark_runner = new (benchmark_runner_buffer)
|
||||
return new (benchmark_runner_buffer)
|
||||
KeywordBenchmarkRunner(g_keyword_scrambled_model_data, op_resolver,
|
||||
tensor_arena, kTensorArenaSize);
|
||||
tensor_arena, kTensorArenaSize, profiler);
|
||||
}
|
||||
|
||||
// Initializes keyword runner and sets random inputs.
|
||||
void InitializeKeywordRunner() {
|
||||
CreateBenchmarkRunner();
|
||||
benchmark_runner->SetRandomInput(kRandomSeed);
|
||||
}
|
||||
|
||||
// This method assumes InitializeKeywordRunner has already been run.
|
||||
void KeywordRunNIerations(int iterations) {
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
benchmark_runner->RunSingleIteration();
|
||||
void KeywordRunNIerations(int iterations, const char* tag,
|
||||
KeywordBenchmarkRunner& benchmark_runner,
|
||||
MicroProfiler& profiler) {
|
||||
int32_t ticks = 0;
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
benchmark_runner.SetRandomInput(i);
|
||||
profiler.ClearEvents();
|
||||
benchmark_runner.RunSingleIteration();
|
||||
ticks += profiler.GetTotalTicks();
|
||||
}
|
||||
MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace tflite
|
||||
|
||||
TF_LITE_MICRO_BENCHMARKS_BEGIN
|
||||
int main(int argc, char** argv) {
|
||||
tflite::MicroProfiler profiler;
|
||||
|
||||
TF_LITE_MICRO_BENCHMARK(InitializeKeywordRunner());
|
||||
uint32_t event_handle = profiler.BeginEvent("InitializeKeywordRunner");
|
||||
tflite::KeywordBenchmarkRunner* benchmark_runner =
|
||||
CreateBenchmarkRunner(&profiler);
|
||||
profiler.EndEvent(event_handle);
|
||||
profiler.Log();
|
||||
MicroPrintf("");
|
||||
|
||||
TF_LITE_MICRO_BENCHMARK(KeywordRunNIerations(1));
|
||||
tflite::KeywordRunNIerations(1, "KeywordRunNIerations(1)", *benchmark_runner,
|
||||
profiler);
|
||||
profiler.Log();
|
||||
MicroPrintf("");
|
||||
|
||||
TF_LITE_MICRO_BENCHMARK(KeywordRunNIerations(10));
|
||||
|
||||
TF_LITE_MICRO_BENCHMARKS_END
|
||||
tflite::KeywordRunNIerations(10, "KeywordRunNIerations(10)",
|
||||
*benchmark_runner, profiler);
|
||||
MicroPrintf("");
|
||||
}
|
||||
|
@ -21,57 +21,22 @@ limitations under the License.
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/micro_interpreter.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
#include "tensorflow/lite/micro/micro_time.h"
|
||||
|
||||
namespace micro_benchmark {
|
||||
extern tflite::ErrorReporter* reporter;
|
||||
} // namespace micro_benchmark
|
||||
|
||||
#define TF_LITE_MICRO_BENCHMARKS_BEGIN \
|
||||
namespace micro_benchmark { \
|
||||
tflite::ErrorReporter* reporter; \
|
||||
} \
|
||||
\
|
||||
int main(int argc, char** argv) { \
|
||||
tflite::MicroErrorReporter error_reporter; \
|
||||
micro_benchmark::reporter = &error_reporter; \
|
||||
int32_t start_ticks; \
|
||||
int32_t duration_ticks; \
|
||||
int32_t duration_ms;
|
||||
|
||||
#define TF_LITE_MICRO_BENCHMARKS_END \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
#define TF_LITE_MICRO_BENCHMARK(func) \
|
||||
if (tflite::ticks_per_second() == 0) { \
|
||||
TF_LITE_REPORT_ERROR(micro_benchmark::reporter, \
|
||||
"no timer implementation found"); \
|
||||
return 0; \
|
||||
} \
|
||||
start_ticks = tflite::GetCurrentTimeTicks(); \
|
||||
func; \
|
||||
duration_ticks = tflite::GetCurrentTimeTicks() - start_ticks; \
|
||||
if (duration_ticks > INT_MAX / 1000) { \
|
||||
duration_ms = duration_ticks / (tflite::ticks_per_second() / 1000); \
|
||||
} else { \
|
||||
duration_ms = (duration_ticks * 1000) / tflite::ticks_per_second(); \
|
||||
} \
|
||||
micro_benchmark::reporter->Report("%s took %d ticks (%d ms)", #func, \
|
||||
duration_ticks, duration_ms);
|
||||
namespace tflite {
|
||||
|
||||
template <typename inputT>
|
||||
class MicroBenchmarkRunner {
|
||||
public:
|
||||
// The lifetimes of model, op_resolver and tensor_arena must exceed that of
|
||||
// the created MicroBenchmarkRunner object.
|
||||
// The lifetimes of model, op_resolver, tensor_arena, profiler must exceed
|
||||
// that of the created MicroBenchmarkRunner object.
|
||||
MicroBenchmarkRunner(const uint8_t* model,
|
||||
const tflite::MicroOpResolver* op_resolver,
|
||||
uint8_t* tensor_arena, int tensor_arena_size)
|
||||
: model_(tflite::GetModel(model)),
|
||||
reporter_(&micro_reporter_),
|
||||
interpreter_(model_, *op_resolver, tensor_arena, tensor_arena_size,
|
||||
reporter_) {
|
||||
uint8_t* tensor_arena, int tensor_arena_size,
|
||||
MicroProfiler* profiler)
|
||||
: interpreter_(GetModel(model), *op_resolver, tensor_arena,
|
||||
tensor_arena_size, GetMicroErrorReporter(), profiler) {
|
||||
interpreter_.AllocateTensors();
|
||||
}
|
||||
|
||||
@ -79,7 +44,7 @@ class MicroBenchmarkRunner {
|
||||
// Run the model on this input and make sure it succeeds.
|
||||
TfLiteStatus invoke_status = interpreter_.Invoke();
|
||||
if (invoke_status != kTfLiteOk) {
|
||||
TF_LITE_REPORT_ERROR(reporter_, "Invoke failed.");
|
||||
MicroPrintf("Invoke failed.");
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,10 +74,9 @@ class MicroBenchmarkRunner {
|
||||
}
|
||||
|
||||
private:
|
||||
const tflite::Model* model_;
|
||||
tflite::MicroErrorReporter micro_reporter_;
|
||||
tflite::ErrorReporter* reporter_;
|
||||
tflite::MicroInterpreter interpreter_;
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_MICRO_BENCHMARK_H_
|
||||
|
@ -31,7 +31,7 @@ limitations under the License.
|
||||
* examples/person_detection.
|
||||
*/
|
||||
|
||||
namespace {
|
||||
namespace tflite {
|
||||
|
||||
using PersonDetectionOpResolver = tflite::AllOpsResolver;
|
||||
using PersonDetectionBenchmarkRunner = MicroBenchmarkRunner<int8_t>;
|
||||
@ -43,48 +43,65 @@ alignas(16) uint8_t tensor_arena[kTensorArenaSize];
|
||||
|
||||
uint8_t op_resolver_buffer[sizeof(PersonDetectionOpResolver)];
|
||||
uint8_t benchmark_runner_buffer[sizeof(PersonDetectionBenchmarkRunner)];
|
||||
PersonDetectionBenchmarkRunner* benchmark_runner = nullptr;
|
||||
|
||||
// Initialize benchmark runner instance explicitly to avoid global init order
|
||||
// issues on Sparkfun. Use new since static variables within a method
|
||||
// are automatically surrounded by locking, which breaks bluepill and stm32f4.
|
||||
void CreateBenchmarkRunner() {
|
||||
PersonDetectionBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
|
||||
// We allocate PersonDetectionOpResolver from a global buffer
|
||||
// because the object's lifetime must exceed that of the
|
||||
// PersonDetectionBenchmarkRunner object.
|
||||
benchmark_runner = new (benchmark_runner_buffer)
|
||||
PersonDetectionBenchmarkRunner(g_person_detect_model_data,
|
||||
new (op_resolver_buffer)
|
||||
PersonDetectionOpResolver(),
|
||||
tensor_arena, kTensorArenaSize);
|
||||
return new (benchmark_runner_buffer) PersonDetectionBenchmarkRunner(
|
||||
g_person_detect_model_data,
|
||||
new (op_resolver_buffer) PersonDetectionOpResolver(), tensor_arena,
|
||||
kTensorArenaSize, profiler);
|
||||
}
|
||||
|
||||
void InitializeBenchmarkRunner() {
|
||||
CreateBenchmarkRunner();
|
||||
benchmark_runner->SetInput(reinterpret_cast<const int8_t*>(g_person_data));
|
||||
}
|
||||
|
||||
void PersonDetectionTenIerationsWithPerson() {
|
||||
benchmark_runner->SetInput(reinterpret_cast<const int8_t*>(g_person_data));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
benchmark_runner->RunSingleIteration();
|
||||
void PersonDetectionNIerations(const int8_t* input, int iterations,
|
||||
const char* tag,
|
||||
PersonDetectionBenchmarkRunner& benchmark_runner,
|
||||
MicroProfiler& profiler) {
|
||||
benchmark_runner.SetInput(input);
|
||||
int32_t ticks = 0;
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
profiler.ClearEvents();
|
||||
benchmark_runner.RunSingleIteration();
|
||||
ticks += profiler.GetTotalTicks();
|
||||
}
|
||||
MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks));
|
||||
}
|
||||
|
||||
void PersonDetectionTenIerationsWithoutPerson() {
|
||||
benchmark_runner->SetInput(reinterpret_cast<const int8_t*>(g_no_person_data));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
benchmark_runner->RunSingleIteration();
|
||||
}
|
||||
} // namespace tflite
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
tflite::MicroProfiler profiler;
|
||||
|
||||
uint32_t event_handle = profiler.BeginEvent("InitializeBenchmarkRunner");
|
||||
tflite::PersonDetectionBenchmarkRunner* benchmark_runner =
|
||||
CreateBenchmarkRunner(&profiler);
|
||||
profiler.EndEvent(event_handle);
|
||||
profiler.Log();
|
||||
MicroPrintf("");
|
||||
|
||||
tflite::PersonDetectionNIerations(
|
||||
reinterpret_cast<const int8_t*>(g_person_data), 1,
|
||||
"WithPersonDataIterations(1)", *benchmark_runner, profiler);
|
||||
profiler.Log();
|
||||
MicroPrintf("");
|
||||
|
||||
tflite::PersonDetectionNIerations(
|
||||
reinterpret_cast<const int8_t*>(g_no_person_data), 1,
|
||||
"NoPersonDataIterations(1)", *benchmark_runner, profiler);
|
||||
profiler.Log();
|
||||
MicroPrintf("");
|
||||
|
||||
tflite::PersonDetectionNIerations(
|
||||
reinterpret_cast<const int8_t*>(g_person_data), 10,
|
||||
"WithPersonDataIterations(10)", *benchmark_runner, profiler);
|
||||
MicroPrintf("");
|
||||
|
||||
tflite::PersonDetectionNIerations(
|
||||
reinterpret_cast<const int8_t*>(g_no_person_data), 10,
|
||||
"NoPersonDataIterations(10)", *benchmark_runner, profiler);
|
||||
MicroPrintf("");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TF_LITE_MICRO_BENCHMARKS_BEGIN
|
||||
|
||||
TF_LITE_MICRO_BENCHMARK(InitializeBenchmarkRunner());
|
||||
TF_LITE_MICRO_BENCHMARK(benchmark_runner->RunSingleIteration());
|
||||
TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithPerson());
|
||||
TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithoutPerson());
|
||||
|
||||
TF_LITE_MICRO_BENCHMARKS_END
|
||||
|
@ -24,6 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/core/api/tensor_utils.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
@ -108,7 +109,7 @@ MicroInterpreter::MicroInterpreter(const Model* model,
|
||||
uint8_t* tensor_arena,
|
||||
size_t tensor_arena_size,
|
||||
ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler)
|
||||
MicroProfiler* profiler)
|
||||
: model_(model),
|
||||
op_resolver_(op_resolver),
|
||||
error_reporter_(error_reporter),
|
||||
@ -127,7 +128,7 @@ MicroInterpreter::MicroInterpreter(const Model* model,
|
||||
const MicroOpResolver& op_resolver,
|
||||
MicroAllocator* allocator,
|
||||
ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler)
|
||||
MicroProfiler* profiler)
|
||||
: model_(model),
|
||||
op_resolver_(op_resolver),
|
||||
error_reporter_(error_reporter),
|
||||
@ -156,7 +157,7 @@ MicroInterpreter::~MicroInterpreter() {
|
||||
}
|
||||
}
|
||||
|
||||
void MicroInterpreter::Init(tflite::Profiler* profiler) {
|
||||
void MicroInterpreter::Init(MicroProfiler* profiler) {
|
||||
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
|
||||
model_->subgraphs();
|
||||
if (subgraphs->size() != 1) {
|
||||
@ -320,35 +321,35 @@ TfLiteStatus MicroInterpreter::Invoke() {
|
||||
auto* node = &(node_and_registrations_[i].node);
|
||||
auto* registration = node_and_registrations_[i].registration;
|
||||
|
||||
if (registration->invoke) {
|
||||
TfLiteStatus invoke_status;
|
||||
#ifndef NDEBUG // Omit profiler overhead from release builds.
|
||||
// The case where profiler == nullptr is handled by
|
||||
// ScopedOperatorProfile.
|
||||
tflite::Profiler* profiler =
|
||||
reinterpret_cast<tflite::Profiler*>(context_.profiler);
|
||||
ScopedOperatorProfile scoped_profiler(
|
||||
profiler, OpNameFromRegistration(registration), i);
|
||||
// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
|
||||
// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
|
||||
// only defined for builds with the error strings.
|
||||
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
|
||||
ScopedMicroProfiler scoped_profiler(
|
||||
OpNameFromRegistration(registration),
|
||||
reinterpret_cast<MicroProfiler*>(context_.profiler));
|
||||
#endif
|
||||
invoke_status = registration->invoke(&context_, node);
|
||||
|
||||
// All TfLiteTensor structs used in the kernel are allocated from temp
|
||||
// memory in the allocator. This creates a chain of allocations in the
|
||||
// temp section. The call below resets the chain of allocations to
|
||||
// prepare for the next call.
|
||||
allocator_.ResetTempAllocations();
|
||||
TFLITE_DCHECK(registration->invoke);
|
||||
TfLiteStatus invoke_status = registration->invoke(&context_, node);
|
||||
|
||||
if (invoke_status == kTfLiteError) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
"Node %s (number %d) failed to invoke with status %d",
|
||||
OpNameFromRegistration(registration), i, invoke_status);
|
||||
return kTfLiteError;
|
||||
} else if (invoke_status != kTfLiteOk) {
|
||||
return invoke_status;
|
||||
}
|
||||
// All TfLiteTensor structs used in the kernel are allocated from temp
|
||||
// memory in the allocator. This creates a chain of allocations in the
|
||||
// temp section. The call below resets the chain of allocations to
|
||||
// prepare for the next call.
|
||||
allocator_.ResetTempAllocations();
|
||||
|
||||
if (invoke_status == kTfLiteError) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
"Node %s (number %d) failed to invoke with status %d",
|
||||
OpNameFromRegistration(registration), i, invoke_status);
|
||||
return kTfLiteError;
|
||||
} else if (invoke_status != kTfLiteOk) {
|
||||
return invoke_status;
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
|
@ -21,10 +21,10 @@ limitations under the License.
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/profiler.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
#include "tensorflow/lite/portable_type_to_tflitetype.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
@ -86,7 +86,7 @@ class MicroInterpreter {
|
||||
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
|
||||
uint8_t* tensor_arena, size_t tensor_arena_size,
|
||||
ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler = nullptr);
|
||||
MicroProfiler* profiler = nullptr);
|
||||
|
||||
// Create an interpreter instance using an existing MicroAllocator instance.
|
||||
// This constructor should be used when creating an allocator that needs to
|
||||
@ -95,7 +95,7 @@ class MicroInterpreter {
|
||||
// as long as that of the interpreter object.
|
||||
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
|
||||
MicroAllocator* allocator, ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler = nullptr);
|
||||
MicroProfiler* profiler = nullptr);
|
||||
|
||||
~MicroInterpreter();
|
||||
|
||||
@ -179,7 +179,7 @@ class MicroInterpreter {
|
||||
private:
|
||||
// TODO(b/158263161): Consider switching to Create() function to enable better
|
||||
// error reporting during initialization.
|
||||
void Init(tflite::Profiler* profiler);
|
||||
void Init(MicroProfiler* profiler);
|
||||
|
||||
NodeAndRegistration* node_and_registrations_ = nullptr;
|
||||
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
|
||||
#include "tensorflow/lite/micro/all_ops_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
#include "tensorflow/lite/micro/recording_micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/test_helpers.h"
|
||||
@ -28,28 +29,15 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
class MockProfiler : public tflite::Profiler {
|
||||
class MockProfiler : public MicroProfiler {
|
||||
public:
|
||||
MockProfiler() : event_starts_(0), event_ends_(0) {}
|
||||
~MockProfiler() override = default;
|
||||
|
||||
// AddEvent is unused for Tf Micro.
|
||||
void AddEvent(const char* tag, EventType event_type, uint64_t start,
|
||||
uint64_t end, int64_t event_metadata1,
|
||||
int64_t event_metadata2) override{};
|
||||
|
||||
// BeginEvent followed by code followed by EndEvent will profile the code
|
||||
// enclosed. Multiple concurrent events are unsupported, so the return value
|
||||
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
|
||||
// pointer must be valid until EndEvent is called.
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
uint32_t BeginEvent(const char* tag) override {
|
||||
event_starts_++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Event_handle is ignored since TF Micro does not support concurrent events.
|
||||
void EndEvent(uint32_t event_handle) override { event_ends_++; }
|
||||
|
||||
int event_starts() { return event_starts_; }
|
||||
@ -58,7 +46,6 @@ class MockProfiler : public tflite::Profiler {
|
||||
private:
|
||||
int event_starts_;
|
||||
int event_ends_;
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
@ -12,31 +12,47 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/micro_time.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter)
|
||||
: reporter_(reporter) {}
|
||||
uint32_t MicroProfiler::BeginEvent(const char* tag) {
|
||||
if (num_events_ == kMaxEvents) {
|
||||
num_events_ = 0;
|
||||
}
|
||||
|
||||
uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type,
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) {
|
||||
start_time_ = GetCurrentTimeTicks();
|
||||
TFLITE_DCHECK(tag != nullptr);
|
||||
event_tag_ = tag;
|
||||
return 0;
|
||||
tags_[num_events_] = tag;
|
||||
start_ticks_[num_events_] = GetCurrentTimeTicks();
|
||||
end_ticks_[num_events_] = start_ticks_[num_events_] - 1;
|
||||
return num_events_++;
|
||||
}
|
||||
|
||||
void MicroProfiler::EndEvent(uint32_t event_handle) {
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
int32_t end_time = GetCurrentTimeTicks();
|
||||
TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_,
|
||||
end_time - start_time_);
|
||||
TFLITE_DCHECK(event_handle < kMaxEvents);
|
||||
end_ticks_[event_handle] = GetCurrentTimeTicks();
|
||||
}
|
||||
|
||||
int32_t MicroProfiler::GetTotalTicks() const {
|
||||
int32_t ticks = 0;
|
||||
for (int i = 0; i < num_events_; ++i) {
|
||||
ticks += end_ticks_[i] - start_ticks_[i];
|
||||
}
|
||||
return ticks;
|
||||
}
|
||||
|
||||
void MicroProfiler::Log() const {
|
||||
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
|
||||
for (int i = 0; i < num_events_; ++i) {
|
||||
int32_t ticks = end_ticks_[i] - start_ticks_[i];
|
||||
MicroPrintf("%s took %d ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
|
@ -16,9 +16,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
|
||||
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/profiler.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include <cstdint>
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@ -26,46 +24,91 @@ namespace tflite {
|
||||
// performance. Bottleneck operators can be identified along with slow code
|
||||
// sections. This can be used in conjunction with running the relevant micro
|
||||
// benchmark to evaluate end-to-end performance.
|
||||
//
|
||||
// Usage example:
|
||||
// MicroProfiler profiler(error_reporter);
|
||||
// {
|
||||
// ScopedProfile scoped_profile(profiler, tag);
|
||||
// work_to_profile();
|
||||
// }
|
||||
//
|
||||
// This will call the following methods in order:
|
||||
// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0)
|
||||
// work_to_profile();
|
||||
// profiler->EndEvent(event_handle)
|
||||
class MicroProfiler : public tflite::Profiler {
|
||||
class MicroProfiler {
|
||||
public:
|
||||
explicit MicroProfiler(tflite::ErrorReporter* reporter);
|
||||
~MicroProfiler() override = default;
|
||||
MicroProfiler() = default;
|
||||
~MicroProfiler() = default;
|
||||
|
||||
// AddEvent is unused for Tf Micro.
|
||||
void AddEvent(const char* tag, EventType event_type, uint64_t start,
|
||||
uint64_t end, int64_t event_metadata1,
|
||||
int64_t event_metadata2) override{};
|
||||
// Marks the start of a new event and returns an event handle that can be used
|
||||
// to mark the end of the event via EndEvent. The lifetime of the tag
|
||||
// parameter must exceed that of the MicroProfiler.
|
||||
virtual uint32_t BeginEvent(const char* tag);
|
||||
|
||||
// BeginEvent followed by code followed by EndEvent will profile the code
|
||||
// enclosed. Multiple concurrent events are unsupported, so the return value
|
||||
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
|
||||
// pointer must be valid until EndEvent is called.
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) override;
|
||||
// Marks the end of an event associated with event_handle. It is the
|
||||
// responsibility of the caller to ensure that EndEvent is called once and
|
||||
// only once per event_handle.
|
||||
//
|
||||
// If EndEvent is called more than once for the same event_handle, the last
|
||||
// call will be used as the end of event marker. If EndEvent is called 0 times
|
||||
// for a particular event_handle, the duration of that event will be 0 ticks.
|
||||
virtual void EndEvent(uint32_t event_handle);
|
||||
|
||||
// Event_handle is ignored since TF Micro does not support concurrent events.
|
||||
void EndEvent(uint32_t event_handle) override;
|
||||
// Clears all the events that have been currently profiled.
|
||||
void ClearEvents() { num_events_ = 0; }
|
||||
|
||||
// Returns the sum of the ticks taken across all the events. This number
|
||||
// is only meaningful if all of the events are disjoint (the end time of
|
||||
// event[i] <= start time of event[i+1]).
|
||||
int32_t GetTotalTicks() const;
|
||||
|
||||
// Prints the profiling information of each of the events.
|
||||
void Log() const;
|
||||
|
||||
private:
|
||||
tflite::ErrorReporter* reporter_;
|
||||
int32_t start_time_;
|
||||
const char* event_tag_;
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
// Maximum number of events that this class can keep track of. If we call
|
||||
// BeginEvent more than kMaxEvents number of times, then the oldest event's
|
||||
// profiling information will be overwritten.
|
||||
static constexpr int kMaxEvents = 50;
|
||||
|
||||
const char* tags_[kMaxEvents];
|
||||
int32_t start_ticks_[kMaxEvents];
|
||||
int32_t end_ticks_[kMaxEvents];
|
||||
int num_events_ = 0;
|
||||
};
|
||||
|
||||
#if defined(NDEBUG)
|
||||
// For release builds, the ScopedMicroProfiler is a noop.
|
||||
//
|
||||
// This is done because the ScopedMicroProfiler is used as part of the
|
||||
// MicroInterpreter and we want to ensure zero overhead for the release builds.
|
||||
class ScopedMicroProfiler {
|
||||
public:
|
||||
explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler) {}
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
// This class can be used to add events to a MicroProfiler object that span the
|
||||
// lifetime of the ScopedMicroProfiler object.
|
||||
// Usage example:
|
||||
//
|
||||
// MicroProfiler profiler;
|
||||
// ...
|
||||
// {
|
||||
// ScopedMicroProfiler scoped_profiler("custom_tag", &profiler);
|
||||
// work_to_profile();
|
||||
// }
|
||||
class ScopedMicroProfiler {
|
||||
public:
|
||||
explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler)
|
||||
: profiler_(profiler) {
|
||||
if (profiler_ != nullptr) {
|
||||
event_handle_ = profiler_->BeginEvent(tag);
|
||||
}
|
||||
}
|
||||
|
||||
~ScopedMicroProfiler() {
|
||||
if (profiler_ != nullptr) {
|
||||
profiler_->EndEvent(event_handle_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t event_handle_ = 0;
|
||||
MicroProfiler* profiler_ = nullptr;
|
||||
};
|
||||
#endif // !defined(NDEBUG)
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
|
||||
|
@ -15,7 +15,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@ -26,6 +26,11 @@ int32_t ticks_per_second();
|
||||
// Return time in ticks. The meaning of a tick varies per platform.
|
||||
int32_t GetCurrentTimeTicks();
|
||||
|
||||
inline int32_t TicksToMs(int32_t ticks) {
|
||||
return static_cast<int32_t>(1000.0f * static_cast<float>(ticks) /
|
||||
static_cast<float>(ticks_per_second()));
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
|
||||
|
@ -393,7 +393,6 @@ tensorflow/lite/c/common.h \
|
||||
tensorflow/lite/core/api/error_reporter.h \
|
||||
tensorflow/lite/core/api/flatbuffer_conversions.h \
|
||||
tensorflow/lite/core/api/op_resolver.h \
|
||||
tensorflow/lite/core/api/profiler.h \
|
||||
tensorflow/lite/core/api/tensor_utils.h \
|
||||
tensorflow/lite/kernels/internal/common.h \
|
||||
tensorflow/lite/kernels/internal/compatibility.h \
|
||||
|
Loading…
Reference in New Issue
Block a user