1. Add a new event type for general tflite runtime instrumentation, and allow a Profiler to choose which event type it will record.
2. An initial introduction of InterpreterDetailedStatus to detail the error code that a TFLite interpreter may have during runtime. 3. Apply the above two to instrument the overall invoke latency and status as an initial exemplar usage. PiperOrigin-RevId: 313573701 Change-Id: I8c4189c72066d7d6f4c91014ef4f30e32635c115
This commit is contained in:
parent
f0f84935e3
commit
6688e1c23b
|
@ -252,6 +252,7 @@ cc_library(
|
|||
":version",
|
||||
"//tensorflow/lite/c:common",
|
||||
"//tensorflow/lite/core/api",
|
||||
"//tensorflow/lite/delegates:status",
|
||||
"//tensorflow/lite/delegates/nnapi:nnapi_delegate",
|
||||
"//tensorflow/lite/experimental/resource",
|
||||
"//tensorflow/lite/kernels/internal:compatibility",
|
||||
|
|
|
@ -22,34 +22,56 @@ namespace tflite {
|
|||
// A simple utility for enabling profiled event tracing in TensorFlow Lite.
|
||||
class Profiler {
|
||||
public:
|
||||
// As certain Profiler instance might be only interested in certain event
|
||||
// types, we define each event type value to allow a Profiler to use
|
||||
// bitmasking bitwise operations to determine whether an event should be
|
||||
// recorded or not.
|
||||
enum class EventType {
|
||||
// Default event type, the metadata field has no special significance.
|
||||
DEFAULT = 0,
|
||||
DEFAULT = 1,
|
||||
|
||||
// The event is an operator invocation and the event_metadata field is the
|
||||
// index of operator node.
|
||||
OPERATOR_INVOKE_EVENT = 1,
|
||||
OPERATOR_INVOKE_EVENT = 2,
|
||||
|
||||
// The event is an invocation for an internal operator of a TFLite delegate.
|
||||
// The event_metadata field is the index of operator node that's specific to
|
||||
// the delegate.
|
||||
DELEGATE_OPERATOR_INVOKE_EVENT = 2
|
||||
DELEGATE_OPERATOR_INVOKE_EVENT = 4,
|
||||
|
||||
// The event is a recording of runtime instrumentation such as the overall
|
||||
// TFLite runtime status, the TFLite delegate status (if a delegate
|
||||
// is applied), and the overall model inference latency etc.
|
||||
// Note, the delegate status and overall status are stored as separate
|
||||
// event_metadata fields. In particular, the delegate status is encoded
|
||||
// as DelegateStatus::full_status().
|
||||
GENERAL_RUNTIME_INSTRUMENTATION_EVENT = 8,
|
||||
};
|
||||
|
||||
virtual ~Profiler() {}
|
||||
|
||||
// Signals the beginning of an event from a subgraph indexed at
|
||||
// 'event_subgraph_index', returning a handle to the profile event.
|
||||
// Signals the beginning of an event and returns a handle to the profile
|
||||
// event. The `event_metadata1` and `event_metadata2` have different
|
||||
// interpretations based on the actual Profiler instance and the `event_type`.
|
||||
// For example, as for the 'SubgraphAwareProfiler' defined in
|
||||
// lite/core/subgraph.h, when the event_type is OPERATOR_INVOKE_EVENT,
|
||||
// `event_metadata1` represents the index of a TFLite node, and
|
||||
// `event_metadata2` represents the index of the subgraph that this event
|
||||
// comes from.
|
||||
virtual uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata,
|
||||
uint32_t event_subgraph_index) = 0;
|
||||
// Similar w/ the above, but the event comes from the primary subgraph that's
|
||||
// indexed at 0.
|
||||
virtual uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata) {
|
||||
return BeginEvent(tag, event_type, event_metadata, /*primary subgraph*/ 0);
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) = 0;
|
||||
// Similar w/ the above, but `event_metadata2` defaults to 0.
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
int64_t event_metadata) {
|
||||
return BeginEvent(tag, event_type, event_metadata, /*event_metadata2*/ 0);
|
||||
}
|
||||
|
||||
// Signals an end to the specified profile event with 'event_metadata's, This
|
||||
// is useful when 'event_metadata's are not available when the event begins
|
||||
// or when one wants to overwrite the 'event_metadata's set at the beginning.
|
||||
virtual void EndEvent(uint32_t event_handle, int64_t event_metadata1,
|
||||
int64_t event_metadata2) {}
|
||||
// Signals an end to the specified profile event.
|
||||
virtual void EndEvent(uint32_t event_handle) = 0;
|
||||
|
||||
|
@ -60,15 +82,18 @@ class Profiler {
|
|||
// they assume the value is in "usec", if in any case subclasses
|
||||
// didn't put usec, then the values are not meaningful.
|
||||
// TODO karimnosseir: Revisit and make the function more clear.
|
||||
virtual void AddEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata, uint64_t start, uint64_t end) {
|
||||
AddEvent(tag, event_type, event_metadata, start, end,
|
||||
/*event_subgraph_index*/ 0);
|
||||
void AddEvent(const char* tag, EventType event_type, uint64_t start,
|
||||
uint64_t end, int64_t event_metadata) {
|
||||
AddEvent(tag, event_type, start, end, event_metadata,
|
||||
/*event_metadata2*/ 0);
|
||||
}
|
||||
|
||||
virtual void AddEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata, uint64_t start, uint64_t end,
|
||||
uint32_t event_subgraph_index) {}
|
||||
virtual void AddEvent(const char* tag, EventType event_type, uint64_t start,
|
||||
uint64_t end, int64_t event_metadata1,
|
||||
int64_t event_metadata2) {}
|
||||
|
||||
protected:
|
||||
friend class ScopedProfile;
|
||||
};
|
||||
|
||||
// Adds a profile event to `profiler` that begins with the construction
|
||||
|
@ -79,7 +104,7 @@ class ScopedProfile {
|
|||
public:
|
||||
ScopedProfile(Profiler* profiler, const char* tag,
|
||||
Profiler::EventType event_type = Profiler::EventType::DEFAULT,
|
||||
uint32_t event_metadata = 0)
|
||||
int64_t event_metadata = 0)
|
||||
: profiler_(profiler), event_handle_(0) {
|
||||
if (profiler) {
|
||||
event_handle_ = profiler_->BeginEvent(tag, event_type, event_metadata);
|
||||
|
@ -92,8 +117,8 @@ class ScopedProfile {
|
|||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Profiler* const profiler_;
|
||||
protected:
|
||||
Profiler* profiler_;
|
||||
uint32_t event_handle_;
|
||||
};
|
||||
|
||||
|
@ -113,6 +138,31 @@ class ScopedDelegateOperatorProfile : public ScopedProfile {
|
|||
static_cast<uint32_t>(node_index)) {}
|
||||
};
|
||||
|
||||
class ScopedRuntimeInstrumentationProfile : public ScopedProfile {
|
||||
public:
|
||||
ScopedRuntimeInstrumentationProfile(Profiler* profiler, const char* tag)
|
||||
: ScopedProfile(
|
||||
profiler, tag,
|
||||
Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, -1) {}
|
||||
|
||||
void set_runtime_status(int64_t delegate_status, int64_t interpreter_status) {
|
||||
if (profiler_) {
|
||||
delegate_status_ = delegate_status;
|
||||
interpreter_status_ = interpreter_status;
|
||||
}
|
||||
}
|
||||
|
||||
~ScopedRuntimeInstrumentationProfile() {
|
||||
if (profiler_) {
|
||||
profiler_->EndEvent(event_handle_, delegate_status_, interpreter_status_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t delegate_status_;
|
||||
int64_t interpreter_status_;
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#define TFLITE_VARNAME_UNIQ_IMPL(name, ctr) name##ctr
|
||||
|
@ -130,4 +180,15 @@ class ScopedDelegateOperatorProfile : public ScopedProfile {
|
|||
tflite::ScopedDelegateOperatorProfile TFLITE_VARNAME_UNIQ( \
|
||||
_profile_, __COUNTER__)((profiler), (tag), (node_index))
|
||||
|
||||
#define TFLITE_ADD_RUNTIME_INSTRUMENTATION_EVENT( \
|
||||
profiler, tag, delegate_status, interpreter_status) \
|
||||
do { \
|
||||
if (!profiler) { \
|
||||
const auto handle = profiler->BeginEvent( \
|
||||
tag, Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, \
|
||||
delegate_status, interpreter_status); \
|
||||
profiler->EndEvent(handle); \
|
||||
} \
|
||||
} while (false);
|
||||
|
||||
#endif // TENSORFLOW_LITE_CORE_API_PROFILER_H_
|
||||
|
|
|
@ -15,6 +15,7 @@ limitations under the License.
|
|||
#ifndef TENSORFLOW_LITE_CORE_SUBGRAPH_H_
|
||||
#define TENSORFLOW_LITE_CORE_SUBGRAPH_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
|
@ -338,21 +339,16 @@ class Subgraph {
|
|||
class SubgraphAwareProfiler : public Profiler {
|
||||
public:
|
||||
// Constructor should be called with the non-nullptr profiler argument.
|
||||
SubgraphAwareProfiler(Profiler* profiler, uint32_t subgraph_index)
|
||||
SubgraphAwareProfiler(Profiler* profiler, int64_t subgraph_index)
|
||||
: profiler_(profiler), subgraph_index_(subgraph_index) {}
|
||||
~SubgraphAwareProfiler() override {}
|
||||
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata,
|
||||
uint32_t subgraph_index) override {
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
if (!profiler_) return 0;
|
||||
return profiler_->BeginEvent(tag, event_type, event_metadata,
|
||||
subgraph_index);
|
||||
}
|
||||
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata) override {
|
||||
return BeginEvent(tag, event_type, event_metadata, subgraph_index_);
|
||||
return profiler_->BeginEvent(tag, event_type, event_metadata1,
|
||||
subgraph_index_);
|
||||
}
|
||||
|
||||
void EndEvent(uint32_t event_handle) override {
|
||||
|
@ -360,17 +356,24 @@ class Subgraph {
|
|||
profiler_->EndEvent(event_handle);
|
||||
}
|
||||
|
||||
void AddEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata, uint64_t start,
|
||||
uint64_t end) override {
|
||||
void EndEvent(uint32_t event_handle, int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
if (!profiler_) return;
|
||||
profiler_->AddEvent(tag, event_type, event_metadata, start, end);
|
||||
profiler_->EndEvent(event_handle, event_metadata1, event_metadata2);
|
||||
}
|
||||
|
||||
void AddEvent(const char* tag, EventType event_type, uint64_t start,
|
||||
uint64_t end, int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
if (!profiler_) return;
|
||||
profiler_->AddEvent(tag, event_type, start, end, event_metadata1,
|
||||
subgraph_index_);
|
||||
}
|
||||
|
||||
private:
|
||||
// Not own the memory.
|
||||
Profiler* const profiler_;
|
||||
const uint32_t subgraph_index_;
|
||||
const int64_t subgraph_index_;
|
||||
};
|
||||
|
||||
// Prevent 'context_' from accessing functions that are only available to
|
||||
|
|
|
@ -20,6 +20,15 @@ package(
|
|||
licenses = ["notice"], # Apache 2.0
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "status",
|
||||
hdrs = ["status.h"],
|
||||
copts = tflite_copts(),
|
||||
deps = [
|
||||
"//tensorflow/lite/c:common",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "utils",
|
||||
srcs = ["utils.cc"],
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_STATUS_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_STATUS_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
// This file defines data structures to represent detailed TFLite delegate
|
||||
// status, e.g. NNAPI delegate application failure because of a driver issue
|
||||
// etc. Such status is ONLY to be used for internal APIs.
|
||||
// Note, we simply use TfLiteStatus to represent high-level status while
|
||||
// delegate-specific status codes are defined with DelegateStatus.
|
||||
// WARNING: This is an experimental feature that is subject to change.
|
||||
namespace tflite {
|
||||
namespace delegates {
|
||||
|
||||
// Defines the source of the code where it is generated from. We list all TFLite
|
||||
// delegates that're officially implemented and available as of April, 2020
|
||||
// (i.e. w/ 'TFLITE_' prefix to imply this).
|
||||
enum class DelegateStatusSource {
|
||||
NONE = 0,
|
||||
TFLITE_GPU = 1,
|
||||
TFLITE_NNAPI = 2,
|
||||
TFLITE_HEXAGON = 3,
|
||||
TFLITE_XNNPACK = 4,
|
||||
TFLITE_COREML = 5,
|
||||
MAX_NUM_SOURCES = std::numeric_limits<int32_t>::max(),
|
||||
};
|
||||
|
||||
// Defines the detailed status that combines a DelegateStatusSource and a
|
||||
// status int32_t code.
|
||||
class DelegateStatus {
|
||||
public:
|
||||
DelegateStatus() : DelegateStatus(DelegateStatusSource::NONE, 0) {}
|
||||
explicit DelegateStatus(int32_t code)
|
||||
: DelegateStatus(DelegateStatusSource::NONE, code) {}
|
||||
explicit DelegateStatus(int64_t full_status)
|
||||
: DelegateStatus(
|
||||
static_cast<DelegateStatusSource>(
|
||||
full_status >> 32 &
|
||||
static_cast<int32_t>(DelegateStatusSource::MAX_NUM_SOURCES)),
|
||||
static_cast<int32_t>(full_status &
|
||||
std::numeric_limits<int32_t>::max())) {}
|
||||
DelegateStatus(DelegateStatusSource source, int32_t code)
|
||||
: source_(static_cast<int32_t>(source)), code_(code) {}
|
||||
|
||||
// Return the detailed full status encoded as a int64_t value.
|
||||
int64_t full_status() const {
|
||||
return static_cast<int64_t>(source_) << 32 | code_;
|
||||
}
|
||||
|
||||
DelegateStatusSource source() const {
|
||||
return static_cast<DelegateStatusSource>(source_);
|
||||
}
|
||||
|
||||
int32_t code() const { return code_; }
|
||||
|
||||
private:
|
||||
// value of a DelegateStatusSource, like DelegateStatusSource::TFLITE_GPU
|
||||
int32_t source_;
|
||||
// value of a status code, like kTfLiteOk.
|
||||
int32_t code_;
|
||||
};
|
||||
|
||||
} // namespace delegates
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_DELEGATES_STATUS_H_
|
|
@ -301,7 +301,7 @@ void RunInference(Settings* s) {
|
|||
profiler->StopProfiling();
|
||||
auto profile_events = profiler->GetProfileEvents();
|
||||
for (int i = 0; i < profile_events.size(); i++) {
|
||||
auto subgraph_index = profile_events[i]->event_subgraph_index;
|
||||
auto subgraph_index = profile_events[i]->extra_event_metadata;
|
||||
auto op_index = profile_events[i]->event_metadata;
|
||||
const auto subgraph = interpreter->subgraph(subgraph_index);
|
||||
const auto node_and_registration =
|
||||
|
|
|
@ -24,6 +24,7 @@ limitations under the License.
|
|||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/context_util.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/delegates/status.h"
|
||||
#include "tensorflow/lite/graph_info.h"
|
||||
#include "tensorflow/lite/memory_planner.h"
|
||||
#include "tensorflow/lite/minimal_logging.h"
|
||||
|
@ -71,6 +72,17 @@ TfLiteQuantization GetQuantizationFromLegacy(
|
|||
return quantization;
|
||||
}
|
||||
|
||||
// TODO(b/153131797): We have put 'delegate_status' to 0 in the following macro
|
||||
// temporarily because delegate-specific error codes are either not retrievable
|
||||
// at the moment, which we will add later.
|
||||
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
|
||||
do { \
|
||||
TfLiteStatus status = (a); \
|
||||
runtime_event.set_runtime_status(/*delegate_status=*/0, \
|
||||
static_cast<int64_t>(status)); \
|
||||
TF_LITE_ENSURE_STATUS(status); \
|
||||
} while (0)
|
||||
|
||||
} // namespace
|
||||
|
||||
Interpreter::Interpreter(ErrorReporter* error_reporter)
|
||||
|
@ -210,11 +222,15 @@ TfLiteStatus Interpreter::ReleaseNonPersistentMemory() {
|
|||
}
|
||||
|
||||
TfLiteStatus Interpreter::Invoke() {
|
||||
TF_LITE_ENSURE_STATUS(primary_subgraph().Invoke());
|
||||
ScopedRuntimeInstrumentationProfile scoped_runtime_event(installed_profiler_,
|
||||
"invoke");
|
||||
TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
|
||||
scoped_runtime_event, primary_subgraph().Invoke());
|
||||
|
||||
if (!allow_buffer_handle_output_) {
|
||||
for (int tensor_index : outputs()) {
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
|
||||
scoped_runtime_event,
|
||||
primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
|
||||
}
|
||||
}
|
||||
|
@ -381,18 +397,21 @@ void Interpreter::SetProfiler(Profiler* profiler) {
|
|||
// Release resources occupied by owned_profiler_ which is replaced by
|
||||
// caller-owned profiler.
|
||||
owned_profiler_.reset(nullptr);
|
||||
SetSubgraphProfiler(profiler);
|
||||
installed_profiler_ = profiler;
|
||||
SetSubgraphProfiler();
|
||||
}
|
||||
|
||||
void Interpreter::SetProfiler(std::unique_ptr<Profiler> profiler) {
|
||||
owned_profiler_ = std::move(profiler);
|
||||
SetSubgraphProfiler(owned_profiler_.get());
|
||||
installed_profiler_ = owned_profiler_.get();
|
||||
SetSubgraphProfiler();
|
||||
}
|
||||
|
||||
void Interpreter::SetSubgraphProfiler(Profiler* profiler) {
|
||||
void Interpreter::SetSubgraphProfiler() {
|
||||
for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
|
||||
++subgraph_index) {
|
||||
subgraphs_[subgraph_index]->SetProfiler(profiler, subgraph_index);
|
||||
subgraphs_[subgraph_index]->SetProfiler(installed_profiler_,
|
||||
subgraph_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -540,7 +540,7 @@ class Interpreter {
|
|||
TfLiteExternalContext* ctx);
|
||||
|
||||
// Sets the profiler to all subgraphs.
|
||||
void SetSubgraphProfiler(Profiler* profiler);
|
||||
void SetSubgraphProfiler();
|
||||
|
||||
// Remove delegates (for fallback behaviour). The interpreter is invokable
|
||||
// afterwards.
|
||||
|
@ -559,10 +559,10 @@ class Interpreter {
|
|||
// interface. To avoid copying tensor metadata, this is also the definitive
|
||||
// structure to store tensors.
|
||||
// This is the primary subgraph context.
|
||||
TfLiteContext* context_;
|
||||
TfLiteContext* context_ = nullptr;
|
||||
|
||||
// The error reporter delegate that tflite will forward queries errors to.
|
||||
ErrorReporter* error_reporter_;
|
||||
ErrorReporter* error_reporter_ = nullptr;
|
||||
|
||||
// List of delegates that have been installed and are owned by this
|
||||
// interpreter instance. Useful if client delegate ownership is burdensome.
|
||||
|
@ -574,6 +574,9 @@ class Interpreter {
|
|||
// Useful if client profiler ownership is burdensome.
|
||||
std::unique_ptr<Profiler> owned_profiler_;
|
||||
|
||||
// Points to the installed Profiler instance.
|
||||
Profiler* installed_profiler_ = nullptr;
|
||||
|
||||
bool allow_buffer_handle_output_ = false;
|
||||
|
||||
// List of active external contexts.
|
||||
|
|
|
@ -57,16 +57,19 @@ class ATraceProfiler : public tflite::Profiler {
|
|||
}
|
||||
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata,
|
||||
uint32_t event_subgraph_index) override {
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
if (handle_ && atrace_is_enabled_()) {
|
||||
// Note: When recording an OPERATOR_INVOKE_EVENT, we have recorded the op
|
||||
// name as tag and node index as event_metadata. See the macro
|
||||
// TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE defined in
|
||||
// tensorflow/lite/core/api/profiler.h for details.
|
||||
// op_name@node_index/subgraph_index
|
||||
// name
|
||||
// as tag, node index as event_metadata1 and subgraph index as
|
||||
// event_metadata2. See the macro TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE
|
||||
// defined in tensorflow/lite/core/api/profiler.h for details.
|
||||
// Regardless the 'event_type', we encode the perfetto event name as
|
||||
// tag@event_metadata1/event_metadata2. In case of OPERATOR_INVOKE_EVENT,
|
||||
// the perfetto event name will be op_name@node_index/subgraph_index
|
||||
std::string trace_event_tag =
|
||||
absl::StrCat(tag, "@", event_metadata, "/", event_subgraph_index);
|
||||
absl::StrCat(tag, "@", event_metadata1, "/", event_metadata2);
|
||||
atrace_begin_section_(trace_event_tag.c_str());
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -15,6 +15,7 @@ limitations under the License.
|
|||
#ifndef TENSORFLOW_LITE_PROFILING_BUFFERED_PROFILER_H_
|
||||
#define TENSORFLOW_LITE_PROFILING_BUFFERED_PROFILER_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/core/api/profiler.h"
|
||||
|
@ -75,24 +76,33 @@ namespace profiling {
|
|||
class BufferedProfiler : public tflite::Profiler {
|
||||
public:
|
||||
explicit BufferedProfiler(uint32_t max_num_entries)
|
||||
: buffer_(max_num_entries, false) {}
|
||||
: buffer_(max_num_entries, false),
|
||||
supported_event_types_(~static_cast<uint64_t>(
|
||||
EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT)) {}
|
||||
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
uint32_t event_metadata,
|
||||
uint32_t event_subgraph_index) override {
|
||||
return buffer_.BeginEvent(tag, event_type, event_metadata,
|
||||
event_subgraph_index);
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
if (!ShouldAddEvent(event_type)) return kInvalidEventHandle;
|
||||
return buffer_.BeginEvent(tag, event_type, event_metadata1,
|
||||
event_metadata2);
|
||||
}
|
||||
|
||||
void EndEvent(uint32_t event_handle) override {
|
||||
buffer_.EndEvent(event_handle);
|
||||
}
|
||||
|
||||
void AddEvent(const char* tag, EventType event_type, uint32_t event_metadata,
|
||||
uint64_t start, uint64_t end,
|
||||
uint32_t event_subgraph_index) override {
|
||||
buffer_.AddEvent(tag, event_type, event_metadata, start, end,
|
||||
event_subgraph_index);
|
||||
void EndEvent(uint32_t event_handle, int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
buffer_.EndEvent(event_handle, &event_metadata1, &event_metadata2);
|
||||
}
|
||||
|
||||
void AddEvent(const char* tag, EventType event_type, uint64_t start,
|
||||
uint64_t end, int64_t event_metadata1,
|
||||
int64_t event_metadata2) override {
|
||||
if (!ShouldAddEvent(event_type)) return;
|
||||
buffer_.AddEvent(tag, event_type, start, end, event_metadata1,
|
||||
event_metadata2);
|
||||
}
|
||||
|
||||
void StartProfiling() { buffer_.SetEnabled(true); }
|
||||
|
@ -107,9 +117,15 @@ class BufferedProfiler : public tflite::Profiler {
|
|||
return profile_events;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool ShouldAddEvent(EventType event_type) {
|
||||
return (static_cast<uint64_t>(event_type) & supported_event_types_) != 0;
|
||||
}
|
||||
|
||||
private:
|
||||
ProfileBuffer* GetProfileBuffer() { return &buffer_; }
|
||||
ProfileBuffer buffer_;
|
||||
const uint64_t supported_event_types_;
|
||||
};
|
||||
|
||||
} // namespace profiling
|
||||
|
|
|
@ -29,7 +29,7 @@ class NoopProfiler : public tflite::Profiler {
|
|||
NoopProfiler() {}
|
||||
explicit NoopProfiler(int max_profiling_buffer_entries) {}
|
||||
|
||||
uint32_t BeginEvent(const char*, EventType, uint32_t, uint32_t) override {
|
||||
uint32_t BeginEvent(const char*, EventType, int64_t, int64_t) override {
|
||||
return 0;
|
||||
}
|
||||
void EndEvent(uint32_t) override {}
|
||||
|
|
|
@ -51,10 +51,11 @@ struct ProfileEvent {
|
|||
// The field containing the type of event. This must be one of the event types
|
||||
// in EventType.
|
||||
EventType event_type;
|
||||
// Extra data describing the details of the event.
|
||||
uint32_t event_metadata;
|
||||
// The index of subgraph where an event came from.
|
||||
uint32_t event_subgraph_index;
|
||||
// Meta data associated w/ the event.
|
||||
int64_t event_metadata;
|
||||
// Note: if this is an OPERATOR_INVOKE_EVENT, 'extra_event_metadata' will
|
||||
// represent the index of the subgraph that this event comes from.
|
||||
int64_t extra_event_metadata;
|
||||
};
|
||||
|
||||
// A ring buffer of profile events.
|
||||
|
@ -69,7 +70,7 @@ class ProfileBuffer {
|
|||
// buffer is disabled this has no affect.
|
||||
// The tag of the event should remain valid till the buffer is valid.
|
||||
uint32_t BeginEvent(const char* tag, ProfileEvent::EventType event_type,
|
||||
uint32_t event_metadata, uint32_t event_subgraph_index) {
|
||||
int64_t event_metadata1, int64_t event_metadata2) {
|
||||
if (!enabled_) {
|
||||
return kInvalidEventHandle;
|
||||
}
|
||||
|
@ -81,8 +82,8 @@ class ProfileBuffer {
|
|||
}
|
||||
event_buffer_[index].tag = tag;
|
||||
event_buffer_[index].event_type = event_type;
|
||||
event_buffer_[index].event_subgraph_index = event_subgraph_index;
|
||||
event_buffer_[index].event_metadata = event_metadata;
|
||||
event_buffer_[index].event_metadata = event_metadata1;
|
||||
event_buffer_[index].extra_event_metadata = event_metadata2;
|
||||
event_buffer_[index].begin_timestamp_us = timestamp;
|
||||
event_buffer_[index].end_timestamp_us = 0;
|
||||
if (event_type != Profiler::EventType::OPERATOR_INVOKE_EVENT) {
|
||||
|
@ -98,7 +99,8 @@ class ProfileBuffer {
|
|||
// Sets the end timestamp for event for the handle to current time.
|
||||
// If the buffer is disabled or previous event has been overwritten this
|
||||
// operation has not effect.
|
||||
void EndEvent(uint32_t event_handle) {
|
||||
void EndEvent(uint32_t event_handle, const int64_t* event_metadata1 = nullptr,
|
||||
const int64_t* event_metadata2 = nullptr) {
|
||||
if (!enabled_ || event_handle == kInvalidEventHandle ||
|
||||
event_handle > current_index_) {
|
||||
return;
|
||||
|
@ -116,11 +118,17 @@ class ProfileBuffer {
|
|||
Profiler::EventType::OPERATOR_INVOKE_EVENT) {
|
||||
event_buffer_[event_index].end_mem_usage = memory::GetMemoryUsage();
|
||||
}
|
||||
if (event_metadata1) {
|
||||
event_buffer_[event_index].event_metadata = *event_metadata1;
|
||||
}
|
||||
if (event_metadata2) {
|
||||
event_buffer_[event_index].extra_event_metadata = *event_metadata2;
|
||||
}
|
||||
}
|
||||
|
||||
void AddEvent(const char* tag, ProfileEvent::EventType event_type,
|
||||
uint32_t event_metadata, uint64_t start, uint64_t end,
|
||||
uint32_t event_subgraph_index) {
|
||||
uint64_t start, uint64_t end, int64_t event_metadata1,
|
||||
int64_t event_metadata2) {
|
||||
if (!enabled_) {
|
||||
return;
|
||||
}
|
||||
|
@ -131,8 +139,8 @@ class ProfileBuffer {
|
|||
}
|
||||
event_buffer_[index].tag = tag;
|
||||
event_buffer_[index].event_type = event_type;
|
||||
event_buffer_[index].event_subgraph_index = event_subgraph_index;
|
||||
event_buffer_[index].event_metadata = event_metadata;
|
||||
event_buffer_[index].event_metadata = event_metadata1;
|
||||
event_buffer_[index].extra_event_metadata = event_metadata2;
|
||||
event_buffer_[index].begin_timestamp_us = start;
|
||||
event_buffer_[index].end_timestamp_us = end;
|
||||
current_index_++;
|
||||
|
|
|
@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/profiling/profile_buffer.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include "tensorflow/lite/profiling/profile_buffer.h"
|
||||
#include "tensorflow/lite/testing/util.h"
|
||||
|
||||
namespace tflite {
|
||||
|
@ -43,7 +45,7 @@ TEST(ProfileBufferTest, AddEvent) {
|
|||
EXPECT_EQ(0, buffer.Size());
|
||||
auto event_handle =
|
||||
buffer.BeginEvent("hello", ProfileEvent::EventType::DEFAULT,
|
||||
/*event_metadata*/ 42, /*event_subgraph_index*/ 0);
|
||||
/*event_metadata1*/ 42, /*event_metadata2*/ 0);
|
||||
|
||||
EXPECT_GE(event_handle, 0);
|
||||
EXPECT_EQ(1, buffer.Size());
|
||||
|
@ -59,6 +61,28 @@ TEST(ProfileBufferTest, AddEvent) {
|
|||
EXPECT_GE(event->end_timestamp_us, event->begin_timestamp_us);
|
||||
}
|
||||
|
||||
TEST(ProfileBufferTest, EndEventWithMetadata) {
|
||||
ProfileBuffer buffer(/*max_size*/ 10, /*enabled*/ true);
|
||||
EXPECT_EQ(0, buffer.Size());
|
||||
auto event_handle =
|
||||
buffer.BeginEvent("hello", ProfileEvent::EventType::DEFAULT,
|
||||
/*event_metadata1*/ 42, /*event_metadata2*/ 0);
|
||||
const int64_t kEventMetadata1 = 18;
|
||||
const int64_t kEventMetadata2 = 36;
|
||||
buffer.EndEvent(event_handle, &kEventMetadata1, &kEventMetadata2);
|
||||
|
||||
EXPECT_GE(event_handle, 0);
|
||||
EXPECT_EQ(1, buffer.Size());
|
||||
auto event = GetProfileEvents(buffer)[0];
|
||||
EXPECT_EQ(event->tag, "hello");
|
||||
EXPECT_GT(event->begin_timestamp_us, 0);
|
||||
EXPECT_EQ(event->event_type, ProfileEvent::EventType::DEFAULT);
|
||||
EXPECT_EQ(event->event_metadata, kEventMetadata1);
|
||||
EXPECT_EQ(event->extra_event_metadata, kEventMetadata2);
|
||||
EXPECT_EQ(1, buffer.Size());
|
||||
EXPECT_GE(event->end_timestamp_us, event->begin_timestamp_us);
|
||||
}
|
||||
|
||||
TEST(ProfileBufferTest, OverFlow) {
|
||||
const int max_size = 4;
|
||||
ProfileBuffer buffer{max_size, true};
|
||||
|
@ -83,13 +107,13 @@ TEST(ProfileBufferTest, Enable) {
|
|||
EXPECT_EQ(0, buffer.Size());
|
||||
auto event_handle =
|
||||
buffer.BeginEvent("hello", ProfileEvent::EventType::DEFAULT,
|
||||
/*event_metadata*/ 42, /*event_subgraph_index*/ 0);
|
||||
/*event_metadata1*/ 42, /*event_metadata2*/ 0);
|
||||
EXPECT_EQ(kInvalidEventHandle, event_handle);
|
||||
EXPECT_EQ(0, buffer.Size());
|
||||
buffer.SetEnabled(true);
|
||||
event_handle =
|
||||
buffer.BeginEvent("hello", ProfileEvent::EventType::DEFAULT,
|
||||
/*event_metadata*/ 42, /*event_subgraph_index*/ 0);
|
||||
/*event_metadata1*/ 42, /*event_metadata2*/ 0);
|
||||
EXPECT_GE(event_handle, 0);
|
||||
EXPECT_EQ(1, buffer.Size());
|
||||
}
|
||||
|
|
|
@ -128,7 +128,7 @@ void ProfileSummarizer::ProcessProfiles(
|
|||
int64_t delegate_internal_total_us = 0;
|
||||
|
||||
for (auto event : events) {
|
||||
const auto subgraph_index = event->event_subgraph_index;
|
||||
const auto subgraph_index = event->extra_event_metadata;
|
||||
auto stats_calculator = GetStatsCalculator(subgraph_index);
|
||||
int64_t start_us = event->begin_timestamp_us - base_start_us;
|
||||
int64_t node_exec_time =
|
||||
|
@ -174,7 +174,7 @@ void ProfileSummarizer::ProcessProfiles(
|
|||
const memory::MemoryUsage node_mem_usage =
|
||||
event->end_mem_usage - event->begin_mem_usage;
|
||||
std::string node_name(event->tag);
|
||||
node_name += "/" + std::to_string(event->event_subgraph_index);
|
||||
node_name += "/" + std::to_string(event->extra_event_metadata);
|
||||
stats_calculator->AddNodeStats(node_name, event->tag, node_num, start_us,
|
||||
node_exec_time,
|
||||
node_mem_usage.max_rss_kb * 1000.0);
|
||||
|
|
|
@ -182,13 +182,13 @@ TEST_F(ProfileSummarizerIfOpTest, TestIfTrue) {
|
|||
EXPECT_EQ(2, events.size());
|
||||
int event_count_of_subgraph_zero = std::count_if(
|
||||
events.begin(), events.end(),
|
||||
[](auto event) { return event->event_subgraph_index == 0; });
|
||||
[](auto event) { return event->extra_event_metadata == 0; });
|
||||
int event_count_of_subgraph_one = std::count_if(
|
||||
events.begin(), events.end(),
|
||||
[](auto event) { return event->event_subgraph_index == 1; });
|
||||
[](auto event) { return event->extra_event_metadata == 1; });
|
||||
int event_count_of_subgraph_two = std::count_if(
|
||||
events.begin(), events.end(),
|
||||
[](auto event) { return event->event_subgraph_index == 2; });
|
||||
[](auto event) { return event->extra_event_metadata == 2; });
|
||||
EXPECT_EQ(1, event_count_of_subgraph_zero);
|
||||
EXPECT_EQ(1, event_count_of_subgraph_one);
|
||||
EXPECT_EQ(0, event_count_of_subgraph_two);
|
||||
|
@ -209,13 +209,13 @@ TEST_F(ProfileSummarizerIfOpTest, TestIfFalse) {
|
|||
EXPECT_EQ(2, events.size());
|
||||
int event_count_of_subgraph_zero = std::count_if(
|
||||
events.begin(), events.end(),
|
||||
[](auto event) { return event->event_subgraph_index == 0; });
|
||||
[](auto event) { return event->extra_event_metadata == 0; });
|
||||
int event_count_of_subgraph_one = std::count_if(
|
||||
events.begin(), events.end(),
|
||||
[](auto event) { return event->event_subgraph_index == 1; });
|
||||
[](auto event) { return event->extra_event_metadata == 1; });
|
||||
int event_count_of_subgraph_two = std::count_if(
|
||||
events.begin(), events.end(),
|
||||
[](auto event) { return event->event_subgraph_index == 2; });
|
||||
[](auto event) { return event->extra_event_metadata == 2; });
|
||||
EXPECT_EQ(1, event_count_of_subgraph_zero);
|
||||
EXPECT_EQ(0, event_count_of_subgraph_one);
|
||||
EXPECT_EQ(1, event_count_of_subgraph_two);
|
||||
|
|
|
@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/profiling/profiler.h"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <chrono> // NOLINT(build/c++11)
|
||||
|
@ -20,7 +22,6 @@ limitations under the License.
|
|||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include "tensorflow/lite/profiling/profiler.h"
|
||||
#include "tensorflow/lite/testing/util.h"
|
||||
|
||||
namespace tflite {
|
||||
|
@ -55,6 +56,20 @@ TEST(ProfilerTest, NoProfilesAreCollectedWhenDisabled) {
|
|||
EXPECT_EQ(0, profile_events.size());
|
||||
}
|
||||
|
||||
TEST(ProfilerTest, NoProfilesAreCollectedWhenEventTypeUnsupported) {
|
||||
BufferedProfiler profiler(1024);
|
||||
tflite::Profiler* p = &profiler;
|
||||
p->AddEvent("Hello",
|
||||
Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT,
|
||||
/*start*/ 0, /*end*/ 1,
|
||||
/*event_metadata*/ 2);
|
||||
auto handler = p->BeginEvent(
|
||||
"begin", Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, 0);
|
||||
p->EndEvent(handler);
|
||||
auto profile_events = profiler.GetProfileEvents();
|
||||
EXPECT_EQ(0, profile_events.size());
|
||||
}
|
||||
|
||||
TEST(ProfilingTest, ProfilesAreCollected) {
|
||||
BufferedProfiler profiler(1024);
|
||||
profiler.StartProfiling();
|
||||
|
|
Loading…
Reference in New Issue