[Profiler] Add tf-function stats to be part of op_stats.

PiperOrigin-RevId: 308861068
Change-Id: Ibca41b8b9676e187c37aad284b6dfab73bb97e63
This commit is contained in:
A. Unique TensorFlower 2020-04-28 11:22:44 -07:00 committed by TensorFlower Gardener
parent e0606af65f
commit b091e42120
15 changed files with 716 additions and 21 deletions

View File

@ -209,10 +209,12 @@ cc_library(
":xplane_to_kernel_stats_db",
":xplane_to_op_metrics_db",
":xplane_to_step_events",
":xplane_to_tf_functions",
"//tensorflow/core:lib",
"//tensorflow/core/profiler/protobuf:hardware_types_proto_cc",
"//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:event_span",
"//tensorflow/core/profiler/utils:hardware_type_utils",
@ -229,6 +231,7 @@ tf_cc_test(
srcs = ["xplane_to_op_stats_test.cc"],
deps = [
":xplane_to_op_stats",
":xplane_to_tf_functions",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
@ -371,3 +374,43 @@ cc_library(
"//tensorflow/core/profiler/utils:xplane_visitor",
],
)
cc_library(
name = "xplane_to_tf_functions",
srcs = ["xplane_to_tf_functions.cc"],
hdrs = ["xplane_to_tf_functions.h"],
deps = [
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
"//tensorflow/core/profiler/utils:timespan",
"//tensorflow/core/profiler/utils:xplane_schema",
"//tensorflow/core/profiler/utils:xplane_utils",
"//tensorflow/core/profiler/utils:xplane_visitor",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
],
)
tf_cc_test(
name = "xplane_to_tf_functions_test",
size = "small",
srcs = ["xplane_to_tf_functions_test.cc"],
deps = [
":xplane_to_tf_functions",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
"//tensorflow/core/profiler/utils:xplane_builder",
"//tensorflow/core/profiler/utils:xplane_schema",
"//tensorflow/core/profiler/utils:xplane_utils",
],
)

View File

@ -20,12 +20,13 @@ limitations under the License.
#include "tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h"
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
#include "tensorflow/core/profiler/convert/xplane_to_step_events.h"
#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
#include "tensorflow/core/profiler/utils/event_span.h"
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
@ -77,6 +78,24 @@ void SetRunEnvironment(int32 accelerator_count, RunEnvironment* env) {
env->set_device_core_count(accelerator_count);
}
void ProcessHostPlane(const XPlane* host_plane, bool use_device_step_events,
OpMetricsDb* op_metrics_db, StepEvents* step_events,
TfFunctionDb* tf_function_db) {
absl::flat_hash_map<int64, TfOp> tf_ops =
CollectTfOpsFromHostThreadsXPlane(*host_plane);
OpMetricsDbCombiner combiner(op_metrics_db);
XPlaneVisitor plane = CreateTfXPlaneVisitor(host_plane);
plane.ForEachLine([&](const XLineVisitor& line) {
ConsumeTfMetricsDbData(
ConvertHostThreadsXLineToTfMetricsDbData(line, tf_ops), &combiner);
CombineStepEvents(ConvertHostThreadsXLineToStepEvents(
line, use_device_step_events, *step_events),
step_events);
CombineTfFunctionDb(ConvertHostThreadsXLineToTfFunctionDb(line),
tf_function_db);
});
}
} // namespace
OpStats ConvertXSpaceToOpStats(const XSpace& space) {
@ -112,12 +131,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space) {
// Convert a host plane.
bool has_device = !device_planes.empty();
if (host_plane) {
*op_stats.mutable_host_op_metrics_db() =
ConvertHostThreadsXPlaneToOpMetricsDb(*host_plane);
CombineStepEvents(
ConvertHostThreadsXPlaneToStepEvents(
*host_plane, /*use_device_step_events=*/has_device, step_events),
&step_events);
ProcessHostPlane(host_plane, has_device,
op_stats.mutable_host_op_metrics_db(), &step_events,
op_stats.mutable_tf_function_db());
}
StepEvents nonoverlapped_step_events = ToNonOverlappedStepEvents(step_events);
*op_stats.mutable_step_db() =

View File

@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/group_events.h"
@ -96,7 +97,7 @@ TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) {
CreateXEvent(&host_plane_builder, &tf_executor_thread,
HostEventType::kExecutorStateProcess, 20, 80,
{{StatType::kStepId, 0}});
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70, {});
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70);
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
OpStats op_stats = ConvertXSpaceToOpStats(space);
@ -144,6 +145,42 @@ TEST(ConvertXPlaneToOpStats, GpuStepDbTest) {
EXPECT_EQ(precision_stats.compute_32bit_ps(), 40);
}
TEST(ConcertXPlaneToOpStats, TfFunctionTest) {
XSpace space;
XPlaneBuilder host_plane_builder(space.add_planes());
host_plane_builder.SetName(kHostThreads);
host_plane_builder.ReserveLines(1);
std::string kFunctionName = "increment";
auto main_thread = host_plane_builder.GetOrCreateLine(0);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
10, 100, "traced-nonXla", 1);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
150, 20, "notTraced-nonXla", 1);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
200, 80, "traced-nonXla", 2);
OpStats op_stats = ConvertXSpaceToOpStats(space);
const TfFunctionDb& tf_function_db = op_stats.tf_function_db();
EXPECT_EQ(tf_function_db.tf_functions().size(), 1);
EXPECT_EQ(tf_function_db.tf_functions().count(kFunctionName), 1);
const TfFunction& tf_function =
tf_function_db.tf_functions().at(kFunctionName);
EXPECT_EQ(tf_function.total_tracing_count(), 2);
EXPECT_EQ(tf_function.compiler(), OTHER_COMPILER);
const auto& metrics = tf_function.metrics();
EXPECT_EQ(metrics.size(), 2);
EXPECT_EQ(metrics.count(TRACED_MODE), 1);
EXPECT_EQ(metrics.count(NOT_TRACED_MODE), 1);
const auto& traced_mode = metrics.at(TRACED_MODE);
EXPECT_EQ(traced_mode.count(), 2);
EXPECT_EQ(traced_mode.self_time_ps(), 180);
const auto& not_traced_mode = metrics.at(NOT_TRACED_MODE);
EXPECT_EQ(not_traced_mode.count(), 1);
EXPECT_EQ(not_traced_mode.self_time_ps(), 20);
}
} // namespace
} // namespace profiler
} // namespace tensorflow

View File

@ -0,0 +1,271 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
#include <stack>
#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/strings/proto_serialization.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/timespan.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
namespace tensorflow {
namespace profiler {
namespace {
std::pair<TfFunctionExecutionMode, TfFunctionCompiler> Decode(
absl::string_view function_name, absl::string_view mode) {
// mode is one of ["eager", "concrete", "traced-xla", "traced-nonXla",
// "notTraced-xla", "notTraced-nonXla"]
if (mode == "eager") return {EAGER_MODE, INVALID_COMPILER};
if (mode == "concrete") return {CONCRETE_MODE, INVALID_COMPILER};
if (mode == "traced-xla") return {TRACED_MODE, XLA_COMPILER};
if (mode == "traced-nonXla") return {TRACED_MODE, OTHER_COMPILER};
if (mode == "notTraced-xla") return {NOT_TRACED_MODE, XLA_COMPILER};
if (mode == "notTraced-nonXla") return {NOT_TRACED_MODE, OTHER_COMPILER};
// Shouldn't reach here.
LOG(ERROR) << absl::StrCat("tf-function '", function_name,
"' has an unexpected execution mode '", mode, "'")
<< std::endl;
return {INVALID_MODE, INVALID_COMPILER};
DCHECK(false);
}
// Each invocation of a tf-function creates an ActivationRecord.
struct ActivationRecord {
std::string function_name; // name of the tf-function.
Timespan timespan; // timespan of this invocation.
TfFunctionExecutionMode execution_mode; // execution mode.
TfFunctionCompiler compiler; // compiler used.
int64 tracing_count; // the total tracing count of this function when this
// invocation happened.
uint64 children_duration_ps; // Sum of the duration of all (immediate)
// children tf-functions of this function.
ActivationRecord()
: function_name(""),
execution_mode(INVALID_MODE),
compiler(INVALID_COMPILER),
tracing_count(0),
children_duration_ps(0) {}
ActivationRecord(absl::string_view name, const Timespan& timespan,
TfFunctionExecutionMode exe_mode,
TfFunctionCompiler compiler, int64 tracing_cnt)
: function_name(std::string(name)),
timespan(timespan),
execution_mode(exe_mode),
compiler(compiler),
tracing_count(tracing_cnt),
children_duration_ps(0) {}
std::string DebugString() const {
return absl::StrCat("{", function_name, ", ",
TfFunctionExecutionMode_Name(execution_mode), ", ",
TfFunctionCompiler_Name(compiler),
", tracing_count:", tracing_count,
", children_duration:", children_duration_ps,
" ps, timespan:", timespan.DebugString(), "}");
}
};
// Entry or exit point of a tf-function.
struct EntryOrExit {
bool is_entry; // true for entry, false for exit.
int64 index; // index to the ActivationRecord.
uint64 timestamp_ps; // the time when this entry/exit happens.
EntryOrExit() : is_entry(false), index(-1), timestamp_ps(0) {}
EntryOrExit(bool is_entry, int64 index, uint64 timestamp_ps)
: is_entry(is_entry), index(index), timestamp_ps(timestamp_ps) {}
std::string DebugString() const {
std::string entry_or_exit = is_entry ? "entry, " : "exit, ";
return absl::StrCat("{", entry_or_exit, "idx:", index,
", timestamp:", timestamp_ps, "}");
}
};
TfFunctionCompiler CombineCompilers(TfFunctionCompiler a,
TfFunctionCompiler b) {
if (a == INVALID_COMPILER) return b;
if (b == INVALID_COMPILER) return a;
if (a == b) return a;
return MIXED_COMPILER;
}
void CombineTfFunctionMetrics(const TfFunctionMetrics& src,
TfFunctionMetrics* dst) {
dst->set_count(src.count() + dst->count());
dst->set_self_time_ps(src.self_time_ps() + dst->self_time_ps());
}
void CombineTfFunction(const TfFunction& src, TfFunction* dst) {
dst->set_total_tracing_count(
std::max(src.total_tracing_count(), dst->total_tracing_count()));
dst->set_compiler(CombineCompilers(src.compiler(), dst->compiler()));
for (const auto& mode_metrics : src.metrics()) {
int32 execution_mode = mode_metrics.first;
const TfFunctionMetrics& src_metrics = mode_metrics.second;
TfFunctionMetrics* dst_metrics =
gtl::FindOrNull(*dst->mutable_metrics(), execution_mode);
if (dst_metrics == nullptr) {
(*dst->mutable_metrics())[execution_mode] = src_metrics;
} else {
CombineTfFunctionMetrics(src_metrics, dst_metrics);
}
}
}
// Execution history of all tf-functions invoked.
class TfFunctionExecutions {
public:
explicit TfFunctionExecutions(const XLineVisitor& line) {
// Creates points_ and activations_ from line.
line.ForEachEvent([&](const XEventVisitor& event) {
std::string mode = "";
int64 tracing_count = 0;
event.ForEachStat([&mode, &tracing_count](const XStatVisitor& stat) {
if (stat.Type() == StatType::kTfFunctionCall)
mode = std::string(stat.StrValue());
if (stat.Type() == StatType::kTfFunctionTracingCount)
tracing_count = stat.IntValue();
});
if (mode.empty()) return;
// event is a tf-function.
int64 index = activations_.size();
auto timespan = event.GetTimespan();
auto mode_compiler = Decode(event.Name(), mode);
ActivationRecord activation_record =
ActivationRecord(event.Name(), timespan, mode_compiler.first,
mode_compiler.second, tracing_count);
activations_.push_back(activation_record);
EntryOrExit entry_point =
EntryOrExit(/*is_entry=*/true, index, timespan.begin_ps());
EntryOrExit exit_point =
EntryOrExit(/*is_entry=*/false, index, timespan.end_ps());
points_.push_back(entry_point);
points_.push_back(exit_point);
});
// Sorts points_ in ascending order of timestamps.
auto ascending_in_timestamp = [](const EntryOrExit& a,
const EntryOrExit& b) {
return a.timestamp_ps < b.timestamp_ps;
};
absl::c_sort(points_, ascending_in_timestamp);
// Calculates the children duration for each activation record.
CalculateChildrenDurations();
}
std::string DebugString() const {
std::string result = "\nActivations:\n";
for (auto i = 0; i < activations_.size(); i++) {
absl::StrAppend(&result, "[", i, "] ", activations_[i].DebugString(),
"\n");
}
absl::StrAppend(&result, "tf-function Entry/Exit Points:\n");
for (const auto& pt : points_) {
absl::StrAppend(&result, pt.DebugString(), "\n");
}
return result;
}
// Converts this execution history to a TfFunctionDb.
TfFunctionDb ConvertToTfFunctionDb() {
TfFunctionDb result;
for (const auto& record : activations_) {
TfFunction* fun = &(*result.mutable_tf_functions())[record.function_name];
fun->set_total_tracing_count(
std::max(static_cast<int64>(fun->total_tracing_count()),
record.tracing_count));
fun->set_compiler(CombineCompilers(fun->compiler(), record.compiler));
// The self-time of this function is the difference between the duration
// of this function and the duration of its children.
uint64 self_time_ps =
record.timespan.duration_ps() - record.children_duration_ps;
// Updates the metrics for this execution mode with this invocation.
TfFunctionMetrics* metrics =
&(*fun->mutable_metrics())[record.execution_mode];
metrics->set_count(metrics->count() + 1);
metrics->set_self_time_ps(metrics->self_time_ps() + self_time_ps);
}
return result;
}
// Calculates the children duration of every tf-function.
void CalculateChildrenDurations() {
std::stack<int64> call_stack;
for (const auto& pt : points_) {
if (pt.is_entry) {
// Function entry.
call_stack.push(pt.index);
} else {
// Function exit.
DCHECK(call_stack.top() == pt.index); // must be well nested.
uint64 call_duration = activations_[pt.index].timespan.duration_ps();
call_stack.pop();
if (!call_stack.empty()) {
// call_stack.top() is the parent tf-function; adds call_duration to
// its children_duration.
activations_[call_stack.top()].children_duration_ps += call_duration;
}
}
}
}
private:
// ActivationRecords for all tf-function invocations.
std::vector<ActivationRecord> activations_;
// Entry and exit points of all invocations.
std::vector<EntryOrExit> points_;
};
} // namespace
std::string DebugString(const TfFunctionDb tf_function_db) {
std::string str;
::tensorflow::protobuf::TextFormat::PrintToString(tf_function_db, &str);
return str;
}
void CombineTfFunctionDb(const TfFunctionDb& src, TfFunctionDb* dst) {
for (const auto& name_function : src.tf_functions()) {
const auto& name = name_function.first;
const auto& src_fun = name_function.second;
TfFunction* dst_fun = gtl::FindOrNull(*dst->mutable_tf_functions(), name);
if (dst_fun == nullptr) {
(*dst->mutable_tf_functions())[name] = src_fun;
} else {
CombineTfFunction(src_fun, dst_fun);
}
}
}
TfFunctionDb ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor& line) {
TfFunctionExecutions tf_function_executions = TfFunctionExecutions(line);
return tf_function_executions.ConvertToTfFunctionDb();
}
} // namespace profiler
} // namespace tensorflow

View File

@ -0,0 +1,38 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_
#define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
namespace tensorflow {
namespace profiler {
// Converts from the given XLine to a TfFunctionDb.
TfFunctionDb ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor& line);
// Returns a debugging string for the given TfFunctionDb.
std::string DebugString(const TfFunctionDb tf_function_db);
// Combines the tf-function statistics from src and dst into dst.
void CombineTfFunctionDb(const TfFunctionDb& src, TfFunctionDb* dst);
} // namespace profiler
} // namespace tensorflow
#endif // TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_

View File

@ -0,0 +1,171 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
namespace tensorflow {
namespace profiler {
namespace {
const absl::string_view kEager = "eager";
const absl::string_view kConcrete = "concrete";
const absl::string_view kTracedNonXla = "traced-nonXla";
const absl::string_view kTracedXla = "traced-xla";
const absl::string_view kNotTracedNonXla = "notTraced-nonXla";
const absl::string_view kNotTracedXla = "notTraced-xla";
TfFunctionDb ConvertXSpaceToTfFunctionDb(const XSpace& space) {
TfFunctionDb result;
const XPlane* host_plane = FindPlaneWithName(space, kHostThreads);
if (host_plane) {
XPlaneVisitor plane = CreateTfXPlaneVisitor(host_plane);
plane.ForEachLine([&result](const XLineVisitor& line) {
CombineTfFunctionDb(ConvertHostThreadsXLineToTfFunctionDb(line), &result);
});
}
return result;
}
TEST(ConvertXPlaneToTfFunctions, CombineTwoThreads) {
XSpace space;
XPlaneBuilder host_plane_builder(space.add_planes());
host_plane_builder.SetName(kHostThreads);
host_plane_builder.ReserveLines(2);
std::string kFunctionName = "decrement";
auto main_thread = host_plane_builder.GetOrCreateLine(0);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
10, 100, kTracedNonXla, 1);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
150, 20, kNotTracedNonXla, 2);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
200, 80, kTracedNonXla, 3);
auto other_thread = host_plane_builder.GetOrCreateLine(1);
CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName,
20, 100, kTracedNonXla, 2);
CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName,
160, 20, kNotTracedNonXla, 2);
CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName,
210, 80, kTracedXla, 4);
TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space);
EXPECT_EQ(tf_function_db.tf_functions().size(), 1);
EXPECT_EQ(tf_function_db.tf_functions().count(kFunctionName), 1);
const TfFunction& tf_function =
tf_function_db.tf_functions().at(kFunctionName);
EXPECT_EQ(tf_function.total_tracing_count(), 4);
EXPECT_EQ(tf_function.compiler(), MIXED_COMPILER);
const auto& metrics = tf_function.metrics();
EXPECT_EQ(metrics.size(), 2);
EXPECT_EQ(metrics.count(TRACED_MODE), 1);
EXPECT_EQ(metrics.count(NOT_TRACED_MODE), 1);
const auto& traced_mode = metrics.at(TRACED_MODE);
EXPECT_EQ(traced_mode.count(), 4);
EXPECT_EQ(traced_mode.self_time_ps(), 360);
const auto& not_traced_mode = metrics.at(NOT_TRACED_MODE);
EXPECT_EQ(not_traced_mode.count(), 2);
EXPECT_EQ(not_traced_mode.self_time_ps(), 40);
}
TEST(ConvertXPlaneToTfFunctions, NestedFunctions) {
XSpace space;
XPlaneBuilder host_plane_builder(space.add_planes());
host_plane_builder.SetName(kHostThreads);
host_plane_builder.ReserveLines(1);
std::string kOuterFunctionName = "outer";
std::string kInnerFunctionName = "inner";
auto main_thread = host_plane_builder.GetOrCreateLine(0);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread,
kOuterFunctionName, 10, 100, kTracedNonXla, 1);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread,
kInnerFunctionName, 30, 40, kNotTracedXla, 0);
TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space);
EXPECT_EQ(tf_function_db.tf_functions().size(), 2);
EXPECT_EQ(tf_function_db.tf_functions().count(kOuterFunctionName), 1);
EXPECT_EQ(tf_function_db.tf_functions().count(kInnerFunctionName), 1);
const TfFunction& outer =
tf_function_db.tf_functions().at(kOuterFunctionName);
EXPECT_EQ(outer.total_tracing_count(), 1);
EXPECT_EQ(outer.compiler(), OTHER_COMPILER);
const auto& outer_metrics = outer.metrics();
EXPECT_EQ(outer_metrics.size(), 1);
EXPECT_EQ(outer_metrics.count(TRACED_MODE), 1);
const auto& traced_mode = outer_metrics.at(TRACED_MODE);
EXPECT_EQ(traced_mode.count(), 1);
EXPECT_EQ(traced_mode.self_time_ps(), 60);
const TfFunction& inner =
tf_function_db.tf_functions().at(kInnerFunctionName);
EXPECT_EQ(inner.total_tracing_count(), 0);
EXPECT_EQ(inner.compiler(), XLA_COMPILER);
const auto& inner_metrics = inner.metrics();
EXPECT_EQ(inner_metrics.size(), 1);
EXPECT_EQ(inner_metrics.count(NOT_TRACED_MODE), 1);
const auto& not_traced_mode = inner_metrics.at(NOT_TRACED_MODE);
EXPECT_EQ(not_traced_mode.count(), 1);
EXPECT_EQ(not_traced_mode.self_time_ps(), 40);
}
TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) {
XSpace space;
XPlaneBuilder host_plane_builder(space.add_planes());
host_plane_builder.SetName(kHostThreads);
host_plane_builder.ReserveLines(2);
std::string kEagerFunctionName = "i_am_eager";
std::string kConcreteFunctionName = "i_am_concrete";
auto main_thread = host_plane_builder.GetOrCreateLine(0);
CreateTfFunctionCallEvent(&host_plane_builder, &main_thread,
kEagerFunctionName, 10, 200, kEager);
auto other_thread = host_plane_builder.GetOrCreateLine(1);
CreateTfFunctionCallEvent(&host_plane_builder, &other_thread,
kConcreteFunctionName, 20, 40, kConcrete);
TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space);
EXPECT_EQ(tf_function_db.tf_functions().size(), 2);
EXPECT_EQ(tf_function_db.tf_functions().count(kEagerFunctionName), 1);
EXPECT_EQ(tf_function_db.tf_functions().count(kConcreteFunctionName), 1);
const TfFunction& eager =
tf_function_db.tf_functions().at(kEagerFunctionName);
EXPECT_EQ(eager.total_tracing_count(), 0);
EXPECT_EQ(eager.compiler(), INVALID_COMPILER);
const auto& eager_metrics = eager.metrics();
EXPECT_EQ(eager_metrics.size(), 1);
EXPECT_EQ(eager_metrics.count(EAGER_MODE), 1);
const auto& eager_mode = eager_metrics.at(EAGER_MODE);
EXPECT_EQ(eager_mode.count(), 1);
EXPECT_EQ(eager_mode.self_time_ps(), 200);
const TfFunction& concrete =
tf_function_db.tf_functions().at(kConcreteFunctionName);
EXPECT_EQ(concrete.total_tracing_count(), 0);
EXPECT_EQ(concrete.compiler(), INVALID_COMPILER);
const auto& concrete_metrics = concrete.metrics();
EXPECT_EQ(concrete_metrics.size(), 1);
EXPECT_EQ(concrete_metrics.count(CONCRETE_MODE), 1);
const auto& concrete_mode = concrete_metrics.at(CONCRETE_MODE);
EXPECT_EQ(concrete_mode.count(), 1);
EXPECT_EQ(concrete_mode.self_time_ps(), 40);
}
} // namespace
} // namespace profiler
} // namespace tensorflow

View File

@ -77,6 +77,7 @@ tf_proto_library(
":kernel_stats_proto",
":op_metrics_proto",
":steps_db_proto",
":tf_function_proto",
],
visibility = [
":friends",
@ -90,6 +91,13 @@ tf_proto_library(
visibility = [":friends"],
)
tf_proto_library(
name = "tf_function_proto",
srcs = ["tf_function.proto"],
cc_api_version = 2,
visibility = [":friends"],
)
# This proto is deprecating and not guaranteed to be compatible across versions.
# Please don't refer in new project unless you are double confirmed.
tf_proto_library(

View File

@ -5,6 +5,7 @@ package tensorflow.profiler;
import "tensorflow/core/profiler/protobuf/kernel_stats.proto";
import "tensorflow/core/profiler/protobuf/op_metrics.proto";
import "tensorflow/core/profiler/protobuf/steps_db.proto";
import "tensorflow/core/profiler/protobuf/tf_function.proto";
// Performance environment, e.g the peak performance capabilities of the device.
message PerfEnv {
@ -104,6 +105,8 @@ message OpStats {
RunEnvironment run_environment = 5;
// Kernel stats results from all GPUs.
KernelStatsDb kernel_stats_db = 6;
// Statistics for all tf-functions.
TfFunctionDb tf_function_db = 8;
// Errors seen.
repeated string errors = 7;
}

View File

@ -0,0 +1,58 @@
syntax = "proto3";
package tensorflow.profiler;
// All possible execution modes of a tf-function.
enum TfFunctionExecutionMode {
// Yet to be set.
INVALID_MODE = 0;
// Eager execution.
EAGER_MODE = 1;
// Graph execution with tracing.
TRACED_MODE = 2;
// Graph execution without tracing.
NOT_TRACED_MODE = 3;
// Concrete function.
CONCRETE_MODE = 4;
}
// All possible compilers that can be used to compile a tf-function in the graph
// mode.
enum TfFunctionCompiler {
// Yet to be set.
INVALID_COMPILER = 0;
// Any other compiler.
OTHER_COMPILER = 1;
// If some instance of the function is compiled with XLA and some is compiled
// with Non-XLA, use "MIXED_COMPILER".
MIXED_COMPILER = 2;
// XLA compiler.
XLA_COMPILER = 3;
// MLIR compiler.
MLIR_COMPILER = 4;
}
// Metrics associated with a particular execution mode of a tf-function.
message TfFunctionMetrics {
// Number of invocations to the function in that execution mode.
uint64 count = 1;
// The sum of "self-execution" time of this function over those invocations.
uint64 self_time_ps = 2;
}
// Statistics for a tf-function.
message TfFunction {
// A map from each execution mode to its corresponding metrics.
map<int32, TfFunctionMetrics> metrics = 1;
// Total tracing count from the program's beginning (i.e. beyond the profiling
// period) of this tf-function.
int64 total_tracing_count = 2;
// Compiler used to compile this function.
TfFunctionCompiler compiler = 3;
}
// Statistics for all tf-functions.
message TfFunctionDb {
// A map from function name to the statistics of that function.
map<string, TfFunction> tf_functions = 1;
}

View File

@ -45,8 +45,7 @@ TEST(DerivedTimelineTest, HloModuleNameTest) {
XPlane* plane = space.add_planes();
XPlaneBuilder plane_builder(plane);
auto line_builder = plane_builder.GetOrCreateLine(0);
auto first_event =
CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {});
auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100);
first_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
GetStatTypeStr(StatType::kHloModule)),
kHloModuleName);
@ -54,7 +53,7 @@ TEST(DerivedTimelineTest, HloModuleNameTest) {
GetStatTypeStr(StatType::kKernelDetails)),
kKernelDetails);
auto second_event =
CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {});
CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300);
second_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
GetStatTypeStr(StatType::kHloModule)),
kHloModuleName);
@ -85,8 +84,7 @@ TEST(DerivedTimelineTest, TfOpLineTest) {
XPlane* plane = space.add_planes();
XPlaneBuilder plane_builder(plane);
auto line_builder = plane_builder.GetOrCreateLine(0);
auto first_event =
CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {});
auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100);
first_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);
@ -94,7 +92,7 @@ TEST(DerivedTimelineTest, TfOpLineTest) {
GetStatTypeStr(StatType::kKernelDetails)),
kKernelDetails);
auto second_event =
CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {});
CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300);
second_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);
@ -164,8 +162,7 @@ TEST(DerivedTimelineTest, TfOpNameScopeTest) {
XPlane* plane = space.add_planes();
XPlaneBuilder plane_builder(plane);
auto line_builder = plane_builder.GetOrCreateLine(0);
auto first_event =
CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {});
auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100);
first_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);
@ -173,7 +170,7 @@ TEST(DerivedTimelineTest, TfOpNameScopeTest) {
GetStatTypeStr(StatType::kKernelDetails)),
kKernelDetails);
auto second_event =
CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {});
CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300);
second_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);

View File

@ -159,7 +159,7 @@ TEST(GroupEventsTest, EagerOpTest) {
// Eagerly executed CPU TF op.
CreateXEvent(&host_plane_builder, &main_thread,
HostEventType::kEagerKernelExecute, 120, 80, {});
CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {});
CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80);
XPlane* device_plane = space.add_planes();
XPlaneBuilder device_plane_builder(device_plane);
@ -208,7 +208,7 @@ TEST(GroupEventsTest, FunctionOpTest) {
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30,
{{StatType::kCorrelationId, 100}});
// CPU TF op executed inside tf.function.
CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {});
CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20);
XPlane* device_plane = space.add_planes();
XPlaneBuilder device_plane_builder(device_plane);

View File

@ -161,6 +161,8 @@ const StatTypeMap& GetStatTypeMap() {
{"hlo_module", kHloModule},
{"equation", kEquation},
{"is_eager", kIsEager},
{"tf_function_call", kTfFunctionCall},
{"tracing_count", kTfFunctionTracingCount},
// Performance counter related.
{"Raw Value", kRawValue},
{"Scaled Value", kScaledValue},

View File

@ -157,6 +157,8 @@ enum StatType {
kHloModule,
kEquation,
kIsEager,
kTfFunctionCall,
kTfFunctionTracingCount,
// Performance counter related.
kRawValue,
kScaledValue,

View File

@ -154,6 +154,23 @@ XEventBuilder CreateXEvent(
stats);
}
XEventBuilder CreateXEventWithStringViewMetadataValue(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
absl::string_view event_name, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, absl::string_view /*stat_value*/>&
stats) {
auto event_builder = line_builder->AddEvent(
*plane_builder->GetOrCreateEventMetadata(event_name));
event_builder.SetOffsetPs(offset_ps);
event_builder.SetDurationPs(duration_ps);
for (const auto& stat_type_and_value : stats) {
event_builder.AddStatValue(*plane_builder->GetOrCreateStatMetadata(
GetStatTypeStr(stat_type_and_value.first)),
stat_type_and_value.second);
}
return event_builder;
}
void RemovePlaneWithName(XSpace* space, absl::string_view name) {
auto* planes = space->mutable_planes();
planes->erase(
@ -290,5 +307,23 @@ uint64 GetStartTimestampNs(const XPlane& plane) {
return plane_timestamp;
}
void CreateTfFunctionCallEvent(XPlaneBuilder* plane_builder,
XLineBuilder* line_builder,
absl::string_view function_name, int64 offset_ps,
int64 duration_ps,
absl::string_view execution_mode,
int64 tracing_count) {
XEventBuilder event_builder = CreateXEventWithStringViewMetadataValue(
plane_builder, line_builder, function_name, offset_ps, duration_ps,
{{StatType::kTfFunctionCall, execution_mode}});
if (tracing_count >= 0) {
// Adds the tracing_count stats only if tracing_count is valid.
event_builder.AddStatValue(
*plane_builder->GetOrCreateStatMetadata(
GetStatTypeStr(StatType::kTfFunctionTracingCount)),
tracing_count);
}
}
} // namespace profiler
} // namespace tensorflow

View File

@ -46,16 +46,23 @@ void AddOrUpdateIntStat(int64 metadata_id, int64 value,
void AddOrUpdateStrStat(int64 metadata_id, absl::string_view value,
tensorflow::profiler::XEvent* event);
// Creates an XEvent with int64 stats.
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
absl::string_view event_name, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats);
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats = {});
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
HostEventType event_type, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats);
// Creates an XEvent with string stats.
XEventBuilder CreateXEventWithStringViewMetadataValue(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
absl::string_view event_name, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, absl::string_view /*stat_value*/>&
stats);
void RemovePlaneWithName(XSpace* space, absl::string_view name);
void RemoveEmptyPlanes(XSpace* space);
void RemoveEmptyLines(XPlane* plane);
@ -87,6 +94,13 @@ void MergePlanes(const XPlane& src_plane, XPlane* dst_plane);
// timestamps. If zero line exists, return 0;
uint64 GetStartTimestampNs(const XPlane& plane);
// Creates a Xevent in the given plane & line for a tf-function.
void CreateTfFunctionCallEvent(XPlaneBuilder* plane_builder,
XLineBuilder* line_builder,
absl::string_view function_name, int64 offset_ps,
int64 duration_ps,
absl::string_view execution_mode,
int64 tracing_count = -1);
} // namespace profiler
} // namespace tensorflow