diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 9fa4bd99ba6..bc127966659 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -209,10 +209,12 @@ cc_library( ":xplane_to_kernel_stats_db", ":xplane_to_op_metrics_db", ":xplane_to_step_events", + ":xplane_to_tf_functions", "//tensorflow/core:lib", "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc", "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:event_span", "//tensorflow/core/profiler/utils:hardware_type_utils", @@ -229,6 +231,7 @@ tf_cc_test( srcs = ["xplane_to_op_stats_test.cc"], deps = [ ":xplane_to_op_stats", + ":xplane_to_tf_functions", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", @@ -371,3 +374,43 @@ cc_library( "//tensorflow/core/profiler/utils:xplane_visitor", ], ) + +cc_library( + name = "xplane_to_tf_functions", + srcs = ["xplane_to_tf_functions.cc"], + hdrs = ["xplane_to_tf_functions.h"], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "//tensorflow/core/profiler/utils:tf_xplane_visitor", + "//tensorflow/core/profiler/utils:timespan", + "//tensorflow/core/profiler/utils:xplane_schema", + "//tensorflow/core/profiler/utils:xplane_utils", + "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_test( + name = "xplane_to_tf_functions_test", + size = "small", + srcs = ["xplane_to_tf_functions_test.cc"], + deps = [ + ":xplane_to_tf_functions", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", + "//tensorflow/core/profiler/utils:tf_xplane_visitor", + "//tensorflow/core/profiler/utils:xplane_builder", + "//tensorflow/core/profiler/utils:xplane_schema", + "//tensorflow/core/profiler/utils:xplane_utils", + ], +) diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc index f1182e095e2..7fdd6ffd8cb 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc @@ -20,12 +20,13 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h" #include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h" #include "tensorflow/core/profiler/convert/xplane_to_step_events.h" +#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/tf_function.pb.h" #include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/kernel_stats_utils.h" -#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" @@ -77,6 +78,24 @@ void SetRunEnvironment(int32 accelerator_count, RunEnvironment* env) { env->set_device_core_count(accelerator_count); } +void ProcessHostPlane(const XPlane* host_plane, bool use_device_step_events, + OpMetricsDb* op_metrics_db, StepEvents* step_events, + TfFunctionDb* tf_function_db) { + absl::flat_hash_map tf_ops = + CollectTfOpsFromHostThreadsXPlane(*host_plane); + OpMetricsDbCombiner combiner(op_metrics_db); + XPlaneVisitor plane = CreateTfXPlaneVisitor(host_plane); + plane.ForEachLine([&](const XLineVisitor& line) { + ConsumeTfMetricsDbData( + ConvertHostThreadsXLineToTfMetricsDbData(line, tf_ops), &combiner); + CombineStepEvents(ConvertHostThreadsXLineToStepEvents( + line, use_device_step_events, *step_events), + step_events); + CombineTfFunctionDb(ConvertHostThreadsXLineToTfFunctionDb(line), + tf_function_db); + }); +} + } // namespace OpStats ConvertXSpaceToOpStats(const XSpace& space) { @@ -112,12 +131,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space) { // Convert a host plane. bool has_device = !device_planes.empty(); if (host_plane) { - *op_stats.mutable_host_op_metrics_db() = - ConvertHostThreadsXPlaneToOpMetricsDb(*host_plane); - CombineStepEvents( - ConvertHostThreadsXPlaneToStepEvents( - *host_plane, /*use_device_step_events=*/has_device, step_events), - &step_events); + ProcessHostPlane(host_plane, has_device, + op_stats.mutable_host_op_metrics_db(), &step_events, + op_stats.mutable_tf_function_db()); } StepEvents nonoverlapped_step_events = ToNonOverlappedStepEvents(step_events); *op_stats.mutable_step_db() = diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc index 69cf591284f..c7b140b6a67 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_op_stats.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/utils/group_events.h" @@ -96,7 +97,7 @@ TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 80, {{StatType::kStepId, 0}}); - CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70, {}); + CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70); GroupTfEvents(&space, /*event_group_name_map=*/nullptr); OpStats op_stats = ConvertXSpaceToOpStats(space); @@ -144,6 +145,42 @@ TEST(ConvertXPlaneToOpStats, GpuStepDbTest) { EXPECT_EQ(precision_stats.compute_32bit_ps(), 40); } +TEST(ConcertXPlaneToOpStats, TfFunctionTest) { + XSpace space; + XPlaneBuilder host_plane_builder(space.add_planes()); + host_plane_builder.SetName(kHostThreads); + host_plane_builder.ReserveLines(1); + std::string kFunctionName = "increment"; + + auto main_thread = host_plane_builder.GetOrCreateLine(0); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName, + 10, 100, "traced-nonXla", 1); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName, + 150, 20, "notTraced-nonXla", 1); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName, + 200, 80, "traced-nonXla", 2); + + OpStats op_stats = ConvertXSpaceToOpStats(space); + const TfFunctionDb& tf_function_db = op_stats.tf_function_db(); + + EXPECT_EQ(tf_function_db.tf_functions().size(), 1); + EXPECT_EQ(tf_function_db.tf_functions().count(kFunctionName), 1); + const TfFunction& tf_function = + tf_function_db.tf_functions().at(kFunctionName); + EXPECT_EQ(tf_function.total_tracing_count(), 2); + EXPECT_EQ(tf_function.compiler(), OTHER_COMPILER); + const auto& metrics = tf_function.metrics(); + EXPECT_EQ(metrics.size(), 2); + EXPECT_EQ(metrics.count(TRACED_MODE), 1); + EXPECT_EQ(metrics.count(NOT_TRACED_MODE), 1); + const auto& traced_mode = metrics.at(TRACED_MODE); + EXPECT_EQ(traced_mode.count(), 2); + EXPECT_EQ(traced_mode.self_time_ps(), 180); + const auto& not_traced_mode = metrics.at(NOT_TRACED_MODE); + EXPECT_EQ(not_traced_mode.count(), 1); + EXPECT_EQ(not_traced_mode.self_time_ps(), 20); +} + } // namespace } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc new file mode 100644 index 00000000000..ea57a53321c --- /dev/null +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc @@ -0,0 +1,271 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +You may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" + +#include + +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/timespan.h" +#include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_utils.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" + +namespace tensorflow { +namespace profiler { + +namespace { + +std::pair Decode( + absl::string_view function_name, absl::string_view mode) { + // mode is one of ["eager", "concrete", "traced-xla", "traced-nonXla", + // "notTraced-xla", "notTraced-nonXla"] + if (mode == "eager") return {EAGER_MODE, INVALID_COMPILER}; + if (mode == "concrete") return {CONCRETE_MODE, INVALID_COMPILER}; + if (mode == "traced-xla") return {TRACED_MODE, XLA_COMPILER}; + if (mode == "traced-nonXla") return {TRACED_MODE, OTHER_COMPILER}; + if (mode == "notTraced-xla") return {NOT_TRACED_MODE, XLA_COMPILER}; + if (mode == "notTraced-nonXla") return {NOT_TRACED_MODE, OTHER_COMPILER}; + // Shouldn't reach here. + LOG(ERROR) << absl::StrCat("tf-function '", function_name, + "' has an unexpected execution mode '", mode, "'") + << std::endl; + return {INVALID_MODE, INVALID_COMPILER}; + DCHECK(false); +} + +// Each invocation of a tf-function creates an ActivationRecord. +struct ActivationRecord { + std::string function_name; // name of the tf-function. + Timespan timespan; // timespan of this invocation. + TfFunctionExecutionMode execution_mode; // execution mode. + TfFunctionCompiler compiler; // compiler used. + int64 tracing_count; // the total tracing count of this function when this + // invocation happened. + uint64 children_duration_ps; // Sum of the duration of all (immediate) + // children tf-functions of this function. + ActivationRecord() + : function_name(""), + execution_mode(INVALID_MODE), + compiler(INVALID_COMPILER), + tracing_count(0), + children_duration_ps(0) {} + ActivationRecord(absl::string_view name, const Timespan& timespan, + TfFunctionExecutionMode exe_mode, + TfFunctionCompiler compiler, int64 tracing_cnt) + : function_name(std::string(name)), + timespan(timespan), + execution_mode(exe_mode), + compiler(compiler), + tracing_count(tracing_cnt), + children_duration_ps(0) {} + std::string DebugString() const { + return absl::StrCat("{", function_name, ", ", + TfFunctionExecutionMode_Name(execution_mode), ", ", + TfFunctionCompiler_Name(compiler), + ", tracing_count:", tracing_count, + ", children_duration:", children_duration_ps, + " ps, timespan:", timespan.DebugString(), "}"); + } +}; + +// Entry or exit point of a tf-function. +struct EntryOrExit { + bool is_entry; // true for entry, false for exit. + int64 index; // index to the ActivationRecord. + uint64 timestamp_ps; // the time when this entry/exit happens. + EntryOrExit() : is_entry(false), index(-1), timestamp_ps(0) {} + EntryOrExit(bool is_entry, int64 index, uint64 timestamp_ps) + : is_entry(is_entry), index(index), timestamp_ps(timestamp_ps) {} + std::string DebugString() const { + std::string entry_or_exit = is_entry ? "entry, " : "exit, "; + return absl::StrCat("{", entry_or_exit, "idx:", index, + ", timestamp:", timestamp_ps, "}"); + } +}; + +TfFunctionCompiler CombineCompilers(TfFunctionCompiler a, + TfFunctionCompiler b) { + if (a == INVALID_COMPILER) return b; + if (b == INVALID_COMPILER) return a; + if (a == b) return a; + return MIXED_COMPILER; +} + +void CombineTfFunctionMetrics(const TfFunctionMetrics& src, + TfFunctionMetrics* dst) { + dst->set_count(src.count() + dst->count()); + dst->set_self_time_ps(src.self_time_ps() + dst->self_time_ps()); +} + +void CombineTfFunction(const TfFunction& src, TfFunction* dst) { + dst->set_total_tracing_count( + std::max(src.total_tracing_count(), dst->total_tracing_count())); + dst->set_compiler(CombineCompilers(src.compiler(), dst->compiler())); + for (const auto& mode_metrics : src.metrics()) { + int32 execution_mode = mode_metrics.first; + const TfFunctionMetrics& src_metrics = mode_metrics.second; + TfFunctionMetrics* dst_metrics = + gtl::FindOrNull(*dst->mutable_metrics(), execution_mode); + if (dst_metrics == nullptr) { + (*dst->mutable_metrics())[execution_mode] = src_metrics; + } else { + CombineTfFunctionMetrics(src_metrics, dst_metrics); + } + } +} + +// Execution history of all tf-functions invoked. +class TfFunctionExecutions { + public: + explicit TfFunctionExecutions(const XLineVisitor& line) { + // Creates points_ and activations_ from line. + line.ForEachEvent([&](const XEventVisitor& event) { + std::string mode = ""; + int64 tracing_count = 0; + event.ForEachStat([&mode, &tracing_count](const XStatVisitor& stat) { + if (stat.Type() == StatType::kTfFunctionCall) + mode = std::string(stat.StrValue()); + if (stat.Type() == StatType::kTfFunctionTracingCount) + tracing_count = stat.IntValue(); + }); + if (mode.empty()) return; + + // event is a tf-function. + int64 index = activations_.size(); + auto timespan = event.GetTimespan(); + auto mode_compiler = Decode(event.Name(), mode); + ActivationRecord activation_record = + ActivationRecord(event.Name(), timespan, mode_compiler.first, + mode_compiler.second, tracing_count); + activations_.push_back(activation_record); + EntryOrExit entry_point = + EntryOrExit(/*is_entry=*/true, index, timespan.begin_ps()); + EntryOrExit exit_point = + EntryOrExit(/*is_entry=*/false, index, timespan.end_ps()); + points_.push_back(entry_point); + points_.push_back(exit_point); + }); + + // Sorts points_ in ascending order of timestamps. + auto ascending_in_timestamp = [](const EntryOrExit& a, + const EntryOrExit& b) { + return a.timestamp_ps < b.timestamp_ps; + }; + absl::c_sort(points_, ascending_in_timestamp); + + // Calculates the children duration for each activation record. + CalculateChildrenDurations(); + } + + std::string DebugString() const { + std::string result = "\nActivations:\n"; + for (auto i = 0; i < activations_.size(); i++) { + absl::StrAppend(&result, "[", i, "] ", activations_[i].DebugString(), + "\n"); + } + absl::StrAppend(&result, "tf-function Entry/Exit Points:\n"); + for (const auto& pt : points_) { + absl::StrAppend(&result, pt.DebugString(), "\n"); + } + return result; + } + + // Converts this execution history to a TfFunctionDb. + TfFunctionDb ConvertToTfFunctionDb() { + TfFunctionDb result; + for (const auto& record : activations_) { + TfFunction* fun = &(*result.mutable_tf_functions())[record.function_name]; + fun->set_total_tracing_count( + std::max(static_cast(fun->total_tracing_count()), + record.tracing_count)); + fun->set_compiler(CombineCompilers(fun->compiler(), record.compiler)); + // The self-time of this function is the difference between the duration + // of this function and the duration of its children. + uint64 self_time_ps = + record.timespan.duration_ps() - record.children_duration_ps; + // Updates the metrics for this execution mode with this invocation. + TfFunctionMetrics* metrics = + &(*fun->mutable_metrics())[record.execution_mode]; + metrics->set_count(metrics->count() + 1); + metrics->set_self_time_ps(metrics->self_time_ps() + self_time_ps); + } + return result; + } + + // Calculates the children duration of every tf-function. + void CalculateChildrenDurations() { + std::stack call_stack; + for (const auto& pt : points_) { + if (pt.is_entry) { + // Function entry. + call_stack.push(pt.index); + } else { + // Function exit. + DCHECK(call_stack.top() == pt.index); // must be well nested. + uint64 call_duration = activations_[pt.index].timespan.duration_ps(); + call_stack.pop(); + if (!call_stack.empty()) { + // call_stack.top() is the parent tf-function; adds call_duration to + // its children_duration. + activations_[call_stack.top()].children_duration_ps += call_duration; + } + } + } + } + + private: + // ActivationRecords for all tf-function invocations. + std::vector activations_; + // Entry and exit points of all invocations. + std::vector points_; +}; + +} // namespace + +std::string DebugString(const TfFunctionDb tf_function_db) { + std::string str; + ::tensorflow::protobuf::TextFormat::PrintToString(tf_function_db, &str); + return str; +} + +void CombineTfFunctionDb(const TfFunctionDb& src, TfFunctionDb* dst) { + for (const auto& name_function : src.tf_functions()) { + const auto& name = name_function.first; + const auto& src_fun = name_function.second; + TfFunction* dst_fun = gtl::FindOrNull(*dst->mutable_tf_functions(), name); + if (dst_fun == nullptr) { + (*dst->mutable_tf_functions())[name] = src_fun; + } else { + CombineTfFunction(src_fun, dst_fun); + } + } +} + +TfFunctionDb ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor& line) { + TfFunctionExecutions tf_function_executions = TfFunctionExecutions(line); + return tf_function_executions.ConvertToTfFunctionDb(); +} + +} // namespace profiler +} // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.h b/tensorflow/core/profiler/convert/xplane_to_tf_functions.h new file mode 100644 index 00000000000..470b22d34b8 --- /dev/null +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_ +#define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_ + +#include "tensorflow/core/profiler/protobuf/tf_function.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/xplane_visitor.h" + +namespace tensorflow { +namespace profiler { + +// Converts from the given XLine to a TfFunctionDb. +TfFunctionDb ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor& line); + +// Returns a debugging string for the given TfFunctionDb. +std::string DebugString(const TfFunctionDb tf_function_db); + +// Combines the tf-function statistics from src and dst into dst. +void CombineTfFunctionDb(const TfFunctionDb& src, TfFunctionDb* dst); + +} // namespace profiler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_ diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc new file mode 100644 index 00000000000..253ef1a74f9 --- /dev/null +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc @@ -0,0 +1,171 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h" + +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/profiler/protobuf/tf_function.pb.h" +#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/xplane_builder.h" +#include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_utils.h" + +namespace tensorflow { +namespace profiler { +namespace { + +const absl::string_view kEager = "eager"; +const absl::string_view kConcrete = "concrete"; +const absl::string_view kTracedNonXla = "traced-nonXla"; +const absl::string_view kTracedXla = "traced-xla"; +const absl::string_view kNotTracedNonXla = "notTraced-nonXla"; +const absl::string_view kNotTracedXla = "notTraced-xla"; + +TfFunctionDb ConvertXSpaceToTfFunctionDb(const XSpace& space) { + TfFunctionDb result; + const XPlane* host_plane = FindPlaneWithName(space, kHostThreads); + if (host_plane) { + XPlaneVisitor plane = CreateTfXPlaneVisitor(host_plane); + plane.ForEachLine([&result](const XLineVisitor& line) { + CombineTfFunctionDb(ConvertHostThreadsXLineToTfFunctionDb(line), &result); + }); + } + return result; +} + +TEST(ConvertXPlaneToTfFunctions, CombineTwoThreads) { + XSpace space; + XPlaneBuilder host_plane_builder(space.add_planes()); + host_plane_builder.SetName(kHostThreads); + host_plane_builder.ReserveLines(2); + std::string kFunctionName = "decrement"; + + auto main_thread = host_plane_builder.GetOrCreateLine(0); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName, + 10, 100, kTracedNonXla, 1); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName, + 150, 20, kNotTracedNonXla, 2); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName, + 200, 80, kTracedNonXla, 3); + + auto other_thread = host_plane_builder.GetOrCreateLine(1); + CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName, + 20, 100, kTracedNonXla, 2); + CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName, + 160, 20, kNotTracedNonXla, 2); + CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName, + 210, 80, kTracedXla, 4); + + TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space); + EXPECT_EQ(tf_function_db.tf_functions().size(), 1); + EXPECT_EQ(tf_function_db.tf_functions().count(kFunctionName), 1); + const TfFunction& tf_function = + tf_function_db.tf_functions().at(kFunctionName); + EXPECT_EQ(tf_function.total_tracing_count(), 4); + EXPECT_EQ(tf_function.compiler(), MIXED_COMPILER); + const auto& metrics = tf_function.metrics(); + EXPECT_EQ(metrics.size(), 2); + EXPECT_EQ(metrics.count(TRACED_MODE), 1); + EXPECT_EQ(metrics.count(NOT_TRACED_MODE), 1); + const auto& traced_mode = metrics.at(TRACED_MODE); + EXPECT_EQ(traced_mode.count(), 4); + EXPECT_EQ(traced_mode.self_time_ps(), 360); + const auto& not_traced_mode = metrics.at(NOT_TRACED_MODE); + EXPECT_EQ(not_traced_mode.count(), 2); + EXPECT_EQ(not_traced_mode.self_time_ps(), 40); +} + +TEST(ConvertXPlaneToTfFunctions, NestedFunctions) { + XSpace space; + XPlaneBuilder host_plane_builder(space.add_planes()); + host_plane_builder.SetName(kHostThreads); + host_plane_builder.ReserveLines(1); + std::string kOuterFunctionName = "outer"; + std::string kInnerFunctionName = "inner"; + + auto main_thread = host_plane_builder.GetOrCreateLine(0); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, + kOuterFunctionName, 10, 100, kTracedNonXla, 1); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, + kInnerFunctionName, 30, 40, kNotTracedXla, 0); + TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space); + EXPECT_EQ(tf_function_db.tf_functions().size(), 2); + EXPECT_EQ(tf_function_db.tf_functions().count(kOuterFunctionName), 1); + EXPECT_EQ(tf_function_db.tf_functions().count(kInnerFunctionName), 1); + const TfFunction& outer = + tf_function_db.tf_functions().at(kOuterFunctionName); + EXPECT_EQ(outer.total_tracing_count(), 1); + EXPECT_EQ(outer.compiler(), OTHER_COMPILER); + const auto& outer_metrics = outer.metrics(); + EXPECT_EQ(outer_metrics.size(), 1); + EXPECT_EQ(outer_metrics.count(TRACED_MODE), 1); + const auto& traced_mode = outer_metrics.at(TRACED_MODE); + EXPECT_EQ(traced_mode.count(), 1); + EXPECT_EQ(traced_mode.self_time_ps(), 60); + const TfFunction& inner = + tf_function_db.tf_functions().at(kInnerFunctionName); + EXPECT_EQ(inner.total_tracing_count(), 0); + EXPECT_EQ(inner.compiler(), XLA_COMPILER); + const auto& inner_metrics = inner.metrics(); + EXPECT_EQ(inner_metrics.size(), 1); + EXPECT_EQ(inner_metrics.count(NOT_TRACED_MODE), 1); + const auto& not_traced_mode = inner_metrics.at(NOT_TRACED_MODE); + EXPECT_EQ(not_traced_mode.count(), 1); + EXPECT_EQ(not_traced_mode.self_time_ps(), 40); +} + +TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) { + XSpace space; + XPlaneBuilder host_plane_builder(space.add_planes()); + host_plane_builder.SetName(kHostThreads); + host_plane_builder.ReserveLines(2); + std::string kEagerFunctionName = "i_am_eager"; + std::string kConcreteFunctionName = "i_am_concrete"; + + auto main_thread = host_plane_builder.GetOrCreateLine(0); + CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, + kEagerFunctionName, 10, 200, kEager); + auto other_thread = host_plane_builder.GetOrCreateLine(1); + CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, + kConcreteFunctionName, 20, 40, kConcrete); + TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space); + EXPECT_EQ(tf_function_db.tf_functions().size(), 2); + EXPECT_EQ(tf_function_db.tf_functions().count(kEagerFunctionName), 1); + EXPECT_EQ(tf_function_db.tf_functions().count(kConcreteFunctionName), 1); + const TfFunction& eager = + tf_function_db.tf_functions().at(kEagerFunctionName); + EXPECT_EQ(eager.total_tracing_count(), 0); + EXPECT_EQ(eager.compiler(), INVALID_COMPILER); + const auto& eager_metrics = eager.metrics(); + EXPECT_EQ(eager_metrics.size(), 1); + EXPECT_EQ(eager_metrics.count(EAGER_MODE), 1); + const auto& eager_mode = eager_metrics.at(EAGER_MODE); + EXPECT_EQ(eager_mode.count(), 1); + EXPECT_EQ(eager_mode.self_time_ps(), 200); + const TfFunction& concrete = + tf_function_db.tf_functions().at(kConcreteFunctionName); + EXPECT_EQ(concrete.total_tracing_count(), 0); + EXPECT_EQ(concrete.compiler(), INVALID_COMPILER); + const auto& concrete_metrics = concrete.metrics(); + EXPECT_EQ(concrete_metrics.size(), 1); + EXPECT_EQ(concrete_metrics.count(CONCRETE_MODE), 1); + const auto& concrete_mode = concrete_metrics.at(CONCRETE_MODE); + EXPECT_EQ(concrete_mode.count(), 1); + EXPECT_EQ(concrete_mode.self_time_ps(), 40); +} + +} // namespace +} // namespace profiler +} // namespace tensorflow diff --git a/tensorflow/core/profiler/protobuf/BUILD b/tensorflow/core/profiler/protobuf/BUILD index 7ebfd5002c7..b102fe2ec25 100644 --- a/tensorflow/core/profiler/protobuf/BUILD +++ b/tensorflow/core/profiler/protobuf/BUILD @@ -77,6 +77,7 @@ tf_proto_library( ":kernel_stats_proto", ":op_metrics_proto", ":steps_db_proto", + ":tf_function_proto", ], visibility = [ ":friends", @@ -90,6 +91,13 @@ tf_proto_library( visibility = [":friends"], ) +tf_proto_library( + name = "tf_function_proto", + srcs = ["tf_function.proto"], + cc_api_version = 2, + visibility = [":friends"], +) + # This proto is deprecating and not guaranteed to be compatible across versions. # Please don't refer in new project unless you are double confirmed. tf_proto_library( diff --git a/tensorflow/core/profiler/protobuf/op_stats.proto b/tensorflow/core/profiler/protobuf/op_stats.proto index 2e3ace87adf..aa7cd563a33 100644 --- a/tensorflow/core/profiler/protobuf/op_stats.proto +++ b/tensorflow/core/profiler/protobuf/op_stats.proto @@ -5,6 +5,7 @@ package tensorflow.profiler; import "tensorflow/core/profiler/protobuf/kernel_stats.proto"; import "tensorflow/core/profiler/protobuf/op_metrics.proto"; import "tensorflow/core/profiler/protobuf/steps_db.proto"; +import "tensorflow/core/profiler/protobuf/tf_function.proto"; // Performance environment, e.g the peak performance capabilities of the device. message PerfEnv { @@ -104,6 +105,8 @@ message OpStats { RunEnvironment run_environment = 5; // Kernel stats results from all GPUs. KernelStatsDb kernel_stats_db = 6; + // Statistics for all tf-functions. + TfFunctionDb tf_function_db = 8; // Errors seen. repeated string errors = 7; } diff --git a/tensorflow/core/profiler/protobuf/tf_function.proto b/tensorflow/core/profiler/protobuf/tf_function.proto new file mode 100644 index 00000000000..fe07c00c8d3 --- /dev/null +++ b/tensorflow/core/profiler/protobuf/tf_function.proto @@ -0,0 +1,58 @@ +syntax = "proto3"; + +package tensorflow.profiler; + +// All possible execution modes of a tf-function. +enum TfFunctionExecutionMode { + // Yet to be set. + INVALID_MODE = 0; + // Eager execution. + EAGER_MODE = 1; + // Graph execution with tracing. + TRACED_MODE = 2; + // Graph execution without tracing. + NOT_TRACED_MODE = 3; + // Concrete function. + CONCRETE_MODE = 4; +} + +// All possible compilers that can be used to compile a tf-function in the graph +// mode. +enum TfFunctionCompiler { + // Yet to be set. + INVALID_COMPILER = 0; + // Any other compiler. + OTHER_COMPILER = 1; + // If some instance of the function is compiled with XLA and some is compiled + // with Non-XLA, use "MIXED_COMPILER". + MIXED_COMPILER = 2; + // XLA compiler. + XLA_COMPILER = 3; + // MLIR compiler. + MLIR_COMPILER = 4; +} + +// Metrics associated with a particular execution mode of a tf-function. +message TfFunctionMetrics { + // Number of invocations to the function in that execution mode. + uint64 count = 1; + // The sum of "self-execution" time of this function over those invocations. + uint64 self_time_ps = 2; +} + +// Statistics for a tf-function. +message TfFunction { + // A map from each execution mode to its corresponding metrics. + map metrics = 1; + // Total tracing count from the program's beginning (i.e. beyond the profiling + // period) of this tf-function. + int64 total_tracing_count = 2; + // Compiler used to compile this function. + TfFunctionCompiler compiler = 3; +} + +// Statistics for all tf-functions. +message TfFunctionDb { + // A map from function name to the statistics of that function. + map tf_functions = 1; +} diff --git a/tensorflow/core/profiler/utils/derived_timeline_test.cc b/tensorflow/core/profiler/utils/derived_timeline_test.cc index b92f4c5f801..f3e6b66f087 100644 --- a/tensorflow/core/profiler/utils/derived_timeline_test.cc +++ b/tensorflow/core/profiler/utils/derived_timeline_test.cc @@ -45,8 +45,7 @@ TEST(DerivedTimelineTest, HloModuleNameTest) { XPlane* plane = space.add_planes(); XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); - auto first_event = - CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {}); + auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100); first_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata( GetStatTypeStr(StatType::kHloModule)), kHloModuleName); @@ -54,7 +53,7 @@ TEST(DerivedTimelineTest, HloModuleNameTest) { GetStatTypeStr(StatType::kKernelDetails)), kKernelDetails); auto second_event = - CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {}); + CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300); second_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata( GetStatTypeStr(StatType::kHloModule)), kHloModuleName); @@ -85,8 +84,7 @@ TEST(DerivedTimelineTest, TfOpLineTest) { XPlane* plane = space.add_planes(); XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); - auto first_event = - CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {}); + auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100); first_event.AddStatValue( *plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)), kTfOpName); @@ -94,7 +92,7 @@ TEST(DerivedTimelineTest, TfOpLineTest) { GetStatTypeStr(StatType::kKernelDetails)), kKernelDetails); auto second_event = - CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {}); + CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300); second_event.AddStatValue( *plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)), kTfOpName); @@ -164,8 +162,7 @@ TEST(DerivedTimelineTest, TfOpNameScopeTest) { XPlane* plane = space.add_planes(); XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); - auto first_event = - CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {}); + auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100); first_event.AddStatValue( *plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)), kTfOpName); @@ -173,7 +170,7 @@ TEST(DerivedTimelineTest, TfOpNameScopeTest) { GetStatTypeStr(StatType::kKernelDetails)), kKernelDetails); auto second_event = - CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {}); + CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300); second_event.AddStatValue( *plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)), kTfOpName); diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc index f3f58bf8701..6b6a0d2a19d 100644 --- a/tensorflow/core/profiler/utils/group_events_test.cc +++ b/tensorflow/core/profiler/utils/group_events_test.cc @@ -159,7 +159,7 @@ TEST(GroupEventsTest, EagerOpTest) { // Eagerly executed CPU TF op. CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kEagerKernelExecute, 120, 80, {}); - CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {}); + CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80); XPlane* device_plane = space.add_planes(); XPlaneBuilder device_plane_builder(device_plane); @@ -208,7 +208,7 @@ TEST(GroupEventsTest, FunctionOpTest) { CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30, {{StatType::kCorrelationId, 100}}); // CPU TF op executed inside tf.function. - CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {}); + CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20); XPlane* device_plane = space.add_planes(); XPlaneBuilder device_plane_builder(device_plane); diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index 0a2cc52c0c0..af0008186bc 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -161,6 +161,8 @@ const StatTypeMap& GetStatTypeMap() { {"hlo_module", kHloModule}, {"equation", kEquation}, {"is_eager", kIsEager}, + {"tf_function_call", kTfFunctionCall}, + {"tracing_count", kTfFunctionTracingCount}, // Performance counter related. {"Raw Value", kRawValue}, {"Scaled Value", kScaledValue}, diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index 432c948582d..e8c377b6e3d 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -157,6 +157,8 @@ enum StatType { kHloModule, kEquation, kIsEager, + kTfFunctionCall, + kTfFunctionTracingCount, // Performance counter related. kRawValue, kScaledValue, diff --git a/tensorflow/core/profiler/utils/xplane_utils.cc b/tensorflow/core/profiler/utils/xplane_utils.cc index df1d5fc6e7a..3fe3cc96d85 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.cc +++ b/tensorflow/core/profiler/utils/xplane_utils.cc @@ -154,6 +154,23 @@ XEventBuilder CreateXEvent( stats); } +XEventBuilder CreateXEventWithStringViewMetadataValue( + XPlaneBuilder* plane_builder, XLineBuilder* line_builder, + absl::string_view event_name, int64 offset_ps, int64 duration_ps, + const absl::flat_hash_map& + stats) { + auto event_builder = line_builder->AddEvent( + *plane_builder->GetOrCreateEventMetadata(event_name)); + event_builder.SetOffsetPs(offset_ps); + event_builder.SetDurationPs(duration_ps); + for (const auto& stat_type_and_value : stats) { + event_builder.AddStatValue(*plane_builder->GetOrCreateStatMetadata( + GetStatTypeStr(stat_type_and_value.first)), + stat_type_and_value.second); + } + return event_builder; +} + void RemovePlaneWithName(XSpace* space, absl::string_view name) { auto* planes = space->mutable_planes(); planes->erase( @@ -290,5 +307,23 @@ uint64 GetStartTimestampNs(const XPlane& plane) { return plane_timestamp; } +void CreateTfFunctionCallEvent(XPlaneBuilder* plane_builder, + XLineBuilder* line_builder, + absl::string_view function_name, int64 offset_ps, + int64 duration_ps, + absl::string_view execution_mode, + int64 tracing_count) { + XEventBuilder event_builder = CreateXEventWithStringViewMetadataValue( + plane_builder, line_builder, function_name, offset_ps, duration_ps, + {{StatType::kTfFunctionCall, execution_mode}}); + if (tracing_count >= 0) { + // Adds the tracing_count stats only if tracing_count is valid. + event_builder.AddStatValue( + *plane_builder->GetOrCreateStatMetadata( + GetStatTypeStr(StatType::kTfFunctionTracingCount)), + tracing_count); + } +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/xplane_utils.h b/tensorflow/core/profiler/utils/xplane_utils.h index 41035a65214..a98e37f96ea 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.h +++ b/tensorflow/core/profiler/utils/xplane_utils.h @@ -46,16 +46,23 @@ void AddOrUpdateIntStat(int64 metadata_id, int64 value, void AddOrUpdateStrStat(int64 metadata_id, absl::string_view value, tensorflow::profiler::XEvent* event); +// Creates an XEvent with int64 stats. XEventBuilder CreateXEvent( XPlaneBuilder* plane_builder, XLineBuilder* line_builder, absl::string_view event_name, int64 offset_ps, int64 duration_ps, - const absl::flat_hash_map& stats); - + const absl::flat_hash_map& stats = {}); XEventBuilder CreateXEvent( XPlaneBuilder* plane_builder, XLineBuilder* line_builder, HostEventType event_type, int64 offset_ps, int64 duration_ps, const absl::flat_hash_map& stats); +// Creates an XEvent with string stats. +XEventBuilder CreateXEventWithStringViewMetadataValue( + XPlaneBuilder* plane_builder, XLineBuilder* line_builder, + absl::string_view event_name, int64 offset_ps, int64 duration_ps, + const absl::flat_hash_map& + stats); + void RemovePlaneWithName(XSpace* space, absl::string_view name); void RemoveEmptyPlanes(XSpace* space); void RemoveEmptyLines(XPlane* plane); @@ -87,6 +94,13 @@ void MergePlanes(const XPlane& src_plane, XPlane* dst_plane); // timestamps. If zero line exists, return 0; uint64 GetStartTimestampNs(const XPlane& plane); +// Creates a Xevent in the given plane & line for a tf-function. +void CreateTfFunctionCallEvent(XPlaneBuilder* plane_builder, + XLineBuilder* line_builder, + absl::string_view function_name, int64 offset_ps, + int64 duration_ps, + absl::string_view execution_mode, + int64 tracing_count = -1); } // namespace profiler } // namespace tensorflow