From 8c30da064f56c636221e7e5ee3980807274693ad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 Sep 2020 12:42:34 -0700 Subject: [PATCH] Use OpStats combiner in profiler_wrapper.cc PiperOrigin-RevId: 333350927 Change-Id: Idb7bb344253f32326dbddf290e68f23a41827d6c --- tensorflow/core/profiler/convert/BUILD | 2 + .../profiler/convert/xplane_to_op_stats.cc | 41 +++++ .../profiler/convert/xplane_to_op_stats.h | 9 + .../convert/xplane_to_op_stats_test.cc | 83 +++++++++ tensorflow/python/profiler/internal/BUILD | 2 + .../profiler/internal/profiler_wrapper.cc | 165 ++++++++++++------ 6 files changed, 251 insertions(+), 51 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 1af3b170b18..ad5f95dc214 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -232,6 +232,7 @@ cc_library( hdrs = ["xplane_to_op_stats.h"], deps = [ ":op_metrics_db_combiner", + ":op_stats_combiner", ":step_events_to_steps_db", ":xplane_to_kernel_stats_db", ":xplane_to_op_metrics_db", @@ -249,6 +250,7 @@ cc_library( "//tensorflow/core/profiler/utils:event_span", "//tensorflow/core/profiler/utils:hardware_type_utils", "//tensorflow/core/profiler/utils:kernel_stats_utils", + "//tensorflow/core/profiler/utils:step_intersection", "//tensorflow/core/profiler/utils:tf_op_utils", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:xplane_schema", diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc index 47a0062884a..9ca784c01bb 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h" +#include "tensorflow/core/profiler/convert/op_stats_combiner.h" #include "tensorflow/core/profiler/convert/step_events_to_steps_db.h" #include "tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h" #include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h" @@ -38,6 +39,7 @@ limitations under the License. #include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/kernel_stats_utils.h" +#include "tensorflow/core/profiler/utils/step_intersection.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" @@ -202,5 +204,44 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space, return op_stats; } +Status ConvertMultiXSpacesToCombinedOpStats( + const std::vector& xspace_paths, const OpStatsOptions& options, + OpStats* combined_op_stats) { + // A shortcut code path for a single XSpace. There is no need to merge OpStats + // if there is only a single XSpace. + if (xspace_paths.size() == 1) { + XSpace xspace; + Status status = ReadBinaryProto(Env::Default(), xspace_paths[0], &xspace); + if (!status.ok()) return status; + *combined_op_stats = ConvertXSpaceToOpStats(xspace, options); + return Status::OK(); + } + + // Read multiple XSpaces and convert to multiple OpStats. + std::vector all_op_stats; + for (const std::string& xspace_path : xspace_paths) { + XSpace xspace; + Status status = ReadBinaryProto(Env::Default(), xspace_path, &xspace); + if (!status.ok()) return status; + all_op_stats.push_back(ConvertXSpaceToOpStats(xspace, options)); + } + + // Combine OpStats. 
+ std::vector all_op_stats_info; + all_op_stats_info.reserve(all_op_stats.size()); + for (int i = 0; i < all_op_stats.size(); i++) { + all_op_stats_info.emplace_back( + &all_op_stats[i], + ParseHardwareType(all_op_stats[i].run_environment().device_type()), i); + } + + // Do not limit the maximum number of steps during the merge of OpStats. + StepIntersection step_intersection = + ComputeStepIntersectionToMergeOpStats(all_op_stats_info, kuint32max); + CombineAllOpStats(all_op_stats_info, step_intersection, combined_op_stats); + + return Status::OK(); +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.h b/tensorflow/core/profiler/convert/xplane_to_op_stats.h index adad8296307..09ed246e766 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.h +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_OP_STATS_H_ #include "absl/container/flat_hash_set.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" @@ -45,6 +46,14 @@ PerfEnv MakePerfEnv(double peak_tera_flops_per_second, // Extracts PerfEnv from XPlane stats. PerfEnv GetPerfEnvFromXPlane(const XPlane& device_plane); +// Reads multiple XSpaces from <xspace_paths>, convert them to OpStats, and +// combine them to a single OpStats in <combined_op_stats>. +// Return the first error status during conversion, or return Status::OK() if +// there is no error. 
+Status ConvertMultiXSpacesToCombinedOpStats( + const std::vector& xspace_paths, const OpStatsOptions& options, + OpStats* combined_op_stats); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc index 68a6e511c7c..9c677119229 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc @@ -16,6 +16,9 @@ limitations under the License. #include "tensorflow/core/profiler/convert/xplane_to_op_stats.h" #include "absl/strings/str_cat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/path.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/diagnostics.pb.h" @@ -175,6 +178,86 @@ TEST(ConvertXPlaneToOpStats, PropagateAndDedupErrors) { EXPECT_EQ(kError, op_stats.diagnostics().errors(/*index=*/0)); } +// Helper function to build a XSpace and store it to test directory. +void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir, + const std::string& xspace_name) { + constexpr int64 kStepNum = 123; + constexpr int64 kStepId = 456; + // Create a host only XSpace for test. 
+ XSpace xspace; + XPlaneBuilder host_plane_builder(GetOrCreateHostXPlane(&xspace)); + host_plane_builder.ReserveLines(2); + + auto main_thread = host_plane_builder.GetOrCreateLine(0); + CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, + 0, 100, {{StatType::kStepNum, kStepNum}}); + CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, + 10, 90, {{StatType::kStepId, kStepId}}); + + auto executor_thread = host_plane_builder.GetOrCreateLine(1); + CreateXEvent(&host_plane_builder, &executor_thread, + HostEventType::kExecutorStateProcess, 20, 80, + {{StatType::kStepId, kStepId}}); + // Create a TensorFlow op that runs for 70 ps. + CreateXEvent(&host_plane_builder, &executor_thread, "aaa:bbb", 30, 70); + GroupTfEvents(&xspace, /*group_metadata_map=*/nullptr); + + TF_CHECK_OK( + WriteBinaryProto(test_env, io::JoinPath(test_dir, xspace_name), xspace)) + << "Failed to write binary XSpace to file: " << xspace_name; +} + +TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) { + // Initialize environment and directory for testing. 
+ Env* test_env = Env::Default(); + std::string test_dir = io::JoinPath(testing::TmpDir(), "test_dir"); + TF_CHECK_OK(test_env->CreateDir(test_dir)) + << "Failed to create test directory: " << test_dir; + + const std::string xspace1 = "xspace1.pb"; + const std::string xspace2 = "xspace2.pb"; + BuildAndStoreXSpaceForTest(test_env, test_dir, xspace1); + BuildAndStoreXSpaceForTest(test_env, test_dir, xspace2); + + std::vector xspace_paths; + xspace_paths.push_back(io::JoinPath(test_dir, xspace1)); + xspace_paths.push_back(io::JoinPath(test_dir, xspace2)); + OpStatsOptions options; + options.generate_op_metrics_db = true; + options.generate_step_db = true; + OpStats combined_op_stats; + + TF_CHECK_OK(ConvertMultiXSpacesToCombinedOpStats(xspace_paths, options, + &combined_op_stats)) + << "Failed to convert multi XSpace to OpStats"; + + // Result OpStats has 2 Host Ops, "IDLE" and "aaa:bbb". + ASSERT_EQ(combined_op_stats.host_op_metrics_db().metrics_db_size(), 2); + const auto& metric = combined_op_stats.host_op_metrics_db().metrics_db(1); + EXPECT_EQ(metric.name(), "aaa"); + EXPECT_EQ(metric.category(), "bbb"); + // Each host has the HostOp "aaa:bbb" running for 70 ps, so the combined + // OpStats has "aaa:bbb" running for 140 ps in total. + EXPECT_EQ(metric.self_time_ps(), 140); + + // Result OpStats has 1 step, 2 cores. + ASSERT_EQ(combined_op_stats.step_db().step_sequence_size(), 1); + ASSERT_EQ( + combined_op_stats.step_db().step_sequence(0).step_info_per_core_size(), + 2); + const auto& step_info_per_core = + combined_op_stats.step_db().step_sequence(0).step_info_per_core(); + // global_core_id is computed using: 1000 * host_id + local_core_id. + EXPECT_TRUE(step_info_per_core.contains(1)); + EXPECT_TRUE(step_info_per_core.contains(1001)); + + // Tear down environment and directory for testing. 
+ int64 undeleted_files, undeleted_dirs; + TF_CHECK_OK( + test_env->DeleteRecursively(test_dir, &undeleted_files, &undeleted_dirs)) + << "Failed to delete test directory: " << test_dir; +} + } // namespace } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD index 8cdf8faaaf5..9b24e17b5c2 100644 --- a/tensorflow/python/profiler/internal/BUILD +++ b/tensorflow/python/profiler/internal/BUILD @@ -127,6 +127,8 @@ tf_python_pybind_extension( "//tensorflow/core/profiler/convert:xplane_to_op_stats", "//tensorflow/core/profiler/convert:xplane_to_trace_events", "//tensorflow/core/profiler/lib:profiler_session_headers", + "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/rpc:profiler_server_headers", "//tensorflow/core/profiler/rpc/client:capture_profile", "//tensorflow/core/profiler/rpc/client:save_profile", diff --git a/tensorflow/python/profiler/internal/profiler_wrapper.cc b/tensorflow/python/profiler/internal/profiler_wrapper.cc index b58f9b98776..6b0a095abea 100644 --- a/tensorflow/python/profiler/internal/profiler_wrapper.cc +++ b/tensorflow/python/profiler/internal/profiler_wrapper.cc @@ -18,8 +18,10 @@ limitations under the License. #include "absl/memory/memory.h" #include "absl/strings/match.h" #include "absl/strings/numbers.h" +#include "pybind11/cast.h" #include "pybind11/pybind11.h" #include "pybind11/pytypes.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" @@ -32,6 +34,8 @@ limitations under the License. 
#include "tensorflow/core/profiler/lib/profiler_session.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/rpc/client/capture_profile.h" #include "tensorflow/core/profiler/rpc/client/save_profile.h" #include "tensorflow/core/profiler/rpc/profiler_server.h" @@ -114,6 +118,15 @@ class ProfilerSessionWrapper { tensorflow::string logdir_; }; +// Converts a pybind list of XSpace paths to a cpp vector. +std::vector GetXSpacePaths(const py::list& python_paths) { + std::vector cpp_paths; + for (py::handle obj : python_paths) { + cpp_paths.push_back(std::string(py::cast(obj))); + } + return cpp_paths; +} + } // namespace PYBIND11_MODULE(_pywrap_profiler, m) { @@ -171,71 +184,121 @@ PYBIND11_MODULE(_pywrap_profiler, m) { return content; }); - m.def("xspace_to_trace_events", [](const py::bytes& serialized_xspace_proto) { - tensorflow::string content; + m.def("xspace_to_trace_events", [](const py::list& xspace_path_list) { + std::vector xspace_paths = GetXSpacePaths(xspace_path_list); + if (xspace_paths.size() != 1) { + LOG(WARNING) << "Trace events tool expects only 1 XSpace path but gets " + << xspace_paths.size(); + return py::make_tuple(py::bytes(), py::bool_(false)); + } tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(std::string(serialized_xspace_proto)); + tensorflow::Status status = tensorflow::ReadBinaryProto( + tensorflow::Env::Default(), xspace_paths[0], &xspace); + if (!status.ok()) { + LOG(WARNING) << "Could not read XSpace for trace events: " + << xspace_paths[0]; + return py::make_tuple(py::bytes(), py::bool_(false)); + } + tensorflow::string content; tensorflow::profiler::ConvertXSpaceToTraceEventsString(xspace, &content); - return py::bytes(content); + return py::make_tuple(py::bytes(content), py::bool_(true)); }); - 
m.def("xspace_to_overview_page", - [](const py::bytes& serialized_xspace_proto) { - tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(std::string(serialized_xspace_proto)); - tensorflow::profiler::OpStatsOptions options; - options.generate_kernel_stats_db = true; - options.generate_op_metrics_db = true; - options.generate_step_db = true; - // TODO(profiler): xspace should tell whether this is sampling mode. - tensorflow::profiler::OverviewPage overview_page = - tensorflow::profiler::ConvertOpStatsToOverviewPage( - ConvertXSpaceToOpStats(xspace, options)); - return py::bytes(overview_page.SerializeAsString()); - }); + m.def("xspace_to_overview_page", [](const py::list& xspace_path_list) { + std::vector xspace_paths = GetXSpacePaths(xspace_path_list); + tensorflow::profiler::OpStatsOptions options; + options.generate_kernel_stats_db = true; + options.generate_op_metrics_db = true; + options.generate_step_db = true; + tensorflow::profiler::OpStats combined_op_stats; + tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats( + xspace_paths, options, &combined_op_stats); + if (!status.ok()) { + LOG(WARNING) << "Could not generate OpStats for overview page. Error: " + << status.error_message(); + return py::make_tuple(py::bytes(), py::bool_(false)); + } + // TODO(profiler): xspace should tell whether this is sampling mode. 
+ tensorflow::profiler::OverviewPage overview_page = + tensorflow::profiler::ConvertOpStatsToOverviewPage(combined_op_stats); + return py::make_tuple(py::bytes(overview_page.SerializeAsString()), + py::bool_(true)); + }); - m.def("xspace_to_input_pipeline", - [](const py::bytes& serialized_xspace_proto) { - tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(std::string(serialized_xspace_proto)); - tensorflow::profiler::OpStatsOptions options; - options.generate_op_metrics_db = true; - options.generate_step_db = true; - tensorflow::profiler::InputPipelineAnalysisResult input_pipeline = - tensorflow::profiler::ConvertOpStatsToInputPipelineAnalysis( - ConvertXSpaceToOpStats(xspace, options)); - return py::bytes(input_pipeline.SerializeAsString()); - }); + m.def("xspace_to_input_pipeline", [](const py::list& xspace_path_list) { + std::vector xspace_paths = GetXSpacePaths(xspace_path_list); + tensorflow::profiler::OpStatsOptions options; + options.generate_op_metrics_db = true; + options.generate_step_db = true; + tensorflow::profiler::OpStats combined_op_stats; + tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats( + xspace_paths, options, &combined_op_stats); + if (!status.ok()) { + LOG(WARNING) << "Could not generate OpStats for input pipeline. 
Error: " + << status.error_message(); + return py::make_tuple(py::bytes(), py::bool_(false)); + } + tensorflow::profiler::InputPipelineAnalysisResult input_pipeline = + tensorflow::profiler::ConvertOpStatsToInputPipelineAnalysis( + combined_op_stats); + return py::make_tuple(py::bytes(input_pipeline.SerializeAsString()), + py::bool_(true)); + }); - m.def("xspace_to_tf_stats", [](const py::bytes& serialized_xspace_proto) { - tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(std::string(serialized_xspace_proto)); + m.def("xspace_to_tf_stats", [](const py::list& xspace_path_list) { + std::vector xspace_paths = GetXSpacePaths(xspace_path_list); tensorflow::profiler::OpStatsOptions options; options.generate_op_metrics_db = true; options.generate_kernel_stats_db = true; + tensorflow::profiler::OpStats combined_op_stats; + tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats( + xspace_paths, options, &combined_op_stats); + if (!status.ok()) { + LOG(WARNING) << "Could not generate OpStats for tensorflow stats. 
Error: " + << status.error_message(); + return py::make_tuple(py::bytes(), py::bool_(false)); + } tensorflow::profiler::TfStatsDatabase tf_stats_db = - tensorflow::profiler::ConvertOpStatsToTfStats( - ConvertXSpaceToOpStats(xspace, options)); - return py::bytes(tf_stats_db.SerializeAsString()); + tensorflow::profiler::ConvertOpStatsToTfStats(combined_op_stats); + return py::make_tuple(py::bytes(tf_stats_db.SerializeAsString()), + py::bool_(true)); }); - m.def("xspace_to_kernel_stats", [](const py::bytes& serialized_xspace_proto) { - tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(std::string(serialized_xspace_proto)); + m.def("xspace_to_kernel_stats", [](const py::list& xspace_path_list) { + std::vector xspace_paths = GetXSpacePaths(xspace_path_list); tensorflow::profiler::OpStatsOptions options; options.generate_kernel_stats_db = true; - tensorflow::profiler::OpStats op_stats = - ConvertXSpaceToOpStats(xspace, options); - return py::bytes(op_stats.kernel_stats_db().SerializeAsString()); + tensorflow::profiler::OpStats combined_op_stats; + tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats( + xspace_paths, options, &combined_op_stats); + if (!status.ok()) { + LOG(WARNING) << "Could not generate OpStats for kernel stats. 
Error: " + << status.error_message(); + return py::make_tuple(py::bytes(), py::bool_(false)); + } + return py::make_tuple( + py::bytes(combined_op_stats.kernel_stats_db().SerializeAsString()), + py::bool_(true)); }); - m.def("xspace_to_memory_profile", - [](const py::bytes& serialized_xspace_proto) { - tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(std::string(serialized_xspace_proto)); - std::string json_output; - tensorflow::profiler::ConvertXSpaceToMemoryProfileJson(xspace, - &json_output); - return py::bytes(json_output); - }); + m.def("xspace_to_memory_profile", [](const py::list& xspace_path_list) { + std::vector xspace_paths = GetXSpacePaths(xspace_path_list); + if (xspace_paths.size() != 1) { + LOG(WARNING) << "Memory profile tool expects only 1 XSpace path but gets " + << xspace_paths.size(); + return py::make_tuple(py::bytes(), py::bool_(false)); + } + tensorflow::profiler::XSpace xspace; + tensorflow::Status status = tensorflow::ReadBinaryProto( + tensorflow::Env::Default(), xspace_paths[0], &xspace); + if (!status.ok()) { + LOG(WARNING) << "Could not read XSpace for memory profile: " + << xspace_paths[0]; + return py::make_tuple(py::bytes(), py::bool_(false)); + } + std::string json_output; + tensorflow::profiler::ConvertXSpaceToMemoryProfileJson(xspace, + &json_output); + return py::make_tuple(py::bytes(json_output), py::bool_(true)); + }); };