Use OpStats combiner in profiler_wrapper.cc

PiperOrigin-RevId: 333350927
Change-Id: Idb7bb344253f32326dbddf290e68f23a41827d6c
This commit is contained in:
A. Unique TensorFlower 2020-09-23 12:42:34 -07:00 committed by TensorFlower Gardener
parent a57895321e
commit 8c30da064f
6 changed files with 251 additions and 51 deletions

View File

@ -232,6 +232,7 @@ cc_library(
hdrs = ["xplane_to_op_stats.h"],
deps = [
":op_metrics_db_combiner",
":op_stats_combiner",
":step_events_to_steps_db",
":xplane_to_kernel_stats_db",
":xplane_to_op_metrics_db",
@ -249,6 +250,7 @@ cc_library(
"//tensorflow/core/profiler/utils:event_span",
"//tensorflow/core/profiler/utils:hardware_type_utils",
"//tensorflow/core/profiler/utils:kernel_stats_utils",
"//tensorflow/core/profiler/utils:step_intersection",
"//tensorflow/core/profiler/utils:tf_op_utils",
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
"//tensorflow/core/profiler/utils:xplane_schema",

View File

@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h"
#include "tensorflow/core/profiler/convert/op_stats_combiner.h"
#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h"
#include "tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h"
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
@ -38,6 +39,7 @@ limitations under the License.
#include "tensorflow/core/profiler/utils/event_span.h"
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
#include "tensorflow/core/profiler/utils/step_intersection.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
@ -202,5 +204,44 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
return op_stats;
}
// Reads every XSpace file listed in <xspace_paths>, converts each one to an
// OpStats using <options>, and stores the merged result in
// <combined_op_stats>.
//
// Returns the first non-OK status reported by ReadBinaryProto; in that case
// <combined_op_stats> is left untouched by this function. Returns
// Status::OK() otherwise.
Status ConvertMultiXSpacesToCombinedOpStats(
    const std::vector<std::string>& xspace_paths, const OpStatsOptions& options,
    OpStats* combined_op_stats) {
  // A shortcut code path for a single XSpace. There is no need to merge OpStats
  // if there is only a single XSpace.
  if (xspace_paths.size() == 1) {
    XSpace xspace;
    Status status = ReadBinaryProto(Env::Default(), xspace_paths[0], &xspace);
    if (!status.ok()) return status;
    *combined_op_stats = ConvertXSpaceToOpStats(xspace, options);
    return Status::OK();
  }

  // Read multiple XSpaces and convert to multiple OpStats. The final element
  // count is known up front, so reserve once instead of growing repeatedly.
  std::vector<OpStats> all_op_stats;
  all_op_stats.reserve(xspace_paths.size());
  for (const std::string& xspace_path : xspace_paths) {
    XSpace xspace;
    Status status = ReadBinaryProto(Env::Default(), xspace_path, &xspace);
    if (!status.ok()) return status;
    all_op_stats.push_back(ConvertXSpaceToOpStats(xspace, options));
  }

  // Combine OpStats. The OpStatsInfo entries hold pointers into all_op_stats,
  // so they are only created after that vector has stopped growing.
  std::vector<OpStatsInfo> all_op_stats_info;
  all_op_stats_info.reserve(all_op_stats.size());
  // The index is also handed to OpStatsInfo as an int, so the bound is
  // converted once rather than comparing signed against unsigned each pass.
  // NOTE(review): the third OpStatsInfo argument is presumably the source
  // host id -- confirm against op_stats_combiner.h.
  const int num_op_stats = static_cast<int>(all_op_stats.size());
  for (int i = 0; i < num_op_stats; ++i) {
    all_op_stats_info.emplace_back(
        &all_op_stats[i],
        ParseHardwareType(all_op_stats[i].run_environment().device_type()), i);
  }

  // Do not limit the maximum number of steps during the merge of OpStats.
  StepIntersection step_intersection =
      ComputeStepIntersectionToMergeOpStats(all_op_stats_info, kuint32max);
  CombineAllOpStats(all_op_stats_info, step_intersection, combined_op_stats);
  return Status::OK();
}
} // namespace profiler
} // namespace tensorflow

View File

@ -17,6 +17,7 @@ limitations under the License.
#define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_OP_STATS_H_
#include "absl/container/flat_hash_set.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
@ -45,6 +46,14 @@ PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
// Extracts PerfEnv from XPlane stats.
PerfEnv GetPerfEnvFromXPlane(const XPlane& device_plane);
// Reads multiple XSpaces from <xspace_paths>, converts them to OpStats, and
// combines them into a single OpStats in <combined_op_stats>.
// Returns the first error status encountered, or Status::OK() if there is no
// error.
Status ConvertMultiXSpacesToCombinedOpStats(
const std::vector<std::string>& xspace_paths, const OpStatsOptions& options,
OpStats* combined_op_stats);
} // namespace profiler
} // namespace tensorflow

View File

@ -16,6 +16,9 @@ limitations under the License.
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
#include "absl/strings/str_cat.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/path.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
@ -175,6 +178,86 @@ TEST(ConvertXPlaneToOpStats, PropagateAndDedupErrors) {
EXPECT_EQ(kError, op_stats.diagnostics().errors(/*index=*/0));
}
// Test helper: assembles a host-only XSpace and writes it as a binary proto
// named <xspace_name> inside <test_dir>, using <test_env> for the file I/O.
// Aborts via TF_CHECK_OK if the write fails.
void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir,
                                const std::string& xspace_name) {
  constexpr int64 kStepNum = 123;
  constexpr int64 kStepId = 456;

  // The XSpace under construction contains a single host plane with two lines.
  XSpace xspace;
  XPlaneBuilder host_plane(GetOrCreateHostXPlane(&xspace));
  host_plane.ReserveLines(2);

  // Line 0: the trace context and the function run for the step.
  auto main_line = host_plane.GetOrCreateLine(0);
  CreateXEvent(&host_plane, &main_line, HostEventType::kTraceContext, 0, 100,
               {{StatType::kStepNum, kStepNum}});
  CreateXEvent(&host_plane, &main_line, HostEventType::kFunctionRun, 10, 90,
               {{StatType::kStepId, kStepId}});

  // Line 1: the executor event plus one TensorFlow op.
  auto executor_line = host_plane.GetOrCreateLine(1);
  CreateXEvent(&host_plane, &executor_line,
               HostEventType::kExecutorStateProcess, 20, 80,
               {{StatType::kStepId, kStepId}});
  // Create a TensorFlow op that runs for 70 ps.
  CreateXEvent(&host_plane, &executor_line, "aaa:bbb", 30, 70);

  GroupTfEvents(&xspace, /*group_metadata_map=*/nullptr);

  // Persist the finished XSpace so the conversion under test can read it back.
  const std::string xspace_path = io::JoinPath(test_dir, xspace_name);
  TF_CHECK_OK(WriteBinaryProto(test_env, xspace_path, xspace))
      << "Failed to write binary XSpace to file: " << xspace_name;
}
// Writes two identical XSpace files to a temporary directory and checks that
// ConvertMultiXSpacesToCombinedOpStats merges them into one OpStats.
TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) {
  // Set up: create a scratch directory and store two XSpaces in it.
  Env* env = Env::Default();
  const std::string test_dir = io::JoinPath(testing::TmpDir(), "test_dir");
  TF_CHECK_OK(env->CreateDir(test_dir))
      << "Failed to create test directory: " << test_dir;

  const std::string first_xspace_name = "xspace1.pb";
  const std::string second_xspace_name = "xspace2.pb";
  BuildAndStoreXSpaceForTest(env, test_dir, first_xspace_name);
  BuildAndStoreXSpaceForTest(env, test_dir, second_xspace_name);

  const std::vector<std::string> xspace_paths = {
      io::JoinPath(test_dir, first_xspace_name),
      io::JoinPath(test_dir, second_xspace_name),
  };

  // Exercise: convert and combine both XSpaces.
  OpStatsOptions options;
  options.generate_op_metrics_db = true;
  options.generate_step_db = true;
  OpStats combined_op_stats;
  TF_CHECK_OK(ConvertMultiXSpacesToCombinedOpStats(xspace_paths, options,
                                                   &combined_op_stats))
      << "Failed to convert multi XSpace to OpStats";

  // Verify host ops: the result has 2 Host Ops, "IDLE" and "aaa:bbb".
  const auto& host_op_metrics_db = combined_op_stats.host_op_metrics_db();
  ASSERT_EQ(host_op_metrics_db.metrics_db_size(), 2);
  const auto& metric = host_op_metrics_db.metrics_db(1);
  EXPECT_EQ(metric.name(), "aaa");
  EXPECT_EQ(metric.category(), "bbb");
  // Each host has the HostOp "aaa:bbb" running for 70 ps, so the combined
  // OpStats has "aaa:bbb" running for 140 ps in total.
  EXPECT_EQ(metric.self_time_ps(), 140);

  // Verify steps: the result has 1 step, 2 cores.
  const auto& step_db = combined_op_stats.step_db();
  ASSERT_EQ(step_db.step_sequence_size(), 1);
  const auto& only_step = step_db.step_sequence(0);
  ASSERT_EQ(only_step.step_info_per_core_size(), 2);
  const auto& step_info_per_core = only_step.step_info_per_core();
  // global_core_id is computed using: 1000 * host_id + local_core_id.
  EXPECT_TRUE(step_info_per_core.contains(1));
  EXPECT_TRUE(step_info_per_core.contains(1001));

  // Tear down: remove the scratch directory and everything in it.
  int64 undeleted_files;
  int64 undeleted_dirs;
  TF_CHECK_OK(
      env->DeleteRecursively(test_dir, &undeleted_files, &undeleted_dirs))
      << "Failed to delete test directory: " << test_dir;
}
} // namespace
} // namespace profiler
} // namespace tensorflow

View File

@ -127,6 +127,8 @@ tf_python_pybind_extension(
"//tensorflow/core/profiler/convert:xplane_to_op_stats",
"//tensorflow/core/profiler/convert:xplane_to_trace_events",
"//tensorflow/core/profiler/lib:profiler_session_headers",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/rpc:profiler_server_headers",
"//tensorflow/core/profiler/rpc/client:capture_profile",
"//tensorflow/core/profiler/rpc/client:save_profile",

View File

@ -18,8 +18,10 @@ limitations under the License.
#include "absl/memory/memory.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "pybind11/cast.h"
#include "pybind11/pybind11.h"
#include "pybind11/pytypes.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/types.h"
@ -32,6 +34,8 @@ limitations under the License.
#include "tensorflow/core/profiler/lib/profiler_session.h"
#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/rpc/client/capture_profile.h"
#include "tensorflow/core/profiler/rpc/client/save_profile.h"
#include "tensorflow/core/profiler/rpc/profiler_server.h"
@ -114,6 +118,15 @@ class ProfilerSessionWrapper {
tensorflow::string logdir_;
};
// Converts a pybind list of XSpace paths to a cpp vector.
//
// Each element of <python_paths> is cast to py::str and then copied into a
// std::string; the returned vector preserves the order of the list.
std::vector<std::string> GetXSpacePaths(const py::list& python_paths) {
  std::vector<std::string> cpp_paths;
  // The list length is known up front, so allocate once.
  cpp_paths.reserve(python_paths.size());
  for (py::handle obj : python_paths) {
    cpp_paths.push_back(std::string(py::cast<py::str>(obj)));
  }
  return cpp_paths;
}
} // namespace
PYBIND11_MODULE(_pywrap_profiler, m) {
@ -171,71 +184,121 @@ PYBIND11_MODULE(_pywrap_profiler, m) {
return content;
});
m.def("xspace_to_trace_events", [](const py::bytes& serialized_xspace_proto) {
tensorflow::string content;
m.def("xspace_to_trace_events", [](const py::list& xspace_path_list) {
std::vector<std::string> xspace_paths = GetXSpacePaths(xspace_path_list);
if (xspace_paths.size() != 1) {
LOG(WARNING) << "Trace events tool expects only 1 XSpace path but gets "
<< xspace_paths.size();
return py::make_tuple(py::bytes(), py::bool_(false));
}
tensorflow::profiler::XSpace xspace;
xspace.ParseFromString(std::string(serialized_xspace_proto));
tensorflow::Status status = tensorflow::ReadBinaryProto(
tensorflow::Env::Default(), xspace_paths[0], &xspace);
if (!status.ok()) {
LOG(WARNING) << "Could not read XSpace for trace events: "
<< xspace_paths[0];
return py::make_tuple(py::bytes(), py::bool_(false));
}
tensorflow::string content;
tensorflow::profiler::ConvertXSpaceToTraceEventsString(xspace, &content);
return py::bytes(content);
return py::make_tuple(py::bytes(content), py::bool_(true));
});
m.def("xspace_to_overview_page",
[](const py::bytes& serialized_xspace_proto) {
tensorflow::profiler::XSpace xspace;
xspace.ParseFromString(std::string(serialized_xspace_proto));
tensorflow::profiler::OpStatsOptions options;
options.generate_kernel_stats_db = true;
options.generate_op_metrics_db = true;
options.generate_step_db = true;
// TODO(profiler): xspace should tell whether this is sampling mode.
tensorflow::profiler::OverviewPage overview_page =
tensorflow::profiler::ConvertOpStatsToOverviewPage(
ConvertXSpaceToOpStats(xspace, options));
return py::bytes(overview_page.SerializeAsString());
});
m.def("xspace_to_overview_page", [](const py::list& xspace_path_list) {
std::vector<std::string> xspace_paths = GetXSpacePaths(xspace_path_list);
tensorflow::profiler::OpStatsOptions options;
options.generate_kernel_stats_db = true;
options.generate_op_metrics_db = true;
options.generate_step_db = true;
tensorflow::profiler::OpStats combined_op_stats;
tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats(
xspace_paths, options, &combined_op_stats);
if (!status.ok()) {
LOG(WARNING) << "Could not generate OpStats for overview page. Error: "
<< status.error_message();
return py::make_tuple(py::bytes(), py::bool_(false));
}
// TODO(profiler): xspace should tell whether this is sampling mode.
tensorflow::profiler::OverviewPage overview_page =
tensorflow::profiler::ConvertOpStatsToOverviewPage(combined_op_stats);
return py::make_tuple(py::bytes(overview_page.SerializeAsString()),
py::bool_(true));
});
m.def("xspace_to_input_pipeline",
[](const py::bytes& serialized_xspace_proto) {
tensorflow::profiler::XSpace xspace;
xspace.ParseFromString(std::string(serialized_xspace_proto));
tensorflow::profiler::OpStatsOptions options;
options.generate_op_metrics_db = true;
options.generate_step_db = true;
tensorflow::profiler::InputPipelineAnalysisResult input_pipeline =
tensorflow::profiler::ConvertOpStatsToInputPipelineAnalysis(
ConvertXSpaceToOpStats(xspace, options));
return py::bytes(input_pipeline.SerializeAsString());
});
m.def("xspace_to_input_pipeline", [](const py::list& xspace_path_list) {
std::vector<std::string> xspace_paths = GetXSpacePaths(xspace_path_list);
tensorflow::profiler::OpStatsOptions options;
options.generate_op_metrics_db = true;
options.generate_step_db = true;
tensorflow::profiler::OpStats combined_op_stats;
tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats(
xspace_paths, options, &combined_op_stats);
if (!status.ok()) {
LOG(WARNING) << "Could not generate OpStats for input pipeline. Error: "
<< status.error_message();
return py::make_tuple(py::bytes(), py::bool_(false));
}
tensorflow::profiler::InputPipelineAnalysisResult input_pipeline =
tensorflow::profiler::ConvertOpStatsToInputPipelineAnalysis(
combined_op_stats);
return py::make_tuple(py::bytes(input_pipeline.SerializeAsString()),
py::bool_(true));
});
m.def("xspace_to_tf_stats", [](const py::bytes& serialized_xspace_proto) {
tensorflow::profiler::XSpace xspace;
xspace.ParseFromString(std::string(serialized_xspace_proto));
m.def("xspace_to_tf_stats", [](const py::list& xspace_path_list) {
std::vector<std::string> xspace_paths = GetXSpacePaths(xspace_path_list);
tensorflow::profiler::OpStatsOptions options;
options.generate_op_metrics_db = true;
options.generate_kernel_stats_db = true;
tensorflow::profiler::OpStats combined_op_stats;
tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats(
xspace_paths, options, &combined_op_stats);
if (!status.ok()) {
LOG(WARNING) << "Could not generate OpStats for tensorflow stats. Error: "
<< status.error_message();
return py::make_tuple(py::bytes(), py::bool_(false));
}
tensorflow::profiler::TfStatsDatabase tf_stats_db =
tensorflow::profiler::ConvertOpStatsToTfStats(
ConvertXSpaceToOpStats(xspace, options));
return py::bytes(tf_stats_db.SerializeAsString());
tensorflow::profiler::ConvertOpStatsToTfStats(combined_op_stats);
return py::make_tuple(py::bytes(tf_stats_db.SerializeAsString()),
py::bool_(true));
});
m.def("xspace_to_kernel_stats", [](const py::bytes& serialized_xspace_proto) {
tensorflow::profiler::XSpace xspace;
xspace.ParseFromString(std::string(serialized_xspace_proto));
m.def("xspace_to_kernel_stats", [](const py::list& xspace_path_list) {
std::vector<std::string> xspace_paths = GetXSpacePaths(xspace_path_list);
tensorflow::profiler::OpStatsOptions options;
options.generate_kernel_stats_db = true;
tensorflow::profiler::OpStats op_stats =
ConvertXSpaceToOpStats(xspace, options);
return py::bytes(op_stats.kernel_stats_db().SerializeAsString());
tensorflow::profiler::OpStats combined_op_stats;
tensorflow::Status status = ConvertMultiXSpacesToCombinedOpStats(
xspace_paths, options, &combined_op_stats);
if (!status.ok()) {
LOG(WARNING) << "Could not generate OpStats for kernel stats. Error: "
<< status.error_message();
return py::make_tuple(py::bytes(), py::bool_(false));
}
return py::make_tuple(
py::bytes(combined_op_stats.kernel_stats_db().SerializeAsString()),
py::bool_(true));
});
m.def("xspace_to_memory_profile",
[](const py::bytes& serialized_xspace_proto) {
tensorflow::profiler::XSpace xspace;
xspace.ParseFromString(std::string(serialized_xspace_proto));
std::string json_output;
tensorflow::profiler::ConvertXSpaceToMemoryProfileJson(xspace,
&json_output);
return py::bytes(json_output);
});
m.def("xspace_to_memory_profile", [](const py::list& xspace_path_list) {
std::vector<std::string> xspace_paths = GetXSpacePaths(xspace_path_list);
if (xspace_paths.size() != 1) {
LOG(WARNING) << "Memory profile tool expects only 1 XSpace path but gets "
<< xspace_paths.size();
return py::make_tuple(py::bytes(), py::bool_(false));
}
tensorflow::profiler::XSpace xspace;
tensorflow::Status status = tensorflow::ReadBinaryProto(
tensorflow::Env::Default(), xspace_paths[0], &xspace);
if (!status.ok()) {
LOG(WARNING) << "Could not read XSpace for memory profile: "
<< xspace_paths[0];
return py::make_tuple(py::bytes(), py::bool_(false));
}
std::string json_output;
tensorflow::profiler::ConvertXSpaceToMemoryProfileJson(xspace,
&json_output);
return py::make_tuple(py::bytes(json_output), py::bool_(true));
});
};