Add Hostname to XPlane and OpStats in OSS.
Call OpStatsToPodViewer converter in OSS.

PiperOrigin-RevId: 337194132
Change-Id: Idd2de2066012a773eda2cec7d11542d09ecbbfa0
This commit is contained in:
parent
162e0e02b8
commit
2882df7cf2
@ -356,6 +356,7 @@ tf_cc_test(
|
|||||||
size = "small",
|
size = "small",
|
||||||
srcs = ["xplane_to_op_stats_test.cc"],
|
srcs = ["xplane_to_op_stats_test.cc"],
|
||||||
deps = [
|
deps = [
|
||||||
|
":step_events_to_steps_db",
|
||||||
":xplane_to_op_stats",
|
":xplane_to_op_stats",
|
||||||
":xplane_to_tf_functions",
|
":xplane_to_tf_functions",
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
@ -684,6 +685,7 @@ cc_library(
|
|||||||
deps = [
|
deps = [
|
||||||
":op_stats_to_input_pipeline_analysis",
|
":op_stats_to_input_pipeline_analysis",
|
||||||
":op_stats_to_overview_page",
|
":op_stats_to_overview_page",
|
||||||
|
":op_stats_to_pod_viewer",
|
||||||
":op_stats_to_tf_stats",
|
":op_stats_to_tf_stats",
|
||||||
":xplane_to_memory_profile",
|
":xplane_to_memory_profile",
|
||||||
":xplane_to_op_stats",
|
":xplane_to_op_stats",
|
||||||
@ -693,6 +695,7 @@ cc_library(
|
|||||||
"//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc",
|
"//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc",
|
||||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||||
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
|
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
|
||||||
|
"//tensorflow/core/profiler/protobuf:pod_viewer_proto_cc",
|
||||||
"//tensorflow/core/profiler/protobuf:tf_stats_proto_cc",
|
"//tensorflow/core/profiler/protobuf:tf_stats_proto_cc",
|
||||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
|
|||||||
@ -201,6 +201,11 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
|
|||||||
*op_stats.mutable_device_op_metrics_db()->mutable_precision_stats() =
|
*op_stats.mutable_device_op_metrics_db()->mutable_precision_stats() =
|
||||||
ComputePrecisionStats(nonoverlapped_step_events);
|
ComputePrecisionStats(nonoverlapped_step_events);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CoreDetails& details =
|
||||||
|
(*op_stats.mutable_core_id_to_details())[kDefaultGpuLocalCoreId];
|
||||||
|
details.set_hostname(space.hostnames().empty() ? "localhost"
|
||||||
|
: space.hostnames(0));
|
||||||
return op_stats;
|
return op_stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/platform/status.h"
|
#include "tensorflow/core/platform/status.h"
|
||||||
#include "tensorflow/core/platform/test.h"
|
#include "tensorflow/core/platform/test.h"
|
||||||
#include "tensorflow/core/platform/types.h"
|
#include "tensorflow/core/platform/types.h"
|
||||||
|
#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
|
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||||
@ -36,6 +37,8 @@ namespace tensorflow {
|
|||||||
namespace profiler {
|
namespace profiler {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
static constexpr char kXPlanePb[] = "xplane.pb";
|
||||||
|
|
||||||
TEST(ConvertXPlaneToOpStats, PerfEnv) {
|
TEST(ConvertXPlaneToOpStats, PerfEnv) {
|
||||||
XSpace space;
|
XSpace space;
|
||||||
constexpr double kMaxError = 0.01;
|
constexpr double kMaxError = 0.01;
|
||||||
@ -178,9 +181,20 @@ TEST(ConvertXPlaneToOpStats, PropagateAndDedupErrors) {
|
|||||||
EXPECT_EQ(kError, op_stats.diagnostics().errors(/*index=*/0));
|
EXPECT_EQ(kError, op_stats.diagnostics().errors(/*index=*/0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(ConvertXPlaneToOpStats, Hostnames) {
|
||||||
|
XSpace space;
|
||||||
|
static constexpr char kHost[] = "host1";
|
||||||
|
*space.add_hostnames() = kHost;
|
||||||
|
|
||||||
|
OpStats op_stats = ConvertXSpaceToOpStats(space, OpStatsOptions());
|
||||||
|
EXPECT_EQ(
|
||||||
|
kHost,
|
||||||
|
op_stats.core_id_to_details().at(kDefaultGpuLocalCoreId).hostname());
|
||||||
|
}
|
||||||
|
|
||||||
// Helper function to build an XSpace and store it to test directory.
|
// Helper function to build an XSpace and store it to test directory.
|
||||||
void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir,
|
void BuildAndStoreXSpaceForTest(Env* test_env, absl::string_view test_dir,
|
||||||
const std::string& xspace_name) {
|
absl::string_view hostname) {
|
||||||
constexpr int64 kStepNum = 123;
|
constexpr int64 kStepNum = 123;
|
||||||
constexpr int64 kStepId = 456;
|
constexpr int64 kStepId = 456;
|
||||||
// Create a host only XSpace for test.
|
// Create a host only XSpace for test.
|
||||||
@ -202,6 +216,9 @@ void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir,
|
|||||||
CreateXEvent(&host_plane_builder, &executor_thread, "aaa:bbb", 30, 70);
|
CreateXEvent(&host_plane_builder, &executor_thread, "aaa:bbb", 30, 70);
|
||||||
GroupTfEvents(&xspace);
|
GroupTfEvents(&xspace);
|
||||||
|
|
||||||
|
xspace.add_hostnames(hostname);
|
||||||
|
|
||||||
|
std::string xspace_name = absl::StrCat(hostname, ".", kXPlanePb);
|
||||||
TF_CHECK_OK(
|
TF_CHECK_OK(
|
||||||
WriteBinaryProto(test_env, io::JoinPath(test_dir, xspace_name), xspace))
|
WriteBinaryProto(test_env, io::JoinPath(test_dir, xspace_name), xspace))
|
||||||
<< "Failed to write binary XSpace to file: " << xspace_name;
|
<< "Failed to write binary XSpace to file: " << xspace_name;
|
||||||
@ -214,14 +231,17 @@ TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) {
|
|||||||
TF_CHECK_OK(test_env->CreateDir(test_dir))
|
TF_CHECK_OK(test_env->CreateDir(test_dir))
|
||||||
<< "Failed to create test directory: " << test_dir;
|
<< "Failed to create test directory: " << test_dir;
|
||||||
|
|
||||||
const std::string xspace1 = "xspace1.pb";
|
static constexpr char kHost1[] = "host1";
|
||||||
const std::string xspace2 = "xspace2.pb";
|
static constexpr char kHost2[] = "host2";
|
||||||
BuildAndStoreXSpaceForTest(test_env, test_dir, xspace1);
|
|
||||||
BuildAndStoreXSpaceForTest(test_env, test_dir, xspace2);
|
BuildAndStoreXSpaceForTest(test_env, test_dir, kHost1);
|
||||||
|
BuildAndStoreXSpaceForTest(test_env, test_dir, kHost2);
|
||||||
|
|
||||||
std::vector<std::string> xspace_paths;
|
std::vector<std::string> xspace_paths;
|
||||||
xspace_paths.push_back(io::JoinPath(test_dir, xspace1));
|
xspace_paths.push_back(
|
||||||
xspace_paths.push_back(io::JoinPath(test_dir, xspace2));
|
io::JoinPath(test_dir, absl::StrCat(kHost1, ".", kXPlanePb)));
|
||||||
|
xspace_paths.push_back(
|
||||||
|
io::JoinPath(test_dir, absl::StrCat(kHost2, ".", kXPlanePb)));
|
||||||
OpStatsOptions options;
|
OpStatsOptions options;
|
||||||
options.generate_op_metrics_db = true;
|
options.generate_op_metrics_db = true;
|
||||||
options.generate_step_db = true;
|
options.generate_step_db = true;
|
||||||
@ -248,8 +268,13 @@ TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) {
|
|||||||
const auto& step_info_per_core =
|
const auto& step_info_per_core =
|
||||||
combined_op_stats.step_db().step_sequence(0).step_info_per_core();
|
combined_op_stats.step_db().step_sequence(0).step_info_per_core();
|
||||||
// global_core_id is computed using: 1000 * host_id + local_core_id.
|
// global_core_id is computed using: 1000 * host_id + local_core_id.
|
||||||
EXPECT_TRUE(step_info_per_core.contains(1));
|
EXPECT_TRUE(step_info_per_core.contains(kDefaultGpuLocalCoreId));
|
||||||
EXPECT_TRUE(step_info_per_core.contains(1001));
|
EXPECT_TRUE(step_info_per_core.contains(1000 + kDefaultGpuLocalCoreId));
|
||||||
|
|
||||||
|
const auto& core_details_map = combined_op_stats.core_id_to_details();
|
||||||
|
EXPECT_EQ(kHost1, core_details_map.at(kDefaultGpuLocalCoreId).hostname());
|
||||||
|
EXPECT_EQ(kHost2,
|
||||||
|
core_details_map.at(1000 + kDefaultGpuLocalCoreId).hostname());
|
||||||
|
|
||||||
// Tear down environment and directory for testing.
|
// Tear down environment and directory for testing.
|
||||||
int64 undeleted_files, undeleted_dirs;
|
int64 undeleted_files, undeleted_dirs;
|
||||||
|
|||||||
@ -22,6 +22,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/platform/logging.h"
|
#include "tensorflow/core/platform/logging.h"
|
||||||
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
||||||
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h"
|
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h"
|
||||||
|
#include "tensorflow/core/profiler/convert/op_stats_to_pod_viewer.h"
|
||||||
#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h"
|
#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h"
|
||||||
#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"
|
#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"
|
||||||
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
|
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
|
||||||
@ -30,6 +31,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
|
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/pod_viewer.pb.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/tf_stats.pb.h"
|
#include "tensorflow/core/profiler/protobuf/tf_stats.pb.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||||
|
|
||||||
@ -153,6 +155,23 @@ std::pair<std::string, bool> ConvertXSpaceToMemoryProfile(
|
|||||||
return std::make_pair(json_output, true);
|
return std::make_pair(json_output, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::string, bool> ConvertMultiXSpacesToPodViewer(
|
||||||
|
const std::vector<std::string>& xspace_paths) {
|
||||||
|
OpStatsOptions options;
|
||||||
|
options.generate_op_metrics_db = true;
|
||||||
|
options.generate_step_db = true;
|
||||||
|
OpStats combined_op_stats;
|
||||||
|
Status status = ConvertMultiXSpacesToCombinedOpStats(xspace_paths, options,
|
||||||
|
&combined_op_stats);
|
||||||
|
if (!status.ok()) {
|
||||||
|
LOG(WARNING) << "Could not generate OpStats for pod_viewer. Error: "
|
||||||
|
<< status.error_message();
|
||||||
|
return std::make_pair("", false);
|
||||||
|
}
|
||||||
|
return std::make_pair(
|
||||||
|
ConvertOpStatsToPodViewer(combined_op_stats).SerializeAsString(), true);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
|
std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
|
||||||
@ -170,6 +189,8 @@ std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
|
|||||||
return ConvertMultiXSpacesToKernelStats(xspace_paths);
|
return ConvertMultiXSpacesToKernelStats(xspace_paths);
|
||||||
} else if (tool_name == "memory_profile") {
|
} else if (tool_name == "memory_profile") {
|
||||||
return ConvertXSpaceToMemoryProfile(xspace_paths);
|
return ConvertXSpaceToMemoryProfile(xspace_paths);
|
||||||
|
} else if (tool_name == "pod_viewer") {
|
||||||
|
return ConvertMultiXSpacesToPodViewer(xspace_paths);
|
||||||
} else {
|
} else {
|
||||||
LOG(WARNING) << "Can not find tool: " << tool_name << ". Please update to "
|
LOG(WARNING) << "Can not find tool: " << tool_name << ". Please update to "
|
||||||
<< "the latest version of Tensorflow.";
|
<< "the latest version of Tensorflow.";
|
||||||
|
|||||||
@ -5,13 +5,15 @@ package tensorflow.profiler;
|
|||||||
option cc_enable_arenas = true;
|
option cc_enable_arenas = true;
|
||||||
|
|
||||||
// A container of parallel XPlanes, generated by one or more profiling sources.
|
// A container of parallel XPlanes, generated by one or more profiling sources.
|
||||||
// Next ID: 4
|
// Next ID: 5
|
||||||
message XSpace {
|
message XSpace {
|
||||||
repeated XPlane planes = 1;
|
repeated XPlane planes = 1;
|
||||||
// Errors (if any) in the generation of planes.
|
// Errors (if any) in the generation of planes.
|
||||||
repeated string errors = 2;
|
repeated string errors = 2;
|
||||||
// Warnings (if any) in the generation of planes.
|
// Warnings (if any) in the generation of planes.
|
||||||
repeated string warnings = 3;
|
repeated string warnings = 3;
|
||||||
|
// List of hostnames that XPlanes are generated from.
|
||||||
|
repeated string hostnames = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
// An XPlane is a container of parallel timelines (XLines), generated by a
|
// An XPlane is a container of parallel timelines (XLines), generated by a
|
||||||
|
|||||||
@ -50,6 +50,7 @@ Status CollectDataToRepository(const ProfileRequest& request,
|
|||||||
// Read the profile data into xspace.
|
// Read the profile data into xspace.
|
||||||
XSpace xspace;
|
XSpace xspace;
|
||||||
TF_RETURN_IF_ERROR(profiler->CollectData(&xspace));
|
TF_RETURN_IF_ERROR(profiler->CollectData(&xspace));
|
||||||
|
xspace.add_hostnames(request.host_name());
|
||||||
VLOG(3) << "Collected XSpace to repository.";
|
VLOG(3) << "Collected XSpace to repository.";
|
||||||
response->set_empty_trace(IsEmpty(xspace));
|
response->set_empty_trace(IsEmpty(xspace));
|
||||||
|
|
||||||
|
|||||||
@ -120,6 +120,7 @@ tf_python_pybind_extension(
|
|||||||
],
|
],
|
||||||
deps = [
|
deps = [
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
|
"//tensorflow/core:lib_internal",
|
||||||
"//tensorflow/core/profiler/convert:xplane_to_tools_data",
|
"//tensorflow/core/profiler/convert:xplane_to_tools_data",
|
||||||
"//tensorflow/core/profiler/convert:xplane_to_trace_events",
|
"//tensorflow/core/profiler/convert:xplane_to_trace_events",
|
||||||
"//tensorflow/core/profiler/lib:profiler_session_for_pybind",
|
"//tensorflow/core/profiler/lib:profiler_session_for_pybind",
|
||||||
|
|||||||
@ -29,6 +29,7 @@ limitations under the License.
|
|||||||
#include "pybind11/pytypes.h"
|
#include "pybind11/pytypes.h"
|
||||||
#include "tensorflow/core/platform/env.h"
|
#include "tensorflow/core/platform/env.h"
|
||||||
#include "tensorflow/core/platform/errors.h"
|
#include "tensorflow/core/platform/errors.h"
|
||||||
|
#include "tensorflow/core/platform/host_info.h"
|
||||||
#include "tensorflow/core/platform/status.h"
|
#include "tensorflow/core/platform/status.h"
|
||||||
#include "tensorflow/core/platform/types.h"
|
#include "tensorflow/core/platform/types.h"
|
||||||
#include "tensorflow/core/profiler/convert/xplane_to_tools_data.h"
|
#include "tensorflow/core/profiler/convert/xplane_to_tools_data.h"
|
||||||
@ -234,6 +235,7 @@ class ProfilerSessionWrapper {
|
|||||||
tensorflow::profiler::XSpace xspace;
|
tensorflow::profiler::XSpace xspace;
|
||||||
tensorflow::Status status;
|
tensorflow::Status status;
|
||||||
status = session_->CollectData(&xspace);
|
status = session_->CollectData(&xspace);
|
||||||
|
xspace.add_hostnames(tensorflow::port::Hostname());
|
||||||
session_.reset();
|
session_.reset();
|
||||||
status = tensorflow::profiler::ExportToTensorBoard(xspace, logdir_);
|
status = tensorflow::profiler::ExportToTensorBoard(xspace, logdir_);
|
||||||
tensorflow::MaybeRaiseRegisteredFromStatus(status);
|
tensorflow::MaybeRaiseRegisteredFromStatus(status);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user