Add Hostname to XPlane and OpStats in OSS.
Call OpStatsToPodViewer converter in OSS.

PiperOrigin-RevId: 337194132
Change-Id: Idd2de2066012a773eda2cec7d11542d09ecbbfa0
This commit is contained in:
parent
162e0e02b8
commit
2882df7cf2
@ -356,6 +356,7 @@ tf_cc_test(
|
||||
size = "small",
|
||||
srcs = ["xplane_to_op_stats_test.cc"],
|
||||
deps = [
|
||||
":step_events_to_steps_db",
|
||||
":xplane_to_op_stats",
|
||||
":xplane_to_tf_functions",
|
||||
"//tensorflow/core:lib",
|
||||
@ -684,6 +685,7 @@ cc_library(
|
||||
deps = [
|
||||
":op_stats_to_input_pipeline_analysis",
|
||||
":op_stats_to_overview_page",
|
||||
":op_stats_to_pod_viewer",
|
||||
":op_stats_to_tf_stats",
|
||||
":xplane_to_memory_profile",
|
||||
":xplane_to_op_stats",
|
||||
@ -693,6 +695,7 @@ cc_library(
|
||||
"//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:pod_viewer_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:tf_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"@com_google_absl//absl/strings",
|
||||
|
||||
@ -201,6 +201,11 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
|
||||
*op_stats.mutable_device_op_metrics_db()->mutable_precision_stats() =
|
||||
ComputePrecisionStats(nonoverlapped_step_events);
|
||||
}
|
||||
|
||||
CoreDetails& details =
|
||||
(*op_stats.mutable_core_id_to_details())[kDefaultGpuLocalCoreId];
|
||||
details.set_hostname(space.hostnames().empty() ? "localhost"
|
||||
: space.hostnames(0));
|
||||
return op_stats;
|
||||
}
|
||||
|
||||
|
||||
@ -21,6 +21,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h"
|
||||
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
@ -36,6 +37,8 @@ namespace tensorflow {
|
||||
namespace profiler {
|
||||
namespace {
|
||||
|
||||
static constexpr char kXPlanePb[] = "xplane.pb";
|
||||
|
||||
TEST(ConvertXPlaneToOpStats, PerfEnv) {
|
||||
XSpace space;
|
||||
constexpr double kMaxError = 0.01;
|
||||
@ -178,9 +181,20 @@ TEST(ConvertXPlaneToOpStats, PropagateAndDedupErrors) {
|
||||
EXPECT_EQ(kError, op_stats.diagnostics().errors(/*index=*/0));
|
||||
}
|
||||
|
||||
// Checks that a hostname stored in the XSpace shows up as the hostname of the
// CoreDetails entry keyed by kDefaultGpuLocalCoreId in the converted OpStats.
TEST(ConvertXPlaneToOpStats, Hostnames) {
|
||||
XSpace space;
|
||||
static constexpr char kHost[] = "host1";
|
||||
// The XSpace carries exactly one hostname and no planes.
*space.add_hostnames() = kHost;
|
||||
|
||||
// Default-constructed options: the test does not enable any option itself.
OpStats op_stats = ConvertXSpaceToOpStats(space, OpStatsOptions());
|
||||
EXPECT_EQ(
|
||||
kHost,
|
||||
op_stats.core_id_to_details().at(kDefaultGpuLocalCoreId).hostname());
|
||||
}
|
||||
|
||||
// Helper function to build an XSpace and store it to test directory.
|
||||
void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir,
|
||||
const std::string& xspace_name) {
|
||||
void BuildAndStoreXSpaceForTest(Env* test_env, absl::string_view test_dir,
|
||||
absl::string_view hostname) {
|
||||
constexpr int64 kStepNum = 123;
|
||||
constexpr int64 kStepId = 456;
|
||||
// Create a host only XSpace for test.
|
||||
@ -202,6 +216,9 @@ void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir,
|
||||
CreateXEvent(&host_plane_builder, &executor_thread, "aaa:bbb", 30, 70);
|
||||
GroupTfEvents(&xspace);
|
||||
|
||||
xspace.add_hostnames(hostname);
|
||||
|
||||
std::string xspace_name = absl::StrCat(hostname, ".", kXPlanePb);
|
||||
TF_CHECK_OK(
|
||||
WriteBinaryProto(test_env, io::JoinPath(test_dir, xspace_name), xspace))
|
||||
<< "Failed to write binary XSpace to file: " << xspace_name;
|
||||
@ -214,14 +231,17 @@ TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) {
|
||||
TF_CHECK_OK(test_env->CreateDir(test_dir))
|
||||
<< "Failed to create test directory: " << test_dir;
|
||||
|
||||
const std::string xspace1 = "xspace1.pb";
|
||||
const std::string xspace2 = "xspace2.pb";
|
||||
BuildAndStoreXSpaceForTest(test_env, test_dir, xspace1);
|
||||
BuildAndStoreXSpaceForTest(test_env, test_dir, xspace2);
|
||||
static constexpr char kHost1[] = "host1";
|
||||
static constexpr char kHost2[] = "host2";
|
||||
|
||||
BuildAndStoreXSpaceForTest(test_env, test_dir, kHost1);
|
||||
BuildAndStoreXSpaceForTest(test_env, test_dir, kHost2);
|
||||
|
||||
std::vector<std::string> xspace_paths;
|
||||
xspace_paths.push_back(io::JoinPath(test_dir, xspace1));
|
||||
xspace_paths.push_back(io::JoinPath(test_dir, xspace2));
|
||||
xspace_paths.push_back(
|
||||
io::JoinPath(test_dir, absl::StrCat(kHost1, ".", kXPlanePb)));
|
||||
xspace_paths.push_back(
|
||||
io::JoinPath(test_dir, absl::StrCat(kHost2, ".", kXPlanePb)));
|
||||
OpStatsOptions options;
|
||||
options.generate_op_metrics_db = true;
|
||||
options.generate_step_db = true;
|
||||
@ -248,8 +268,13 @@ TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) {
|
||||
const auto& step_info_per_core =
|
||||
combined_op_stats.step_db().step_sequence(0).step_info_per_core();
|
||||
// global_core_id is computed using: 1000 * host_id + local_core_id.
|
||||
EXPECT_TRUE(step_info_per_core.contains(1));
|
||||
EXPECT_TRUE(step_info_per_core.contains(1001));
|
||||
EXPECT_TRUE(step_info_per_core.contains(kDefaultGpuLocalCoreId));
|
||||
EXPECT_TRUE(step_info_per_core.contains(1000 + kDefaultGpuLocalCoreId));
|
||||
|
||||
const auto& core_details_map = combined_op_stats.core_id_to_details();
|
||||
EXPECT_EQ(kHost1, core_details_map.at(kDefaultGpuLocalCoreId).hostname());
|
||||
EXPECT_EQ(kHost2,
|
||||
core_details_map.at(1000 + kDefaultGpuLocalCoreId).hostname());
|
||||
|
||||
// Tear down environment and directory for testing.
|
||||
int64 undeleted_files, undeleted_dirs;
|
||||
|
||||
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h"
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_pod_viewer.h"
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
|
||||
@ -30,6 +31,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/pod_viewer.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/tf_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
|
||||
@ -153,6 +155,23 @@ std::pair<std::string, bool> ConvertXSpaceToMemoryProfile(
|
||||
return std::make_pair(json_output, true);
|
||||
}
|
||||
|
||||
// Produces the tool data for "pod_viewer" from the XSpace files at
// `xspace_paths`.
//
// The XSpaces are first combined into a single OpStats with
// generate_op_metrics_db and generate_step_db enabled; that OpStats is then
// passed to ConvertOpStatsToPodViewer and the result is serialized.
//
// Returns {serialized pod viewer data, true} on success. If the combined
// OpStats cannot be generated, logs a warning and returns {"", false}.
std::pair<std::string, bool> ConvertMultiXSpacesToPodViewer(
|
||||
const std::vector<std::string>& xspace_paths) {
|
||||
OpStatsOptions options;
|
||||
options.generate_op_metrics_db = true;
|
||||
options.generate_step_db = true;
|
||||
OpStats combined_op_stats;
|
||||
Status status = ConvertMultiXSpacesToCombinedOpStats(xspace_paths, options,
|
||||
&combined_op_stats);
|
||||
// A conversion failure is reported to the caller through the bool flag
// rather than propagated as a Status; the error text only goes to the log.
if (!status.ok()) {
|
||||
LOG(WARNING) << "Could not generate OpStats for pod_viewer. Error: "
|
||||
<< status.error_message();
|
||||
return std::make_pair("", false);
|
||||
}
|
||||
return std::make_pair(
|
||||
ConvertOpStatsToPodViewer(combined_op_stats).SerializeAsString(), true);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
|
||||
@ -170,6 +189,8 @@ std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
|
||||
return ConvertMultiXSpacesToKernelStats(xspace_paths);
|
||||
} else if (tool_name == "memory_profile") {
|
||||
return ConvertXSpaceToMemoryProfile(xspace_paths);
|
||||
} else if (tool_name == "pod_viewer") {
|
||||
return ConvertMultiXSpacesToPodViewer(xspace_paths);
|
||||
} else {
|
||||
LOG(WARNING) << "Can not find tool: " << tool_name << ". Please update to "
|
||||
<< "the latest version of Tensorflow.";
|
||||
|
||||
@ -5,13 +5,15 @@ package tensorflow.profiler;
|
||||
option cc_enable_arenas = true;
|
||||
|
||||
// A container of parallel XPlanes, generated by one or more profiling sources.
|
||||
// Next ID: 4
|
||||
// Next ID: 5
|
||||
message XSpace {
|
||||
// The parallel XPlanes held by this container.
repeated XPlane planes = 1;
|
||||
// Errors (if any) in the generation of planes.
|
||||
repeated string errors = 2;
|
||||
// Warnings (if any) in the generation of planes.
|
||||
repeated string warnings = 3;
|
||||
// List of hostnames that XPlanes are generated from.
|
||||
repeated string hostnames = 4;
|
||||
}
|
||||
|
||||
// An XPlane is a container of parallel timelines (XLines), generated by a
|
||||
|
||||
@ -50,6 +50,7 @@ Status CollectDataToRepository(const ProfileRequest& request,
|
||||
// Read the profile data into xspace.
|
||||
XSpace xspace;
|
||||
TF_RETURN_IF_ERROR(profiler->CollectData(&xspace));
|
||||
xspace.add_hostnames(request.host_name());
|
||||
VLOG(3) << "Collected XSpace to repository.";
|
||||
response->set_empty_trace(IsEmpty(xspace));
|
||||
|
||||
|
||||
@ -120,6 +120,7 @@ tf_python_pybind_extension(
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core/profiler/convert:xplane_to_tools_data",
|
||||
"//tensorflow/core/profiler/convert:xplane_to_trace_events",
|
||||
"//tensorflow/core/profiler/lib:profiler_session_for_pybind",
|
||||
|
||||
@ -29,6 +29,7 @@ limitations under the License.
|
||||
#include "pybind11/pytypes.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/errors.h"
|
||||
#include "tensorflow/core/platform/host_info.h"
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_tools_data.h"
|
||||
@ -234,6 +235,7 @@ class ProfilerSessionWrapper {
|
||||
tensorflow::profiler::XSpace xspace;
|
||||
tensorflow::Status status;
|
||||
status = session_->CollectData(&xspace);
|
||||
xspace.add_hostnames(tensorflow::port::Hostname());
|
||||
session_.reset();
|
||||
status = tensorflow::profiler::ExportToTensorBoard(xspace, logdir_);
|
||||
tensorflow::MaybeRaiseRegisteredFromStatus(status);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user