Add Hostname to XPlane and OpStats in OSS.

Call OpStatsToPodViewer converter in OSS.

PiperOrigin-RevId: 337194132
Change-Id: Idd2de2066012a773eda2cec7d11542d09ecbbfa0
This commit is contained in:
A. Unique TensorFlower 2020-10-14 16:11:49 -07:00 committed by TensorFlower Gardener
parent 162e0e02b8
commit 2882df7cf2
8 changed files with 71 additions and 11 deletions

View File

@ -356,6 +356,7 @@ tf_cc_test(
size = "small",
srcs = ["xplane_to_op_stats_test.cc"],
deps = [
":step_events_to_steps_db",
":xplane_to_op_stats",
":xplane_to_tf_functions",
"//tensorflow/core:lib",
@ -684,6 +685,7 @@ cc_library(
deps = [
":op_stats_to_input_pipeline_analysis",
":op_stats_to_overview_page",
":op_stats_to_pod_viewer",
":op_stats_to_tf_stats",
":xplane_to_memory_profile",
":xplane_to_op_stats",
@ -693,6 +695,7 @@ cc_library(
"//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
"//tensorflow/core/profiler/protobuf:pod_viewer_proto_cc",
"//tensorflow/core/profiler/protobuf:tf_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/strings",

View File

@ -201,6 +201,11 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
*op_stats.mutable_device_op_metrics_db()->mutable_precision_stats() =
ComputePrecisionStats(nonoverlapped_step_events);
}
CoreDetails& details =
(*op_stats.mutable_core_id_to_details())[kDefaultGpuLocalCoreId];
details.set_hostname(space.hostnames().empty() ? "localhost"
: space.hostnames(0));
return op_stats;
}

View File

@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h"
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
@ -36,6 +37,8 @@ namespace tensorflow {
namespace profiler {
namespace {
static constexpr char kXPlanePb[] = "xplane.pb";
TEST(ConvertXPlaneToOpStats, PerfEnv) {
XSpace space;
constexpr double kMaxError = 0.01;
@ -178,9 +181,20 @@ TEST(ConvertXPlaneToOpStats, PropagateAndDedupErrors) {
EXPECT_EQ(kError, op_stats.diagnostics().errors(/*index=*/0));
}
// Checks that a hostname stored in the XSpace shows up in the converted
// OpStats, on the CoreDetails entry keyed by kDefaultGpuLocalCoreId.
TEST(ConvertXPlaneToOpStats, Hostnames) {
  static constexpr char kExpectedHostname[] = "host1";

  // Build an otherwise empty XSpace that carries a single hostname.
  XSpace xspace;
  *xspace.add_hostnames() = kExpectedHostname;

  const OpStats converted = ConvertXSpaceToOpStats(xspace, OpStatsOptions());

  const auto& details_by_core = converted.core_id_to_details();
  EXPECT_EQ(kExpectedHostname,
            details_by_core.at(kDefaultGpuLocalCoreId).hostname());
}
// Helper function to build an XSpace and store it in the test directory.
void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir,
const std::string& xspace_name) {
void BuildAndStoreXSpaceForTest(Env* test_env, absl::string_view test_dir,
absl::string_view hostname) {
constexpr int64 kStepNum = 123;
constexpr int64 kStepId = 456;
// Create a host only XSpace for test.
@ -202,6 +216,9 @@ void BuildAndStoreXSpaceForTest(Env* test_env, const std::string& test_dir,
CreateXEvent(&host_plane_builder, &executor_thread, "aaa:bbb", 30, 70);
GroupTfEvents(&xspace);
xspace.add_hostnames(hostname);
std::string xspace_name = absl::StrCat(hostname, ".", kXPlanePb);
TF_CHECK_OK(
WriteBinaryProto(test_env, io::JoinPath(test_dir, xspace_name), xspace))
<< "Failed to write binary XSpace to file: " << xspace_name;
@ -214,14 +231,17 @@ TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) {
TF_CHECK_OK(test_env->CreateDir(test_dir))
<< "Failed to create test directory: " << test_dir;
const std::string xspace1 = "xspace1.pb";
const std::string xspace2 = "xspace2.pb";
BuildAndStoreXSpaceForTest(test_env, test_dir, xspace1);
BuildAndStoreXSpaceForTest(test_env, test_dir, xspace2);
static constexpr char kHost1[] = "host1";
static constexpr char kHost2[] = "host2";
BuildAndStoreXSpaceForTest(test_env, test_dir, kHost1);
BuildAndStoreXSpaceForTest(test_env, test_dir, kHost2);
std::vector<std::string> xspace_paths;
xspace_paths.push_back(io::JoinPath(test_dir, xspace1));
xspace_paths.push_back(io::JoinPath(test_dir, xspace2));
xspace_paths.push_back(
io::JoinPath(test_dir, absl::StrCat(kHost1, ".", kXPlanePb)));
xspace_paths.push_back(
io::JoinPath(test_dir, absl::StrCat(kHost2, ".", kXPlanePb)));
OpStatsOptions options;
options.generate_op_metrics_db = true;
options.generate_step_db = true;
@ -248,8 +268,13 @@ TEST(ConvertXPlaneToOpStats, TestConvertMultiXSpacesToCombinedOpStats) {
const auto& step_info_per_core =
combined_op_stats.step_db().step_sequence(0).step_info_per_core();
// global_core_id is computed using: 1000 * host_id + local_core_id.
EXPECT_TRUE(step_info_per_core.contains(1));
EXPECT_TRUE(step_info_per_core.contains(1001));
EXPECT_TRUE(step_info_per_core.contains(kDefaultGpuLocalCoreId));
EXPECT_TRUE(step_info_per_core.contains(1000 + kDefaultGpuLocalCoreId));
const auto& core_details_map = combined_op_stats.core_id_to_details();
EXPECT_EQ(kHost1, core_details_map.at(kDefaultGpuLocalCoreId).hostname());
EXPECT_EQ(kHost2,
core_details_map.at(1000 + kDefaultGpuLocalCoreId).hostname());
// Tear down environment and directory for testing.
int64 undeleted_files, undeleted_dirs;

View File

@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h"
#include "tensorflow/core/profiler/convert/op_stats_to_pod_viewer.h"
#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h"
#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
@ -30,6 +31,7 @@ limitations under the License.
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
#include "tensorflow/core/profiler/protobuf/pod_viewer.pb.h"
#include "tensorflow/core/profiler/protobuf/tf_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
@ -153,6 +155,23 @@ std::pair<std::string, bool> ConvertXSpaceToMemoryProfile(
return std::make_pair(json_output, true);
}
std::pair<std::string, bool> ConvertMultiXSpacesToPodViewer(
const std::vector<std::string>& xspace_paths) {
OpStatsOptions options;
options.generate_op_metrics_db = true;
options.generate_step_db = true;
OpStats combined_op_stats;
Status status = ConvertMultiXSpacesToCombinedOpStats(xspace_paths, options,
&combined_op_stats);
if (!status.ok()) {
LOG(WARNING) << "Could not generate OpStats for pod_viewer. Error: "
<< status.error_message();
return std::make_pair("", false);
}
return std::make_pair(
ConvertOpStatsToPodViewer(combined_op_stats).SerializeAsString(), true);
}
} // namespace
std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
@ -170,6 +189,8 @@ std::pair<std::string, bool> ConvertMultiXSpacesToToolData(
return ConvertMultiXSpacesToKernelStats(xspace_paths);
} else if (tool_name == "memory_profile") {
return ConvertXSpaceToMemoryProfile(xspace_paths);
} else if (tool_name == "pod_viewer") {
return ConvertMultiXSpacesToPodViewer(xspace_paths);
} else {
LOG(WARNING) << "Can not find tool: " << tool_name << ". Please update to "
<< "the latest version of Tensorflow.";

View File

@ -5,13 +5,15 @@ package tensorflow.profiler;
option cc_enable_arenas = true;
// A container of parallel XPlanes, generated by one or more profiling sources.
// Next ID: 4
// Next ID: 5
message XSpace {
// The XPlanes held by this container.
repeated XPlane planes = 1;
// Errors (if any) in the generation of planes.
repeated string errors = 2;
// Warnings (if any) in the generation of planes.
repeated string warnings = 3;
// List of hostnames that XPlanes are generated from.
repeated string hostnames = 4;
}
// An XPlane is a container of parallel timelines (XLines), generated by a

View File

@ -50,6 +50,7 @@ Status CollectDataToRepository(const ProfileRequest& request,
// Read the profile data into xspace.
XSpace xspace;
TF_RETURN_IF_ERROR(profiler->CollectData(&xspace));
xspace.add_hostnames(request.host_name());
VLOG(3) << "Collected XSpace to repository.";
response->set_empty_trace(IsEmpty(xspace));

View File

@ -120,6 +120,7 @@ tf_python_pybind_extension(
],
deps = [
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/profiler/convert:xplane_to_tools_data",
"//tensorflow/core/profiler/convert:xplane_to_trace_events",
"//tensorflow/core/profiler/lib:profiler_session_for_pybind",

View File

@ -29,6 +29,7 @@ limitations under the License.
#include "pybind11/pytypes.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/host_info.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/convert/xplane_to_tools_data.h"
@ -234,6 +235,7 @@ class ProfilerSessionWrapper {
tensorflow::profiler::XSpace xspace;
tensorflow::Status status;
status = session_->CollectData(&xspace);
xspace.add_hostnames(tensorflow::port::Hostname());
session_.reset();
status = tensorflow::profiler::ExportToTensorBoard(xspace, logdir_);
tensorflow::MaybeRaiseRegisteredFromStatus(status);