Make the semantics of repository_root more consistent.
PiperOrigin-RevId: 326098252 Change-Id: I036f3943b1e35aa0543906128e0d78f59ccf1735
This commit is contained in:
parent
c717e051fc
commit
45e89479c9
tensorflow
@ -81,7 +81,7 @@ Status ConvertXSpaceToProfileResponse(const XSpace& xspace,
|
||||
response->set_empty_trace(true);
|
||||
return Status::OK();
|
||||
}
|
||||
TF_RETURN_IF_ERROR(SaveGzippedToolDataToTensorboardProfile(
|
||||
TF_RETURN_IF_ERROR(SaveGzippedToolData(
|
||||
req.repository_root(), req.session_id(), req.host_name(),
|
||||
ToolName(kTraceViewer), TraceEventsToJson(trace)));
|
||||
// Trace viewer is the only tool, skip OpStats conversion.
|
||||
@ -110,7 +110,7 @@ Status ConvertXSpaceToProfileResponse(const XSpace& xspace,
|
||||
if (tools.contains(kMemoryProfile)) {
|
||||
std::string json_output;
|
||||
TF_RETURN_IF_ERROR(ConvertXSpaceToMemoryProfileJson(xspace, &json_output));
|
||||
TF_RETURN_IF_ERROR(SaveGzippedToolDataToTensorboardProfile(
|
||||
TF_RETURN_IF_ERROR(SaveGzippedToolData(
|
||||
req.repository_root(), req.session_id(), req.host_name(),
|
||||
ToolName(kMemoryProfile), json_output));
|
||||
}
|
||||
|
@ -7,8 +7,7 @@ import "tensorflow/core/profiler/profiler_service.proto";
|
||||
message NewProfileSessionRequest {
|
||||
ProfileRequest request = 1;
|
||||
// The place where we will dump profile data. We will normally use
|
||||
// MODEL_DIR as the repository root. The data will be saved under
|
||||
// MODEL_DIR/plugins/profile/.
|
||||
// MODEL_DIR/plugins/profile as the repository root.
|
||||
string repository_root = 2;
|
||||
repeated string hosts = 3;
|
||||
string session_id = 4;
|
||||
|
@ -48,8 +48,7 @@ message ProfileRequest {
|
||||
ProfileOptions opts = 4;
|
||||
|
||||
// The place where we will dump profile data. We will normally use
|
||||
// MODEL_DIR as the repository root. The data will be saved under
|
||||
// MODEL_DIR/plugins/profile/.
|
||||
// MODEL_DIR/plugins/profile/ as the repository root.
|
||||
string repository_root = 5;
|
||||
|
||||
// The user provided profile session identifier.
|
||||
|
@ -26,6 +26,8 @@ cc_library(
|
||||
"//tensorflow/core/profiler:profiler_options_proto_cc",
|
||||
"//tensorflow/core/profiler:profiler_service_proto_cc",
|
||||
"//tensorflow/core/profiler/convert:xplane_to_profile_response",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core:lib_internal",
|
||||
] + tf_profiler_client_deps(),
|
||||
)
|
||||
|
||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "absl/strings/str_split.h"
|
||||
#include "tensorflow/core/platform/errors.h"
|
||||
#include "tensorflow/core/platform/host_info.h"
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_profile_response.h"
|
||||
@ -47,6 +48,30 @@ MonitorRequest PopulateMonitorRequest(int duration_ms, int monitoring_level,
|
||||
return request;
|
||||
}
|
||||
|
||||
ProfileRequest PopulateProfileRequest(int duration_ms,
|
||||
const std::string& repository_root,
|
||||
const std::string& session_id,
|
||||
const std::string& host_name,
|
||||
const ProfileOptions& opts) {
|
||||
ProfileRequest request;
|
||||
request.set_duration_ms(duration_ms);
|
||||
request.set_max_events(kMaxEvents);
|
||||
request.set_repository_root(repository_root);
|
||||
request.set_session_id(session_id);
|
||||
request.set_host_name(host_name);
|
||||
request.add_tools("trace_viewer");
|
||||
request.add_tools("op_profile");
|
||||
request.add_tools("input_pipeline");
|
||||
request.add_tools("kernel_stats");
|
||||
request.add_tools("memory_viewer");
|
||||
request.add_tools("memory_profile");
|
||||
request.add_tools("overview_page");
|
||||
request.add_tools("pod_viewer");
|
||||
request.add_tools("tensorflow_stats");
|
||||
*request.mutable_opts() = opts;
|
||||
return request;
|
||||
}
|
||||
|
||||
NewProfileSessionRequest PopulateNewProfileSessionRequest(
|
||||
const std::string& service_addr, const std::string& repository_root,
|
||||
const std::vector<string>& hostnames, int duration_ms,
|
||||
@ -87,20 +112,20 @@ Status ConvertXSpaceToToolsInProfileResponse(const ProfileRequest& request,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status Profile(const std::string& service_addr, const std::string& logdir,
|
||||
int duration_ms, const std::string& session_id,
|
||||
const ProfileOptions& opts) {
|
||||
Status Profile(const std::string& service_addr,
|
||||
const std::string& repository_root, int duration_ms,
|
||||
const std::string& session_id, const ProfileOptions& opts) {
|
||||
std::vector<std::string> parts = absl::StrSplit(service_addr, ':');
|
||||
ProfileRequest request =
|
||||
PopulateProfileRequest(duration_ms, logdir, session_id, parts[0], opts);
|
||||
ProfileRequest request = PopulateProfileRequest(duration_ms, repository_root,
|
||||
session_id, parts[0], opts);
|
||||
ProfileResponse response;
|
||||
TF_RETURN_IF_ERROR(ProfileGrpc(service_addr, request, &response));
|
||||
|
||||
if (!response.empty_trace()) {
|
||||
TF_RETURN_IF_ERROR(
|
||||
ConvertXSpaceToToolsInProfileResponse(request, &response));
|
||||
TF_RETURN_IF_ERROR(SaveTensorboardProfile(
|
||||
logdir, session_id, request.host_name(), response, &std::cout));
|
||||
TF_RETURN_IF_ERROR(SaveProfile(repository_root, session_id,
|
||||
request.host_name(), response, &std::cout));
|
||||
// Print this at the end so that it's not buried in irrelevant LOG messages.
|
||||
std::cout
|
||||
<< "NOTE: using the trace duration " << duration_ms << "ms.\n"
|
||||
@ -138,30 +163,6 @@ Status NewSession(const std::string& service_addr,
|
||||
|
||||
} // namespace
|
||||
|
||||
ProfileRequest PopulateProfileRequest(int duration_ms,
|
||||
const std::string& repository_root,
|
||||
const std::string& session_id,
|
||||
const std::string& host_name,
|
||||
const ProfileOptions& opts) {
|
||||
ProfileRequest request;
|
||||
request.set_duration_ms(duration_ms);
|
||||
request.set_max_events(kMaxEvents);
|
||||
request.set_repository_root(repository_root);
|
||||
request.set_session_id(session_id);
|
||||
request.set_host_name(host_name);
|
||||
request.add_tools("trace_viewer");
|
||||
request.add_tools("op_profile");
|
||||
request.add_tools("input_pipeline");
|
||||
request.add_tools("kernel_stats");
|
||||
request.add_tools("memory_viewer");
|
||||
request.add_tools("memory_profile");
|
||||
request.add_tools("overview_page");
|
||||
request.add_tools("pod_viewer");
|
||||
request.add_tools("tensorflow_stats");
|
||||
*request.mutable_opts() = opts;
|
||||
return request;
|
||||
}
|
||||
|
||||
// Starts tracing on a single host or multiple hosts and saves the result in the
|
||||
// given logdir. If no trace was collected, retries tracing for
|
||||
// num_tracing_attempts.
|
||||
@ -174,6 +175,9 @@ Status Trace(const std::string& service_addr, const std::string& logdir,
|
||||
if (!workers_list.empty()) {
|
||||
hostnames = absl::StrSplit(workers_list, ',');
|
||||
}
|
||||
TF_RETURN_IF_ERROR(MaybeCreateEmptyEventFile(logdir));
|
||||
std::string repository_root =
|
||||
profiler::GetTensorBoardProfilePluginDir(logdir);
|
||||
|
||||
Status status = Status::OK();
|
||||
int remaining_attempts = num_tracing_attempts;
|
||||
@ -181,9 +185,10 @@ Status Trace(const std::string& service_addr, const std::string& logdir,
|
||||
std::cout << "Starting to trace for " << duration_ms << " ms. "
|
||||
<< "Remaining attempt(s): " << --remaining_attempts << std::endl;
|
||||
if (hostnames.empty()) {
|
||||
status = Profile(service_addr, logdir, duration_ms, session_id, opts);
|
||||
status =
|
||||
Profile(service_addr, repository_root, duration_ms, session_id, opts);
|
||||
} else {
|
||||
status = NewSession(service_addr, logdir, hostnames, duration_ms,
|
||||
status = NewSession(service_addr, repository_root, hostnames, duration_ms,
|
||||
session_id, opts);
|
||||
}
|
||||
if (remaining_attempts <= 0 || status.ok() || !ShouldRetryTracing(status))
|
||||
@ -213,5 +218,23 @@ Status Monitor(const std::string& service_addr, int duration_ms,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ExportToTensorBoard(const XSpace& xspace, const std::string& logdir) {
|
||||
TF_RETURN_IF_ERROR(MaybeCreateEmptyEventFile(logdir));
|
||||
|
||||
ProfileResponse response;
|
||||
ProfileRequest request = PopulateProfileRequest(
|
||||
/*duration_ms=*/0, GetTensorBoardProfilePluginDir(logdir),
|
||||
GetCurrentTimeStampAsString(), port::Hostname(), /*opts=*/{});
|
||||
TF_RETURN_IF_ERROR(
|
||||
ConvertXSpaceToProfileResponse(xspace, request, &response));
|
||||
|
||||
std::stringstream ss; // Record LOG messages.
|
||||
TF_RETURN_IF_ERROR(SaveProfile(request.repository_root(),
|
||||
request.session_id(), request.host_name(),
|
||||
response, &ss));
|
||||
LOG(INFO) << ss.str();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
@ -22,15 +22,13 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/profiler/profiler_options.pb.h"
|
||||
#include "tensorflow/core/profiler/profiler_service.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
ProfileRequest PopulateProfileRequest(int duration_ms,
|
||||
const std::string& repository_root,
|
||||
const std::string& session_id,
|
||||
const std::string& host_name,
|
||||
const ProfileOptions& opts);
|
||||
// Converts XSpace to tool data and saves it under <logdir>/plugins/profile/.
|
||||
Status ExportToTensorBoard(const XSpace& xspace, const std::string& logdir);
|
||||
|
||||
// Collects one sample of monitoring profile and shows user-friendly metrics.
|
||||
// If timestamp flag is true, timestamp will be displayed in "%H:%M:%S" format.
|
||||
|
@ -82,10 +82,8 @@ string ProfilerJoinPath(const T&... args) {
|
||||
constexpr char kProtoTraceFileName[] = "trace";
|
||||
constexpr char kTfStatsHelperSuffix[] = "tf_stats_helper_result";
|
||||
|
||||
Status DumpToolDataToLogDirectory(absl::string_view run_dir,
|
||||
absl::string_view host,
|
||||
const ProfileToolData& tool,
|
||||
std::ostream* os) {
|
||||
Status DumpToolData(absl::string_view run_dir, absl::string_view host,
|
||||
const ProfileToolData& tool, std::ostream* os) {
|
||||
// Don't save the intermediate results for combining the per host tool data.
|
||||
if (absl::EndsWith(tool.name(), kTfStatsHelperSuffix)) return Status::OK();
|
||||
string host_prefix = host.empty() ? "" : absl::StrCat(host, ".");
|
||||
@ -99,23 +97,6 @@ Status DumpToolDataToLogDirectory(absl::string_view run_dir,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Creates an empty event file if none exists yet, which indicates that we
|
||||
// have a plugins/profile/ directory in the current logdir.
|
||||
Status MaybeCreateEmptyEventFile(const string& logdir) {
|
||||
// Suffix for an empty event file. It should be kept in sync with
|
||||
// _EVENT_FILE_SUFFIX in tensorflow/python/eager/profiler.py.
|
||||
constexpr char kProfileEmptySuffix[] = ".profile-empty";
|
||||
std::vector<string> children;
|
||||
TF_RETURN_IF_ERROR(Env::Default()->GetChildren(logdir, &children));
|
||||
for (const string& child : children) {
|
||||
if (absl::EndsWith(child, kProfileEmptySuffix)) {
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
EventsWriter event_writer(ProfilerJoinPath(logdir, "events"));
|
||||
return event_writer.InitWithSuffix(kProfileEmptySuffix);
|
||||
}
|
||||
|
||||
Status WriteGzippedDataToFile(const string& filepath, const string& data) {
|
||||
std::unique_ptr<WritableFile> file;
|
||||
TF_RETURN_IF_ERROR(Env::Default()->NewWritableFile(filepath, &file));
|
||||
@ -129,20 +110,14 @@ Status WriteGzippedDataToFile(const string& filepath, const string& data) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status GetOrCreateProfileRunDir(const string& logdir, const string& run,
|
||||
string* profile_run_dir, std::ostream* os) {
|
||||
// Dumps profile data to <logdir>/plugins/profile/<run>/.
|
||||
*profile_run_dir =
|
||||
ProfilerJoinPath(GetTensorBoardProfilePluginDir(logdir), run);
|
||||
*os << "Creating directory: " << *profile_run_dir;
|
||||
TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(*profile_run_dir));
|
||||
|
||||
// Creates an empty event file so that TensorBoard plugin logic can find
|
||||
// the logdir.
|
||||
TF_RETURN_IF_ERROR(MaybeCreateEmptyEventFile(logdir));
|
||||
Status GetOrCreateRunDir(const string& repository_root, const string& run,
|
||||
string* run_dir, std::ostream* os) {
|
||||
// Dumps profile data to <repository_root>/<run>/.
|
||||
*run_dir = ProfilerJoinPath(repository_root, run);
|
||||
*os << "Creating directory: " << *run_dir;
|
||||
TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(*run_dir));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
string GetTensorBoardProfilePluginDir(const string& logdir) {
|
||||
@ -151,33 +126,42 @@ string GetTensorBoardProfilePluginDir(const string& logdir) {
|
||||
return ProfilerJoinPath(logdir, kPluginName, kProfileName);
|
||||
}
|
||||
|
||||
Status SaveTensorboardProfile(const string& logdir, const string& run,
|
||||
const string& host,
|
||||
const ProfileResponse& response,
|
||||
std::ostream* os) {
|
||||
string profile_run_dir;
|
||||
TF_RETURN_IF_ERROR(
|
||||
GetOrCreateProfileRunDir(logdir, run, &profile_run_dir, os));
|
||||
Status MaybeCreateEmptyEventFile(const string& logdir) {
|
||||
// Suffix for an empty event file. It should be kept in sync with
|
||||
// _EVENT_FILE_SUFFIX in tensorflow/python/eager/profiler.py.
|
||||
constexpr char kProfileEmptySuffix[] = ".profile-empty";
|
||||
std::vector<string> children;
|
||||
TF_RETURN_IF_ERROR(Env::Default()->GetChildren(logdir, &children));
|
||||
for (const string& child : children) {
|
||||
if (absl::EndsWith(child, kProfileEmptySuffix)) {
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
EventsWriter event_writer(ProfilerJoinPath(logdir, "events"));
|
||||
return event_writer.InitWithSuffix(kProfileEmptySuffix);
|
||||
}
|
||||
|
||||
Status SaveProfile(const string& repository_root, const string& run,
|
||||
const string& host, const ProfileResponse& response,
|
||||
std::ostream* os) {
|
||||
string run_dir;
|
||||
TF_RETURN_IF_ERROR(GetOrCreateRunDir(repository_root, run, &run_dir, os));
|
||||
for (const auto& tool_data : response.tool_data()) {
|
||||
TF_RETURN_IF_ERROR(
|
||||
DumpToolDataToLogDirectory(profile_run_dir, host, tool_data, os));
|
||||
TF_RETURN_IF_ERROR(DumpToolData(run_dir, host, tool_data, os));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SaveGzippedToolDataToTensorboardProfile(const string& logdir,
|
||||
const string& run,
|
||||
const string& host,
|
||||
const string& tool_name,
|
||||
const string& data) {
|
||||
string profile_run_dir;
|
||||
Status SaveGzippedToolData(const string& repository_root, const string& run,
|
||||
const string& host, const string& tool_name,
|
||||
const string& data) {
|
||||
string run_dir;
|
||||
std::stringstream ss;
|
||||
Status status = GetOrCreateProfileRunDir(logdir, run, &profile_run_dir, &ss);
|
||||
Status status = GetOrCreateRunDir(repository_root, run, &run_dir, &ss);
|
||||
LOG(INFO) << ss.str();
|
||||
TF_RETURN_IF_ERROR(status);
|
||||
string host_prefix = host.empty() ? "" : absl::StrCat(host, ".");
|
||||
string path =
|
||||
ProfilerJoinPath(profile_run_dir, absl::StrCat(host_prefix, tool_name));
|
||||
string path = ProfilerJoinPath(run_dir, absl::StrCat(host_prefix, tool_name));
|
||||
TF_RETURN_IF_ERROR(WriteGzippedDataToFile(path, data));
|
||||
LOG(INFO) << "Dumped gzipped tool data for " << tool_name << " to " << path;
|
||||
return Status::OK();
|
||||
|
@ -30,21 +30,22 @@ string GetCurrentTimeStampAsString();
|
||||
// Returns the profile plugin directory given a logdir to TensorBoard.
|
||||
string GetTensorBoardProfilePluginDir(const string& logdir);
|
||||
|
||||
// Saves all profiling tool data in a profile to a TensorBoard log directory
|
||||
// with the given run name. This writes user-facing log messages to `os`.
|
||||
// Creates an empty event file if none exists yet, which indicates that we
|
||||
// have a plugins/profile/ directory in the current logdir.
|
||||
Status MaybeCreateEmptyEventFile(const string& logdir);
|
||||
|
||||
// Saves all profiling tool data in a profile to <repository_root>/<run>/.
|
||||
// This writes user-facing log messages to `os`.
|
||||
// Note: this function creates a directory even when all fields in
|
||||
// ProfileResponse are unset/empty.
|
||||
Status SaveTensorboardProfile(const string& logdir, const string& run,
|
||||
const string& host,
|
||||
const ProfileResponse& response,
|
||||
std::ostream* os);
|
||||
Status SaveProfile(const string& repository_root, const string& run,
|
||||
const string& host, const ProfileResponse& response,
|
||||
std::ostream* os);
|
||||
|
||||
// Gzip the data and save to the specified filepath.
|
||||
Status SaveGzippedToolDataToTensorboardProfile(const string& logdir,
|
||||
const string& run,
|
||||
const string& host,
|
||||
const string& tool_name,
|
||||
const string& data);
|
||||
// Gzip the data and save to <repository_root>/<run>/.
|
||||
Status SaveGzippedToolData(const string& repository_root, const string& run,
|
||||
const string& host, const string& tool_name,
|
||||
const string& data);
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
@ -120,7 +120,6 @@ tf_python_pybind_extension(
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core/profiler/convert:op_stats_to_tf_stats",
|
||||
"//tensorflow/core/profiler/convert:xplane_to_op_stats",
|
||||
"//tensorflow/core/profiler/convert:xplane_to_profile_response",
|
||||
"//tensorflow/core/profiler/convert:xplane_to_trace_events",
|
||||
"//tensorflow/core/profiler/lib:profiler_session_headers",
|
||||
"//tensorflow/core/profiler/rpc:profiler_server_headers",
|
||||
|
@ -20,7 +20,6 @@ limitations under the License.
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/pytypes.h"
|
||||
#include "tensorflow/core/platform/errors.h"
|
||||
#include "tensorflow/core/platform/host_info.h"
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
||||
@ -28,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_profile_response.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_trace_events.h"
|
||||
#include "tensorflow/core/profiler/lib/profiler_session.h"
|
||||
#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
|
||||
@ -105,23 +103,7 @@ class ProfilerSessionWrapper {
|
||||
tensorflow::Status status;
|
||||
status = session_->CollectData(&xspace);
|
||||
session_.reset();
|
||||
tensorflow::MaybeRaiseRegisteredFromStatus(status);
|
||||
|
||||
tensorflow::ProfileResponse response;
|
||||
tensorflow::ProfileRequest request =
|
||||
tensorflow::profiler::PopulateProfileRequest(
|
||||
/*duration_ms=*/0, logdir_,
|
||||
tensorflow::profiler::GetCurrentTimeStampAsString(),
|
||||
tensorflow::port::Hostname(), /*opts=*/{});
|
||||
status = tensorflow::profiler::ConvertXSpaceToProfileResponse(
|
||||
xspace, request, &response);
|
||||
tensorflow::MaybeRaiseRegisteredFromStatus(status);
|
||||
|
||||
std::stringstream ss; // Record LOG messages.
|
||||
status = tensorflow::profiler::SaveTensorboardProfile(
|
||||
request.repository_root(), request.session_id(), request.host_name(),
|
||||
response, &ss);
|
||||
LOG(INFO) << ss.str();
|
||||
status = tensorflow::profiler::ExportToTensorBoard(xspace, logdir_);
|
||||
tensorflow::MaybeRaiseRegisteredFromStatus(status);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user