Add run_id to TraceMes.

PiperOrigin-RevId: 300783157
Change-Id: I4d3f6eeea4454565fc12a2d643e65aa53b7fe7cd
This commit is contained in:
Jiho Choi 2020-03-13 10:39:06 -07:00 committed by TensorFlower Gardener
parent 329dd2f9ac
commit e84820c1b1
3 changed files with 14 additions and 6 deletions

View File

@ -30,6 +30,8 @@ std::string RunId::ToString() const {
return "RunId: " + std::to_string(data_); return "RunId: " + std::to_string(data_);
} }
// Returns the value held in data_ as an int64, for callers that need the
// numeric form rather than the "RunId: ..." string produced by ToString().
int64 RunId::ToInt() const {
  const int64 raw_value = data_;
  return raw_value;
}
ExecutableRunOptions& ExecutableRunOptions::set_device_ordinal( ExecutableRunOptions& ExecutableRunOptions::set_device_ordinal(
int device_ordinal) { int device_ordinal) {
device_ordinal_ = device_ordinal; device_ordinal_ = device_ordinal;

View File

@ -55,6 +55,7 @@ class RunId {
RunId& operator=(const RunId&) = default; RunId& operator=(const RunId&) = default;
friend bool operator==(const RunId& a, const RunId& b); friend bool operator==(const RunId& a, const RunId& b);
std::string ToString() const; std::string ToString() const;
int64 ToInt() const;
template <typename H> template <typename H>
friend H AbslHashValue(H h, const RunId& id) { friend H AbslHashValue(H h, const RunId& id) {

View File

@ -735,7 +735,10 @@ PyLocalExecutable::ExecuteHelper(
} }
CHECK_EQ(device->host_id(), client_->host_id()); CHECK_EQ(device->host_id(), client_->host_id());
int device_ordinal = device->local_device_state()->device_ordinal(); int device_ordinal = device->local_device_state()->device_ordinal();
tensorflow::profiler::TraceMe traceme("LocalExecutable::Execute"); tensorflow::profiler::TraceMe traceme([&] {
return absl::StrCat("LocalExecutable::Execute#run_id=", run_id.ToInt(),
"#");
});
VLOG(3) << "Replica " << replica << ", partition " << partition VLOG(3) << "Replica " << replica << ", partition " << partition
<< " mapped to device ordinal for execution: " << device_ordinal; << " mapped to device ordinal for execution: " << device_ordinal;
@ -861,8 +864,11 @@ StatusOr<std::vector<std::vector<std::unique_ptr<PyLocalBuffer>>>>
PyLocalExecutable::ExecuteOnLocalDevices( PyLocalExecutable::ExecuteOnLocalDevices(
absl::Span<const std::vector<PyLocalBuffer*>> argument_handles, absl::Span<const std::vector<PyLocalBuffer*>> argument_handles,
const ExecuteOptions& options) const { const ExecuteOptions& options) const {
tensorflow::profiler::TraceMe traceme( RunId run_id;
"LocalExecutable::ExecuteOnLocalDevices"); tensorflow::profiler::TraceMe traceme([&] {
return absl::StrCat(
"LocalExecutable::ExecuteOnLocalDevices#run_id=", run_id.ToInt(), "#");
});
const int num_local_devices = local_devices_.size(); const int num_local_devices = local_devices_.size();
@ -885,10 +891,9 @@ PyLocalExecutable::ExecuteOnLocalDevices(
// current thread. // current thread.
const int replica = local_logical_device_ids_[0].first; const int replica = local_logical_device_ids_[0].first;
const int partition = local_logical_device_ids_[0].second; const int partition = local_logical_device_ids_[0].second;
results[0] = ExecuteHelper(argument_handles[0], replica, partition, RunId(), results[0] =
options); ExecuteHelper(argument_handles[0], replica, partition, run_id, options);
} else { } else {
RunId run_id;
absl::Mutex mu; absl::Mutex mu;
int running = num_local_devices; int running = num_local_devices;
int failed = 0; int failed = 0;