Add thread name to profile result.
PiperOrigin-RevId: 232702876
This commit is contained in:
parent
1362eaa98e
commit
4e928af59b
@ -409,6 +409,21 @@ void StepStatsCollector::Save(const string& device,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void StepStatsCollector::SaveThreadName(const string& device,
|
||||||
|
const uint32 thread_id,
|
||||||
|
const string& thread_name) {
|
||||||
|
VLOG(1) << "Save dev " << device << " thread id " << thread_id << " name "
|
||||||
|
<< thread_name;
|
||||||
|
{
|
||||||
|
mutex_lock l(mu_);
|
||||||
|
if (finalized_) {
|
||||||
|
LOG(WARNING) << "thread_name saved after finalize will not be collected.";
|
||||||
|
}
|
||||||
|
auto& thread_names_map = thread_names_[device];
|
||||||
|
thread_names_map[thread_id] = thread_name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
NodeExecStatsInterface* StepStatsCollector::CreateNodeExecStats(
|
NodeExecStatsInterface* StepStatsCollector::CreateNodeExecStats(
|
||||||
const Node* node) {
|
const Node* node) {
|
||||||
// Only collect statistics for non-transfer nodes.
|
// Only collect statistics for non-transfer nodes.
|
||||||
@ -531,5 +546,15 @@ void StepStatsCollector::FinalizeInternal() {
|
|||||||
stats->stats()->Swap(dss->add_node_stats());
|
stats->stats()->Swap(dss->add_node_stats());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (const auto& device_thread : thread_names_) {
|
||||||
|
if (dev_stats_pb.find(device_thread.first) == dev_stats_pb.end()) {
|
||||||
|
// Skip devices without DeviceStepStats.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
DeviceStepStats* dss = dev_stats_pb.at(device_thread.first);
|
||||||
|
for (const auto& thread_name : device_thread.second) {
|
||||||
|
(*dss->mutable_thread_names())[thread_name.first] = thread_name.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
@ -175,6 +175,10 @@ class StepStatsCollector : public StepStatsCollectorInterface {
|
|||||||
void Save(const string& device, NodeExecStats* node_stats_pb);
|
void Save(const string& device, NodeExecStats* node_stats_pb);
|
||||||
void Save(const string& device, NodeExecStatsWrapper* node_stats);
|
void Save(const string& device, NodeExecStatsWrapper* node_stats);
|
||||||
|
|
||||||
|
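// Saves `thread_name` as the name of thread `thread_id` on `device`,
// replacing any name previously saved for that thread.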
|
||||||
|
void SaveThreadName(const string& device, const uint32 thread_id,
|
||||||
|
const string& thread_name);
|
||||||
|
|
||||||
NodeExecStatsInterface* CreateNodeExecStats(const Node* node) override;
|
NodeExecStatsInterface* CreateNodeExecStats(const Node* node) override;
|
||||||
string ReportAllocsOnResourceExhausted(const string& err) override;
|
string ReportAllocsOnResourceExhausted(const string& err) override;
|
||||||
|
|
||||||
@ -191,12 +195,14 @@ class StepStatsCollector : public StepStatsCollectorInterface {
|
|||||||
static const uint64 kMaxCollectedNodes = 1 << 20;
|
static const uint64 kMaxCollectedNodes = 1 << 20;
|
||||||
|
|
||||||
typedef std::vector<std::unique_ptr<NodeExecStatsWrapper>> NodeStatsVector;
|
typedef std::vector<std::unique_ptr<NodeExecStatsWrapper>> NodeStatsVector;
|
||||||
|
typedef std::unordered_map<uint32, string> ThreadNamesMap;
|
||||||
|
|
||||||
void FinalizeInternal() EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
void FinalizeInternal() EXCLUSIVE_LOCKS_REQUIRED(mu_);
|
||||||
|
|
||||||
mutex mu_;
|
mutex mu_;
|
||||||
bool finalized_ GUARDED_BY(mu_);
|
bool finalized_ GUARDED_BY(mu_);
|
||||||
std::unordered_map<string, NodeStatsVector> dev_stats_ GUARDED_BY(mu_);
|
std::unordered_map<string, NodeStatsVector> dev_stats_ GUARDED_BY(mu_);
|
||||||
|
std::unordered_map<string, ThreadNamesMap> thread_names_ GUARDED_BY(mu_);
|
||||||
StepStats* step_stats_ GUARDED_BY(mu_);
|
StepStats* step_stats_ GUARDED_BY(mu_);
|
||||||
uint64 collected_nodes_ GUARDED_BY(mu_) = 0;
|
uint64 collected_nodes_ GUARDED_BY(mu_) = 0;
|
||||||
};
|
};
|
||||||
|
@ -77,6 +77,8 @@ message NodeExecStats {
|
|||||||
message DeviceStepStats {
|
message DeviceStepStats {
|
||||||
string device = 1;
|
string device = 1;
|
||||||
repeated NodeExecStats node_stats = 2;
|
repeated NodeExecStats node_stats = 2;
|
||||||
|
// Thread names, keyed by thread id.
|
||||||
|
map<uint32, string> thread_names = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
message StepStats {
|
message StepStats {
|
||||||
|
@ -79,6 +79,8 @@ Status HostTracer::CollectDataToCollector(
|
|||||||
|
|
||||||
const string cpu_name = "/host:CPU";
|
const string cpu_name = "/host:CPU";
|
||||||
for (auto& thread : events_) {
|
for (auto& thread : events_) {
|
||||||
|
step_stats_collector->SaveThreadName(cpu_name, thread.thread.tid,
|
||||||
|
thread.thread.name);
|
||||||
for (auto& event : thread.events) {
|
for (auto& event : thread.events) {
|
||||||
if (!event.end_time) {
|
if (!event.end_time) {
|
||||||
auto it = end_times.find(event.activity_id);
|
auto it = end_times.find(event.activity_id);
|
||||||
|
@ -47,6 +47,12 @@ void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
|
|||||||
resource.set_name("0");
|
resource.set_name("0");
|
||||||
resource.set_resource_id(0);
|
resource.set_resource_id(0);
|
||||||
(*device.mutable_resources())[0] = resource;
|
(*device.mutable_resources())[0] = resource;
|
||||||
|
for (const auto& thread_name : device_stats->thread_names()) {
|
||||||
|
tensorflow::tpu::Resource resource;
|
||||||
|
resource.set_resource_id(thread_name.first);
|
||||||
|
resource.set_name(thread_name.second);
|
||||||
|
(*device.mutable_resources())[thread_name.first] = resource;
|
||||||
|
}
|
||||||
(*trace_devices)[device_id] = device;
|
(*trace_devices)[device_id] = device;
|
||||||
|
|
||||||
// Emit events.
|
// Emit events.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user