Add thread name to profile result.

PiperOrigin-RevId: 232702876
This commit is contained in:
A. Unique TensorFlower 2019-02-06 10:46:02 -08:00 committed by TensorFlower Gardener
parent 1362eaa98e
commit 4e928af59b
5 changed files with 41 additions and 0 deletions

View File

@ -409,6 +409,21 @@ void StepStatsCollector::Save(const string& device,
}
}
// Records `thread_name` as the name of thread `thread_id` on `device`.
//
// A name already stored for the same (device, thread id) pair is replaced.
// The update happens while holding `mu_`. If the collector has already been
// finalized, a warning is logged; the name is still written to
// `thread_names_` in that case.
void StepStatsCollector::SaveThreadName(const string& device,
                                        const uint32 thread_id,
                                        const string& thread_name) {
  VLOG(1) << "Save dev " << device << " thread id " << thread_id << " name "
          << thread_name;

  mutex_lock lock(mu_);
  if (finalized_) {
    LOG(WARNING) << "thread_name saved after finalize will not be collected.";
  }
  // operator[] creates the per-device map on first use.
  thread_names_[device][thread_id] = thread_name;
}
NodeExecStatsInterface* StepStatsCollector::CreateNodeExecStats(
const Node* node) {
// Only collect statistics for non-transfer nodes.
@ -531,5 +546,15 @@ void StepStatsCollector::FinalizeInternal() {
stats->stats()->Swap(dss->add_node_stats());
}
}
// Copy the saved thread names into the DeviceStepStats of each device.
for (const auto& device_thread : thread_names_) {
  // Look the device up once and reuse the iterator, instead of a find()
  // followed by a second lookup through at().
  const auto dss_it = dev_stats_pb.find(device_thread.first);
  if (dss_it == dev_stats_pb.end()) {
    // Skip devices without DeviceStepStats.
    continue;
  }
  // Fetch the mutable proto map once per device rather than once per thread.
  auto* const thread_names_pb = dss_it->second->mutable_thread_names();
  for (const auto& thread_name : device_thread.second) {
    (*thread_names_pb)[thread_name.first] = thread_name.second;
  }
}
}
} // namespace tensorflow

View File

@ -175,6 +175,10 @@ class StepStatsCollector : public StepStatsCollectorInterface {
void Save(const string& device, NodeExecStats* node_stats_pb);
void Save(const string& device, NodeExecStatsWrapper* node_stats);
// Saves `thread_name` as the name of thread `thread_id` on `device`.
// Saving again for the same device and thread id replaces the stored name.
// Acquires `mu_` internally. Logs a warning when called after the collector
// has been finalized.
void SaveThreadName(const string& device, const uint32 thread_id,
const string& thread_name);
NodeExecStatsInterface* CreateNodeExecStats(const Node* node) override;
string ReportAllocsOnResourceExhausted(const string& err) override;
@ -191,12 +195,14 @@ class StepStatsCollector : public StepStatsCollectorInterface {
static const uint64 kMaxCollectedNodes = 1 << 20;
typedef std::vector<std::unique_ptr<NodeExecStatsWrapper>> NodeStatsVector;
typedef std::unordered_map<uint32, string> ThreadNamesMap;
void FinalizeInternal() EXCLUSIVE_LOCKS_REQUIRED(mu_);
mutex mu_;
bool finalized_ GUARDED_BY(mu_);
std::unordered_map<string, NodeStatsVector> dev_stats_ GUARDED_BY(mu_);
std::unordered_map<string, ThreadNamesMap> thread_names_ GUARDED_BY(mu_);
StepStats* step_stats_ GUARDED_BY(mu_);
uint64 collected_nodes_ GUARDED_BY(mu_) = 0;
};

View File

@ -77,6 +77,8 @@ message NodeExecStats {
message DeviceStepStats {
string device = 1;
repeated NodeExecStats node_stats = 2;
// Thread names saved for this device: maps a thread id (key) to that
// thread's name (value).
map<uint32, string> thread_names = 3;
}
message StepStats {

View File

@ -79,6 +79,8 @@ Status HostTracer::CollectDataToCollector(
const string cpu_name = "/host:CPU";
for (auto& thread : events_) {
step_stats_collector->SaveThreadName(cpu_name, thread.thread.tid,
thread.thread.name);
for (auto& event : thread.events) {
if (!event.end_time) {
auto it = end_times.find(event.activity_id);

View File

@ -47,6 +47,12 @@ void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
resource.set_name("0");
resource.set_resource_id(0);
(*device.mutable_resources())[0] = resource;
// Add one trace resource per named thread, keyed by its thread id.
for (const auto& tid_and_name : device_stats->thread_names()) {
  // Named differently from the `resource` used above for resource 0, so the
  // loop-local object does not shadow it.
  tensorflow::tpu::Resource thread_resource;
  thread_resource.set_name(tid_and_name.second);
  thread_resource.set_resource_id(tid_and_name.first);
  (*device.mutable_resources())[tid_and_name.first] = thread_resource;
}
(*trace_devices)[device_id] = device;
// Emit events.