Rename HloExecutionProfile::AddProfileResult/GetProfileResult to SetCyclesTakenBy/GetCyclesTakenBy; NFC (no functional change)
PiperOrigin-RevId: 174084570
This commit is contained in:
parent
f1916f8f6c
commit
0a7be5a2f5
@ -234,7 +234,7 @@ Status CpuExecutable::ExecuteComputeFunction(
|
||||
for (auto hlo_prof_idx : hlo_to_profile_idx_) {
|
||||
const HloInstruction* hlo = hlo_prof_idx.first;
|
||||
uint64 cycles_taken = profile_counters[hlo_prof_idx.second];
|
||||
hlo_execution_profile->AddProfileResult(hlo, cycles_taken);
|
||||
hlo_execution_profile->SetCyclesTakenBy(hlo, cycles_taken);
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
|
@ -463,7 +463,7 @@ Status ParallelCpuExecutable::ExecuteComputeFunctions(
|
||||
for (auto hlo_prof_idx : hlo_to_profile_idx_) {
|
||||
const HloInstruction* hlo = hlo_prof_idx.first;
|
||||
uint64 cycles_taken = profile_counters[hlo_prof_idx.second];
|
||||
hlo_execution_profile->AddProfileResult(hlo, cycles_taken);
|
||||
hlo_execution_profile->SetCyclesTakenBy(hlo, cycles_taken);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ class HloExecutionProfiler {
|
||||
if (do_profile_) {
|
||||
stream_->ThenStopTimer(per_op_timer_.get());
|
||||
stream_->BlockHostUntilDone();
|
||||
profile_->AddProfileResult(
|
||||
profile_->SetCyclesTakenBy(
|
||||
hlo_instruction, per_op_timer_->Nanoseconds() * clock_rate_ghz_);
|
||||
}
|
||||
}
|
||||
|
@ -27,13 +27,13 @@ limitations under the License.
|
||||
|
||||
namespace xla {
|
||||
|
||||
void HloExecutionProfile::AddProfileResult(const HloInstruction* hlo,
|
||||
void HloExecutionProfile::SetCyclesTakenBy(const HloInstruction* hlo,
|
||||
uint64 cycles_taken) {
|
||||
hlo_to_cycles_taken_[hlo] = cycles_taken;
|
||||
profiled_computations_.insert(hlo->parent());
|
||||
}
|
||||
|
||||
uint64 HloExecutionProfile::GetProfileResult(const HloInstruction& hlo) const {
|
||||
uint64 HloExecutionProfile::GetCyclesTakenBy(const HloInstruction& hlo) const {
|
||||
auto iter = hlo_to_cycles_taken_.find(&hlo);
|
||||
if (iter == hlo_to_cycles_taken_.end()) {
|
||||
return 0;
|
||||
|
@ -36,11 +36,11 @@ class HloExecutionProfile {
|
||||
using DeviceDescription = perftools::gputools::DeviceDescription;
|
||||
|
||||
// Record how many cycles this HLO took to execute.
|
||||
void AddProfileResult(const HloInstruction* hlo, uint64 cycles_taken);
|
||||
void SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken);
|
||||
|
||||
// Returns how many cycles this HLO took to execute. Profiling information
|
||||
// may not be available for some instructions in which case zero is returned.
|
||||
uint64 GetProfileResult(const HloInstruction& hlo) const;
|
||||
uint64 GetCyclesTakenBy(const HloInstruction& hlo) const;
|
||||
|
||||
// Return the number of cycles this computation took to execute.
|
||||
uint64 total_cycles_executed(const HloComputation& computation) const {
|
||||
|
@ -1070,7 +1070,7 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) {
|
||||
lines.push_back(Printf("[%p]", instr));
|
||||
}
|
||||
if (profile_ != nullptr) {
|
||||
double hlo_cycles_executed = profile_->GetProfileResult(*instr);
|
||||
double hlo_cycles_executed = profile_->GetCyclesTakenBy(*instr);
|
||||
double total_cycles_executed =
|
||||
profile_->total_cycles_executed(*instr->parent());
|
||||
if (hlo_cycles_executed > 0 && total_cycles_executed > 0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user