[XLA:GPU] Add more VLOG statements to ease debugging.
PiperOrigin-RevId: 328931055
Change-Id: I93547e0f1110c37cf3a60c55e58e6e52f7a46518
This commit is contained in:
parent
24aa11dd48
commit
6ba62e9fea
@ -60,6 +60,7 @@ bool GpuInstructionFusion::ShouldFuseInexpensiveChecks(HloInstruction* consumer,
|
|||||||
|
|
||||||
// Output fusions are not currently supported on GPUs.
|
// Output fusions are not currently supported on GPUs.
|
||||||
if (producer->opcode() == HloOpcode::kFusion) {
|
if (producer->opcode() == HloOpcode::kFusion) {
|
||||||
|
VLOG(4) << "Producer " << producer->name() << " is a fusion op";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Cost condition: not fuse (simple, expensive producers) and (consumers who
|
// Cost condition: not fuse (simple, expensive producers) and (consumers who
|
||||||
@ -67,11 +68,15 @@ bool GpuInstructionFusion::ShouldFuseInexpensiveChecks(HloInstruction* consumer,
|
|||||||
if (producer->opcode() != HloOpcode::kFusion &&
|
if (producer->opcode() != HloOpcode::kFusion &&
|
||||||
consumer->ReusesOperandElements(operand_index) &&
|
consumer->ReusesOperandElements(operand_index) &&
|
||||||
is_expensive(*producer)) {
|
is_expensive(*producer)) {
|
||||||
|
VLOG(4) << "Do not fuse simple, expensive producer " << producer->name()
|
||||||
|
<< " and consumer which reuses operand elements.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!IsProducerConsumerFusible(*producer, *consumer) ||
|
if (!IsProducerConsumerFusible(*producer, *consumer) ||
|
||||||
!InstructionFusion::ShouldFuse(consumer, operand_index)) {
|
!InstructionFusion::ShouldFuse(consumer, operand_index)) {
|
||||||
|
VLOG(4) << "Producer " << producer->name()
|
||||||
|
<< " is not fusible or should not be fused.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -107,8 +112,13 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
|
|||||||
fusion_node_evaluations_.emplace(consumer,
|
fusion_node_evaluations_.emplace(consumer,
|
||||||
FusionNodeIndexingEvaluation(consumer));
|
FusionNodeIndexingEvaluation(consumer));
|
||||||
}
|
}
|
||||||
return !fusion_node_evaluations_.at(consumer).AverageCodeDuplicationTooHigh(
|
if (fusion_node_evaluations_.at(consumer).AverageCodeDuplicationTooHigh(
|
||||||
producer);
|
producer)) {
|
||||||
|
VLOG(5) << "Fusion of " << producer->name() << " into " << consumer->name()
|
||||||
|
<< " would result in overly large code duplication.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer,
|
bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer,
|
||||||
|
@ -516,11 +516,12 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
VLOG(5) << "Considering fusion of: " << instruction->ToString();
|
|
||||||
std::vector<int64>& sorted_operand_numbers = next_entry.second;
|
std::vector<int64>& sorted_operand_numbers = next_entry.second;
|
||||||
|
|
||||||
for (int64 i : sorted_operand_numbers) {
|
for (int64 i : sorted_operand_numbers) {
|
||||||
HloInstruction* operand = instruction->mutable_operand(i);
|
HloInstruction* operand = instruction->mutable_operand(i);
|
||||||
|
VLOG(5) << "Considering fusion of: " << instruction->ToString()
|
||||||
|
<< " with operand " << operand->name();
|
||||||
|
|
||||||
if (!operand->IsFusible()) {
|
if (!operand->IsFusible()) {
|
||||||
VLOG(3) << "Operand (" << operand->ToString() << ") is not fusible";
|
VLOG(3) << "Operand (" << operand->ToString() << ") is not fusible";
|
||||||
|
Loading…
x
Reference in New Issue
Block a user