From 6ba62e9feab7136a5308c67b252d94dad6441c30 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Fri, 28 Aug 2020 06:59:24 -0700 Subject: [PATCH] [XLA:GPU] Add more VLOGing to ease debugging. PiperOrigin-RevId: 328931055 Change-Id: I93547e0f1110c37cf3a60c55e58e6e52f7a46518 --- .../compiler/xla/service/gpu/instruction_fusion.cc | 14 ++++++++++++-- .../compiler/xla/service/instruction_fusion.cc | 3 ++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index b994ead17ca..4680f072140 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -60,6 +60,7 @@ bool GpuInstructionFusion::ShouldFuseInexpensiveChecks(HloInstruction* consumer, // Output fusions are not currently supported on GPUs. if (producer->opcode() == HloOpcode::kFusion) { + VLOG(4) << "Producer " << producer->name() << " is a fusion op"; return false; } // Cost condition: not fuse (simple, expensive producers) and (consumers who @@ -67,11 +68,15 @@ bool GpuInstructionFusion::ShouldFuseInexpensiveChecks(HloInstruction* consumer, if (producer->opcode() != HloOpcode::kFusion && consumer->ReusesOperandElements(operand_index) && is_expensive(*producer)) { + VLOG(4) << "Do not fuse simple, expensive producer " << producer->name() + << " and consumer which reuses operand elements."; return false; } if (!IsProducerConsumerFusible(*producer, *consumer) || !InstructionFusion::ShouldFuse(consumer, operand_index)) { + VLOG(4) << "Producer " << producer->name() + << " is not fusible or should not be fused."; return false; } return true; @@ -107,8 +112,13 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer, fusion_node_evaluations_.emplace(consumer, FusionNodeIndexingEvaluation(consumer)); } - return !fusion_node_evaluations_.at(consumer).AverageCodeDuplicationTooHigh( - producer); + if 
(fusion_node_evaluations_.at(consumer).AverageCodeDuplicationTooHigh( + producer)) { + VLOG(5) << "Fusion of " << producer->name() << " into " << consumer->name() + << " would result in overly large code duplication."; + return false; + } + return true; } bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer, diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index b290b1bd68b..2085b1ea4d0 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -516,11 +516,12 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) { continue; } - VLOG(5) << "Considering fusion of: " << instruction->ToString(); std::vector<int64>& sorted_operand_numbers = next_entry.second; for (int64 i : sorted_operand_numbers) { HloInstruction* operand = instruction->mutable_operand(i); + VLOG(5) << "Considering fusion of: " << instruction->ToString() + << " with operand " << operand->name(); if (!operand->IsFusible()) { VLOG(3) << "Operand (" << operand->ToString() << ") is not fusible";