diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 1509da6f7ec..ceaeacbea2a 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1027,10 +1027,13 @@ StatusOr IrEmitter::EmitElementalConvolution( PrimitiveType lhs_element_type = lhs->shape().element_type(); llvm::Type* lhs_llvm_type = llvm_ir::PrimitiveTypeToIrType(lhs_element_type, module_); + // Upcast the accumulator to F32 from F16 for increased precision. + llvm::Type* accumulator_type = + lhs_element_type == F16 ? b_.getFloatTy() : lhs_llvm_type; llvm::Value* sum_address = llvm_ir::EmitAllocaAtFunctionEntry( - lhs_llvm_type, "convolution_sum_address", &b_, + accumulator_type, "convolution_sum_address", &b_, MinimumAlignmentForPrimitiveType(lhs_element_type)); - llvm::Value* constant_zero = llvm::Constant::getNullValue(lhs_llvm_type); + llvm::Value* constant_zero = llvm::Constant::getNullValue(accumulator_type); Store(constant_zero, sum_address); llvm_ir::ForLoopNest loops(IrName(convolution, "inner"), &b_); @@ -1139,11 +1142,11 @@ StatusOr IrEmitter::EmitElementalConvolution( TF_ASSIGN_OR_RETURN(llvm::Value* const kernel_value, kernel_generator(kernel_index)); llvm::Value* product = FMul(input_value, kernel_value); - llvm::Value* sum = FAdd(Load(sum_address), product); + llvm::Value* sum = FAdd(Load(sum_address), FPCast(product, accumulator_type)); Store(sum, sum_address); SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &b_); - return Load(sum_address); + return FPCast(Load(sum_address), lhs_llvm_type); } Status IrEmitter::HandleConvolution(HloInstruction* convolution) { diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 0ab765aefa0..0fae5d966db 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -1842,15 +1842,11 @@ INSTANTIATE_TEST_CASE_P( Convolve1DTestParam{130, 1, 1, 1, 3}, Convolve1DTestParam{64, 1, 1, 1, 1}, Convolve1DTestParam{128, 1, 1, 1, 1}, -// TODO(b/72566306): The following five tests failed on CPU with unreasonable -// relative errors. Last ran on 2018-02-22. -#if XLA_TEST_BACKEND_GPU Convolve1DTestParam{139, 1, 1, 128, 1}, Convolve1DTestParam{640, 3, 3, 128, 1}, Convolve1DTestParam{900, 1, 1, 10, 1}, Convolve1DTestParam{1, 10, 10, 1, 10}, Convolve1DTestParam{1, 10, 130, 1, 1}, -#endif Convolve1DTestParam{1, 10, 130, 1, 2}, Convolve1DTestParam{1, 64, 64, 1, 10}, Convolve1DTestParam{1, 65, 65, 1, 1},