From 9fa8f4ad49ae5e22682d5d930b70b408ac3b4a48 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 29 Sep 2020 17:11:23 -0700 Subject: [PATCH] [MLIR] Fix CreateView() to not drop slice offset incorrectly. - A slice can be mapped to memref for the allocation only if slice offset is 0. - Minor comment tweaks. PiperOrigin-RevId: 334494277 Change-Id: Iac9251162b8470b8705eb0f2866d010cf83785b1 --- .../xla/transforms/mhlo_to_lhlo_with_xla.cc | 18 +++++++++--------- tensorflow/compiler/mlir/xla/type_to_shape.cc | 3 +-- .../xla/service/gpu/ir_emitter_unnested.cc | 6 ++---- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc index a731b5a4a50..b60d95d1ddf 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc @@ -294,7 +294,7 @@ Status LhloDialectEmitter::CreateView(const HloInstruction* instr, TF_ASSIGN_OR_RETURN(BufferAllocation::Slice slice, assignment_.GetUniqueSlice(instr, *current_shape_index)); Value alloc = allocations_[slice.allocation()]; - if (alloc.getType() == out_type) { + if (alloc.getType() == out_type && slice.offset() == 0) { values->push_back(alloc); return Status::OK(); } @@ -337,16 +337,16 @@ Status LhloDialectEmitter::GetOrCreateView(const HloInstruction* instr, SmallVectorImpl* values) { // Cache generated ViewOp and StaticMemRefCastOp by instruction. We could have - // gone fancier to do the following cacheing: - // %range = ViewOp(%allocation, %offset) : memref - // %typed_range = ViewOp(%range) : memref + // gone fancier to do the following caching: + // %slice = ViewOp(%allocation, %offset) : memref + // %typed_slice = ViewOp(%slice) : memref + // - // where %range is cached.
This in theory gives easier time for alias - // analysis, since the identity of %range defines alias. However, - // %typed_range can't be cached, as different buffers with different types and + // where %slice is cached. This in theory gives easier time for alias + // analysis, since the identity of %slice defines alias. However, + // %typed_slice can't be cached, as different buffers with different types and // shapes may still alias. Creating two ViewOps doesn't seem to worth the // effort for a slightly easier aliasing, so we don't over optimize here. - auto result = slices_.try_emplace(instr, llvm::SmallVector{}); + auto result = slices_.try_emplace(instr, llvm::SmallVector{}); llvm::SmallVectorImpl& new_values = result.first->second; if (result.second) { ::xla::ShapeIndex shape_index; @@ -373,7 +373,7 @@ Status LhloDialectEmitter::Initialize() { if (computation_.IsEntryComputation()) { // Sort the rather arbitrarily ordered allocations to match the input/output - // parameters. Specifically We want to sort buffer allocations in the + // parameters. Specifically we want to sort buffer allocations in the // following order: // * Parameters always order before non-parameters. // * Different parameters order by parameter number.
diff --git a/tensorflow/compiler/mlir/xla/type_to_shape.cc b/tensorflow/compiler/mlir/xla/type_to_shape.cc index b725f56b455..9aca3ce7f98 100644 --- a/tensorflow/compiler/mlir/xla/type_to_shape.cc +++ b/tensorflow/compiler/mlir/xla/type_to_shape.cc @@ -102,8 +102,7 @@ Shape TypeToShape(mlir::Type type) { if (ptype != PrimitiveType::PRIMITIVE_TYPE_INVALID) return ShapeUtil::MakeShape(ptype, {}); - if (type.isBF16() || type.isF32() || type.isF64() || - type.isa()) { + if (type.isIntOrFloat()) { auto* context = type.getContext(); mlir::emitError(mlir::UnknownLoc::get(context)) << "lowering should have been handled by primitive type lowering for " diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index f7627c348b6..a388dd6ac1b 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1396,16 +1396,14 @@ Status IrEmitterUnnested::EmitSortFromMlir(MlirEmitterInput input) { std::vector output_shapes(sort_op.output().size()); for (int i = 0; i < operand_count; i++) { - operand_shapes[i] = - TypeToShape(sort_op.operands()[i].getType().cast()); + operand_shapes[i] = TypeToShape(sort_op.operands()[i].getType()); } // Craft n + 1 slices, where the first n are output parameters, and the last // is the on-device tuple storage. We don't need n operands because sorting // kernels are always in-place. for (int i = 0; i < operand_count; i++) { - output_shapes[i] = - TypeToShape(sort_op.output()[i].getType().cast()); + output_shapes[i] = TypeToShape(sort_op.output()[i].getType()); MlirBufferSlice slice; TF_ASSIGN_OR_RETURN( slice.buffer_slice,