[XLA:GPU] Add layout attributes to LHLO_GPU Convolution operations.

- MLIR MemRefs do not preserve layout information correctly when unit dimensions
  are involved. Operations like convolution that use cuDNN however need the correct
  layout to be preserved so that we do not end up creating an incompatible combination
  of input/filter/output layout that is not supported by cuDNN.
- Add these layouts to convolution attributes in the form of I64ArrayAttr for representing
  the layout in "minor_to_major" form similar to XLA.

PiperOrigin-RevId: 348034757
Change-Id: I4bbccfc713d136335ac3b436a8b657bd34b98fae
This commit is contained in:
Rahul Joshi 2020-12-17 09:25:03 -08:00 committed by TensorFlower Gardener
parent 165b3e83a7
commit fdcfc23591
5 changed files with 58 additions and 8 deletions

View File

@ -21,7 +21,17 @@ include "mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops_base.td"
def ConvolutionBackendConfigAttr : StructAttr<"ConvolutionBackendConfig",
LHLO_GPU_Dialect, [
StructFieldAttr<"algorithm", I64Attr>,
StructFieldAttr<"tensor_ops_enabled", BoolAttr>]> {
StructFieldAttr<"tensor_ops_enabled", BoolAttr>,
// The following 3 attributes describe the layout as an array of integers
// that list the dimensions in minor-to-major order similar to XLA's layout
// representation. operand_0_layout and operand_1_layout describe the layout
// of the first 2 operands of the convolution, and result_layout describes
// the layout of the primary output operand of the convolution.
// Note: Not using names like input_layout or filter_layout as `input` may be
// an input operand (for ConvForward) but output for ConvBackward.
StructFieldAttr<"operand_0_layout", I64ArrayAttr>,
StructFieldAttr<"operand_1_layout", I64ArrayAttr>,
StructFieldAttr<"result_layout", I64ArrayAttr>]> {
let description = "GPU Convolution backend configuration";
}

View File

@ -50,8 +50,11 @@ func @conv_forward(%input : memref<1x1x8x8xf16>, %filter: memref<1x1x2x2xf16>, %
feature_group_count = 1,
batch_group_count = 1,
result_scale = 1.0,
backend_config = {algorithm=0, tensor_ops_enabled = true }
}
backend_config = {algorithm=0,
operand_0_layout = [3,2,1,0],
operand_1_layout = [3,2,1,0],
result_layout = [3,2,1,0],
tensor_ops_enabled = true}}
: (memref<1x1x8x8xf16>, memref<1x1x2x2xf16>, memref<1x1x7x7xf16>, memref<32xi8>) -> ()
return
}
@ -60,7 +63,11 @@ func @conv_forward(%input : memref<1x1x8x8xf16>, %filter: memref<1x1x2x2xf16>, %
func @conv_backfilter(%input : memref<3x56x56x16xf64>, %filter: memref<3x3x3x64xf64>, %output: memref<54x54x16x64xf64>) {
%scratch = alloc() : memref<23328xui8>
"lmhlo_gpu.conv_backwardfilter"(%input, %filter, %output, %scratch)
{ backend_config = {algorithm = 1 : i64, tensor_ops_enabled = false},
{ backend_config = {algorithm = 1 : i64,
operand_0_layout = [3,2,1,0],
operand_1_layout = [3,2,1,0],
result_layout = [3,2,1,0],
tensor_ops_enabled = false},
batch_group_count = 1 : i64,
dimension_numbers = {input_batch_dimension = 0 : i64,
input_feature_dimension = 3 : i64,
@ -86,7 +93,11 @@ func @conv_backfilter(%input : memref<3x56x56x16xf64>, %filter: memref<3x3x3x64x
func @conv_backinput(%input : memref<4x5x16x16xf64>, %filter : memref<5x3x7x7xf64>, %output : memref<4x3x16x16xf64>) {
%scratch = alloc() : memref<32xui8>
"lmhlo_gpu.conv_backwardinput"(%input, %filter, %output, %scratch)
{ backend_config = {algorithm = 1 : i64, tensor_ops_enabled = false},
{ backend_config = {algorithm = 1 : i64,
operand_0_layout = [3,2,1,0],
operand_1_layout = [3,2,1,0],
result_layout = [3,2,1,0],
tensor_ops_enabled = false},
batch_group_count = 1 : i64,
dimension_numbers = {input_batch_dimension = 0 : i64,
input_feature_dimension = 1 : i64,
@ -114,7 +125,11 @@ func @conv_fused(%input : memref<1x17x9x9xf16>, %filter : memref<3x3x17x32xf16>,
%scratch = alloc() : memref<32xui8>
"lmhlo_gpu.conv_forward_fused"(%input, %filter, %bias, %output, %scratch)
{activation_mode = "Relu",
backend_config = {algorithm = 0 : i64, tensor_ops_enabled = false},
backend_config = {algorithm = 1 : i64,
operand_0_layout = [3,2,1,0],
operand_1_layout = [3,2,1,0],
result_layout = [3,2,1,0],
tensor_ops_enabled = false},
batch_group_count = 1 : i64,
dimension_numbers = {input_batch_dimension = 0 : i64,
input_feature_dimension = 1 : i64,
@ -141,7 +156,11 @@ func @conv_fused_side_input(%input : memref<1x17x9x9xf16>, %filter : memref<3x3x
%scratch = alloc() : memref<0xui8>
"lmhlo_gpu.conv_forward_fused_with_side_input"(%input, %filter, %bias, %side_input, %output, %scratch)
{activation_mode = "Relu",
backend_config = {algorithm = 0 : i64, tensor_ops_enabled = false},
backend_config = {algorithm = 1 : i64,
operand_0_layout = [3,2,1,0],
operand_1_layout = [3,2,1,0],
result_layout = [3,2,1,0],
tensor_ops_enabled = false},
batch_group_count = 1 : i64,
dimension_numbers = {input_batch_dimension = 0 : i64,
input_feature_dimension = 1 : i64,

View File

@ -154,6 +154,7 @@ cc_library(
"//tensorflow/compiler/xla/service/gpu:backend_configs_cc",
"//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
"//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
"@com_google_absl//absl/algorithm:container",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Pass",

View File

@ -226,6 +226,9 @@ HloModule ConvForward
// CHECK-LABEL: func @main
// CHECK: "lmhlo_gpu.conv_forward"
// CHECK-SAME: algorithm = 2 : i64
// CHECK-SAME: operand_0_layout = [3, 2, 1, 0]
// CHECK-SAME: operand_1_layout = [3, 2, 1, 0]
// CHECK-SAME: result_layout = [3, 2, 1, 0]
// CHECK-SAME: tensor_ops_enabled = false
// CHECK-SAME: batch_group_count = 1 : i64
// CHECK-SAME: input_batch_dimension = 0 : i64
@ -248,7 +251,7 @@ HloModule ConvForward
ENTRY main {
%input = f32[4,256,3,3]{3,2,1,0} parameter(0)
%filter = f32[256,256,2,2]{3,2,1,0} parameter(1)
ROOT %custom-call.1 = (f32[4,256,2,2]{3,2,1,0}, u8[65536]{0}) custom-call(f32[4,256,3,3]{3,2,1,0} %input, f32[256,256,2,2]{3,2,1,0} %filter),
ROOT %custom-call.1 = (f32[4,256,2,2]{3,2, 1,0}, u8[65536]{0}) custom-call(f32[4,256,3,3]{3,2,1,0} %input, f32[256,256,2,2]{3,2,1,0} %filter),
window={size=2x2 rhs_reversal=1x1}, dim_labels=bf01_oi01->bf01,
custom_call_target="__cudnn$convForward",
backend_config="{\"algorithm\":\"2\",\"tensor_ops_enabled\":false,\"conv_result_scale\":1,\"activation_mode\":\"0\",\"side_input_scale\":0}"
@ -260,6 +263,9 @@ ENTRY main {
// CHECK: "lmhlo_gpu.conv_forward_fused"
// CHECK-SAME: activation_mode = "Relu"
// CHECK-SAME: algorithm = 0 : i64
// CHECK-SAME: operand_0_layout = [1, 3, 2, 0]
// CHECK-SAME: operand_1_layout = [2, 1, 0, 3]
// CHECK-SAME: result_layout = [1, 3, 2, 0]
// CHECK-SAME: tensor_ops_enabled = false
// CHECK-SAME: batch_group_count = 1 : i64
// CHECK-SAME: input_batch_dimension = 0 : i64
@ -296,6 +302,9 @@ ENTRY main {
// CHECK: "lmhlo_gpu.conv_forward_fused_with_side_input"
// CHECK-SAME: activation_mode = "Relu"
// CHECK-SAME: algorithm = 0 : i64
// CHECK-SAME: operand_0_layout = [1, 3, 2, 0]
// CHECK-SAME: operand_1_layout = [2, 1, 0, 3]
// CHECK-SAME: result_layout = [1, 3, 2, 0]
// CHECK-SAME: tensor_ops_enabled = false
// CHECK-SAME: batch_group_count = 1 : i64
// CHECK-SAME: input_batch_dimension = 0 : i64

View File

@ -19,6 +19,7 @@ limitations under the License.
#include <memory>
#include <tuple>
#include "absl/algorithm/container.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project
@ -659,6 +660,13 @@ StatusOr<Operation*> LhloDialectEmitter::EmitDnnConvolution(
TF_ASSIGN_OR_RETURN(const xla::gpu::CudnnConvKind kind,
xla::gpu::GetCudnnConvKind(custom_call));
// Builds an I64 array attribute from an XLA layout: copies the layout's
// minor_to_major dimension list (in the same order) into a vector of int64_t
// and wraps it with builder_.getI64ArrayAttr.
auto get_layout_attribute = [&](const xla::Layout& layout) {
// Pre-size the destination so c_transform can write through begin().
std::vector<int64_t> minor_to_major(layout.minor_to_major_size());
// Element-wise cast from xla::int64 to int64_t before handing the values
// to the MLIR builder.
absl::c_transform(layout.minor_to_major(), minor_to_major.begin(),
[](xla::int64 x) { return static_cast<int64_t>(x); });
return builder_.getI64ArrayAttr(minor_to_major);
};
auto set_common_conv_attributes = [&, this](auto op) -> Operation* {
const xla::Window& window = custom_call->window();
// Window size for Cudnn Conv is same as the kernel size.
@ -703,6 +711,9 @@ StatusOr<Operation*> LhloDialectEmitter::EmitDnnConvolution(
auto config = mlir::lmhlo_gpu::ConvolutionBackendConfig::get(
builder_.getI64IntegerAttr(backend_config.algorithm()),
builder_.getBoolAttr(backend_config.tensor_ops_enabled()),
get_layout_attribute(custom_call->operand(0)->shape().layout()),
get_layout_attribute(custom_call->operand(1)->shape().layout()),
get_layout_attribute(custom_call->shape().tuple_shapes(0).layout()),
builder_.getContext());
op.backend_configAttr(config);