Support DenseImageWarp in TFLite converter
PiperOrigin-RevId: 352730731 Change-Id: I6ab692a0d03fb158e73a8167d169f4f5108a2d75
commit 7d89345fdb (parent c5f474d1c8)
Changed paths: tensorflow/compiler/mlir/lite/{tests, transforms, utils}, tensorflow/lite
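For orientation (not part of the commit itself): a minimal Python sketch of how a composite function can be annotated so this pass recognizes and fuses it into a single TFLite custom op. The `experimental_implements` string must match the `addons:DenseImageWarp` attribute checked in the code below; the use of `tensorflow_addons` and the converter setup are illustrative assumptions, and the tensor shapes simply mirror the 2x4x4x1 image and 2x4x4x2 flow used in the test.

# Sketch only: annotate a composite function so the TFLite converter can
# fuse its body into one "DenseImageWarp" custom op.
import tensorflow as tf
import tensorflow_addons as tfa  # assumed available

@tf.function(experimental_implements="addons:DenseImageWarp")
def dense_image_warp(image, flow):
  # Reference implementation; the converter replaces the whole function body.
  return tfa.image.dense_image_warp(image, flow)

concrete = dense_image_warp.get_concrete_function(
    tf.TensorSpec([2, 4, 4, 1], tf.float32),
    tf.TensorSpec([2, 4, 4, 2], tf.float32))
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete])
converter.allow_custom_ops = True  # the fused result is a TFLite custom op
tflite_model = converter.convert()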
@@ -648,3 +648,114 @@ func private @max_unpooling_2d_filter_wrong_type(%arg0: tensor<1x1x2x1xf32>, %ar
  // expected-error @+1 {{'strides' attribute for MaxUnpooling2D does not contain integer values}}
  func private @max_unpooling_2d_strides_wrong_type(%arg0: tensor<1x1x2x1xf32>, %arg1: tensor<1x1x2x1xi32>) -> tensor<1x2x4x1xf32> attributes {tf._implements = #tf.func<@"addons:MaxUnpooling2D", {padding = "SAME", pool_size = [2, 2], strides = ["2", "2"]}>}
}

// -----

module {
  func @dense_image_warp(%arg0: tensor<2x4x4x1xf32>, %arg1: tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xf32> {
    %0 = "tf.PartitionedCall"(%arg0, %arg1) {_collective_manager_ids = [], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01\82\01\00", executor_type = "", f = @__inference_dense_image_warp} : (tensor<2x4x4x1xf32>, tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xf32>
    return %0 : tensor<2x4x4x1xf32>
  }

  func private @__inference_dense_image_warp(%arg0: tensor<2x4x4x1xf32>, %arg1: tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xf32> attributes {tf._implements = "addons:DenseImageWarp"} {
    %0 = "tf.Const"() {value = dense<[[[[0.000000e+00, 0.000000e+00], [0.000000e+00, 1.000000e+00], [0.000000e+00, 2.000000e+00], [0.000000e+00, 3.000000e+00]], [[1.000000e+00, 0.000000e+00], [1.000000e+00, 1.000000e+00], [1.000000e+00, 2.000000e+00], [1.000000e+00, 3.000000e+00]], [[2.000000e+00, 0.000000e+00], [2.000000e+00, 1.000000e+00], [2.000000e+00, 2.000000e+00], [2.000000e+00, 3.000000e+00]], [[3.000000e+00, 0.000000e+00], [3.000000e+00, 1.000000e+00], [3.000000e+00, 2.000000e+00], [3.000000e+00, 3.000000e+00]]]]> : tensor<1x4x4x2xf32>} : () -> tensor<1x4x4x2xf32>
    %1 = "tf.Const"() {value = dense<[2, 16, 2]> : tensor<3xi32>} : () -> tensor<3xi32>
    %2 = "tf.Const"() {value = dense<[2, 4, 4, 1]> : tensor<4xi32>} : () -> tensor<4xi32>
    %3 = "tf.Sub"(%0, %arg1) {device = ""} : (tensor<1x4x4x2xf32>, tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32>
    %4 = "tf.Reshape"(%3, %1) {device = ""} : (tensor<2x4x4x2xf32>, tensor<3xi32>) -> tensor<2x16x2xf32>
    %5 = "tf.PartitionedCall"(%arg0, %4) {_collective_manager_ids = [], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01\82\01\00", executor_type = "", f = @__inference_interpolate_bilinear} : (tensor<2x4x4x1xf32>, tensor<2x16x2xf32>) -> tensor<2x16x1xf32>
    %6 = "tf.Reshape"(%5, %2) {device = ""} : (tensor<2x16x1xf32>, tensor<4xi32>) -> tensor<2x4x4x1xf32>
    %7 = "tf.Identity"(%6) {device = ""} : (tensor<2x4x4x1xf32>) -> tensor<2x4x4x1xf32>
    return %7 : tensor<2x4x4x1xf32>
  }

  func private @__inference_interpolate_bilinear(%arg0: tensor<2x4x4x1xf32>, %arg1: tensor<2x16x2xf32>) -> tensor<2x16x1xf32> {
    %0 = "tf.Const"() {value = dense<0.000000e+00> : tensor<f32>} : () -> tensor<f32>
    %1 = "tf.Const"() {value = dense<1.000000e+00> : tensor<f32>} : () -> tensor<f32>
    %2 = "tf.Const"() {value = dense<2> : tensor<i32>} : () -> tensor<i32>
    %3 = "tf.Const"() {value = dense<1> : tensor<i32>} : () -> tensor<i32>
    %4 = "tf.Const"() {value = dense<2.000000e+00> : tensor<f32>} : () -> tensor<f32>
    %5 = "tf.Const"() {value = dense<0> : tensor<i32>} : () -> tensor<i32>
    %6 = "tf.Const"() {value = dense<[[0], [16]]> : tensor<2x1xi32>} : () -> tensor<2x1xi32>
    %7 = "tf.Const"() {value = dense<[32, 1]> : tensor<2xi32>} : () -> tensor<2xi32>
    %8 = "tf.Const"() {value = dense<4> : tensor<i32>} : () -> tensor<i32>
    %9 = "tf.Reshape"(%arg0, %7) {device = ""} : (tensor<2x4x4x1xf32>, tensor<2xi32>) -> tensor<32x1xf32>
    %10:2 = "tf.Unpack"(%arg1) {axis = 2 : i64, device = ""} : (tensor<2x16x2xf32>) -> (tensor<2x16xf32>, tensor<2x16xf32>)
    %11 = "tf.Floor"(%10#0) {device = ""} : (tensor<2x16xf32>) -> tensor<2x16xf32>
    %12 = "tf.Maximum"(%0, %11) {device = ""} : (tensor<f32>, tensor<2x16xf32>) -> tensor<2x16xf32>
    %13 = "tf.Minimum"(%12, %4) {device = ""} : (tensor<2x16xf32>, tensor<f32>) -> tensor<2x16xf32>
    %14 = "tf.Cast"(%13) {Truncate = false, device = ""} : (tensor<2x16xf32>) -> tensor<2x16xi32>
    %15 = "tf.AddV2"(%14, %3) {device = ""} : (tensor<2x16xi32>, tensor<i32>) -> tensor<2x16xi32>
    %16 = "tf.Mul"(%15, %8) {device = ""} : (tensor<2x16xi32>, tensor<i32>) -> tensor<2x16xi32>
    %17 = "tf.AddV2"(%16, %6) {device = ""} : (tensor<2x16xi32>, tensor<2x1xi32>) -> tensor<2x16xi32>
    %18 = "tf.Mul"(%14, %8) {device = ""} : (tensor<2x16xi32>, tensor<i32>) -> tensor<2x16xi32>
    %19 = "tf.AddV2"(%18, %6) {device = ""} : (tensor<2x16xi32>, tensor<2x1xi32>) -> tensor<2x16xi32>
    %20 = "tf.Sub"(%10#0, %13) {device = ""} : (tensor<2x16xf32>, tensor<2x16xf32>) -> tensor<2x16xf32>
    %21 = "tf.Maximum"(%0, %20) {device = ""} : (tensor<f32>, tensor<2x16xf32>) -> tensor<2x16xf32>
    %22 = "tf.Minimum"(%21, %1) {device = ""} : (tensor<2x16xf32>, tensor<f32>) -> tensor<2x16xf32>
    %23 = "tf.ExpandDims"(%22, %2) {device = ""} : (tensor<2x16xf32>, tensor<i32>) -> tensor<2x16x1xf32>
    %24 = "tf.Floor"(%10#1) {device = ""} : (tensor<2x16xf32>) -> tensor<2x16xf32>
    %25 = "tf.Maximum"(%0, %24) {device = ""} : (tensor<f32>, tensor<2x16xf32>) -> tensor<2x16xf32>
    %26 = "tf.Minimum"(%25, %4) {device = ""} : (tensor<2x16xf32>, tensor<f32>) -> tensor<2x16xf32>
    %27 = "tf.Cast"(%26) {Truncate = false, device = ""} : (tensor<2x16xf32>) -> tensor<2x16xi32>
    %28 = "tf.AddV2"(%27, %3) {device = ""} : (tensor<2x16xi32>, tensor<i32>) -> tensor<2x16xi32>
    %29 = "tf.AddV2"(%17, %28) {device = ""} : (tensor<2x16xi32>, tensor<2x16xi32>) -> tensor<2x16xi32>
    %30 = "tf.GatherV2"(%9, %29, %5) {batch_dims = 0 : i64, device = ""} : (tensor<32x1xf32>, tensor<2x16xi32>, tensor<i32>) -> tensor<2x16x1xf32>
    %31 = "tf.AddV2"(%19, %28) {device = ""} : (tensor<2x16xi32>, tensor<2x16xi32>) -> tensor<2x16xi32>
    %32 = "tf.GatherV2"(%9, %31, %5) {batch_dims = 0 : i64, device = ""} : (tensor<32x1xf32>, tensor<2x16xi32>, tensor<i32>) -> tensor<2x16x1xf32>
    %33 = "tf.AddV2"(%17, %27) {device = ""} : (tensor<2x16xi32>, tensor<2x16xi32>) -> tensor<2x16xi32>
    %34 = "tf.GatherV2"(%9, %33, %5) {batch_dims = 0 : i64, device = ""} : (tensor<32x1xf32>, tensor<2x16xi32>, tensor<i32>) -> tensor<2x16x1xf32>
    %35 = "tf.Sub"(%30, %34) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %36 = "tf.AddV2"(%19, %27) {device = ""} : (tensor<2x16xi32>, tensor<2x16xi32>) -> tensor<2x16xi32>
    %37 = "tf.GatherV2"(%9, %36, %5) {batch_dims = 0 : i64, device = ""} : (tensor<32x1xf32>, tensor<2x16xi32>, tensor<i32>) -> tensor<2x16x1xf32>
    %38 = "tf.Sub"(%32, %37) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %39 = "tf.Sub"(%10#1, %26) {device = ""} : (tensor<2x16xf32>, tensor<2x16xf32>) -> tensor<2x16xf32>
    %40 = "tf.Maximum"(%0, %39) {device = ""} : (tensor<f32>, tensor<2x16xf32>) -> tensor<2x16xf32>
    %41 = "tf.Minimum"(%40, %1) {device = ""} : (tensor<2x16xf32>, tensor<f32>) -> tensor<2x16xf32>
    %42 = "tf.ExpandDims"(%41, %2) {device = ""} : (tensor<2x16xf32>, tensor<i32>) -> tensor<2x16x1xf32>
    %43 = "tf.Mul"(%42, %38) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %44 = "tf.AddV2"(%43, %37) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %45 = "tf.Mul"(%42, %35) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %46 = "tf.AddV2"(%45, %34) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %47 = "tf.Sub"(%46, %44) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %48 = "tf.Mul"(%23, %47) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %49 = "tf.AddV2"(%48, %44) {device = ""} : (tensor<2x16x1xf32>, tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    %50 = "tf.Identity"(%49) {device = ""} : (tensor<2x16x1xf32>) -> tensor<2x16x1xf32>
    return %50 : tensor<2x16x1xf32>
  }

  // CHECK-LABEL: func private @__inference_dense_image_warp(
  // CHECK-SAME: %arg0: tensor<2x4x4x1xf32>,
  // CHECK-SAME: %arg1: tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xf32> attributes {tf._implements = "DenseImageWarp"} {
  // CHECK-NEXT: %0 = "tfl.custom"(%arg0, %arg1) {custom_code = "DenseImageWarp", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<2x4x4x1xf32>, tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xf32>
  // CHECK-NEXT: return %0 : tensor<2x4x4x1xf32>
  // CHECK-NEXT: }
}

// -----

module {
  // expected-error @+1 {{Invalid number of arguments to DenseImageWarp}}
  func private @dense_image_warp_invalid_inputs(%arg0: tensor<2x4x4x1xf32>) -> tensor<2x4x4x1xf32> attributes {tf._implements = "addons:DenseImageWarp"}

  // expected-error @+1 {{Image should be a 4D float tensor}}
  func private @dense_image_warp_invalid_input_shape(%arg0: tensor<2x4x4xf32>, %arg1: tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xf32> attributes {tf._implements = "addons:DenseImageWarp"}

  // expected-error @+1 {{Flow should be a 4D float tensor}}
  func private @dense_image_warp_invalid_flow_shape(%arg0: tensor<2x4x4x1xf32>, %arg1: tensor<2x4x4xf32>) -> tensor<2x4x4x1xf32> attributes {tf._implements = "addons:DenseImageWarp"}

  // expected-error @+1 {{Output should be a 4D float tensor}}
  func private @dense_image_warp_invalid_output_shape(%arg0: tensor<2x4x4x1xf32>, %arg1: tensor<2x4x4x2xf32>) -> tensor<2x4x4xf32> attributes {tf._implements = "addons:DenseImageWarp"}

  // expected-error @+1 {{Image should be a 4D float tensor}}
  func private @dense_image_warp_dynamic_shape(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> attributes {tf._implements = "addons:DenseImageWarp"}

  // expected-error @+1 {{Image should be a 4D float tensor}}
  func private @dense_image_warp_invalid_input_type(%arg0: tensor<2x4x4x1xi32>, %arg1: tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xf32> attributes {tf._implements = "addons:DenseImageWarp"}

  // expected-error @+1 {{Flow should be a 4D float tensor}}
  func private @dense_image_warp_invalid_flow_type(%arg0: tensor<2x4x4x1xf32>, %arg1: tensor<2x4x4x2xi32>) -> tensor<2x4x4x1xf32> attributes {tf._implements = "addons:DenseImageWarp"}

  // expected-error @+1 {{Output should be a 4D float tensor}}
  func private @dense_image_warp_invalid_output_type(%arg0: tensor<2x4x4x1xf32>, %arg1: tensor<2x4x4x2xf32>) -> tensor<2x4x4x1xi32> attributes {tf._implements = "addons:DenseImageWarp"}
}

@@ -63,6 +63,7 @@ constexpr char kTFTextAPIPrefix[] = "tftext:";
constexpr char kCustomSSDPostprocessing[] = "TFLite_Detection_PostProcess";
constexpr char kTfNMSPadded[] = "non_max_suppression_padded_v2";
constexpr char kCustomMaxUnpooling[] = "addons:MaxUnpooling2D";
constexpr char kCustomDenseImageWarp[] = "addons:DenseImageWarp";

using mlir::TF::FuncAttr;

@@ -278,6 +279,12 @@ void PrepareCompositeFunctionsPass::ConvertTFImplements(FuncOp func,
      return signalPassFailure();
    }
    convert_nms_padded.RewriteFunc();
  } else if (attr.getValue() == kCustomDenseImageWarp) {
    ConvertDenseImageWarpFunc image_warping(func);
    if (failed(image_warping.VerifySignature()) ||
        failed(image_warping.RewriteFunc())) {
      return signalPassFailure();
    }
  }
}

@@ -31,6 +31,7 @@ namespace {

constexpr char kTFImplements[] = "tf._implements";
constexpr char kMaxUnpooling[] = "MaxUnpooling2D";
constexpr char kImageWarping[] = "DenseImageWarp";

inline OpaqueElementsAttr CustomOption(OpBuilder* builder,
                                       const std::string& content) {
@@ -143,5 +144,58 @@ LogicalResult ConvertMaxUnpoolingFunc::CreateCustomOptions(
  return success();
}

LogicalResult ConvertDenseImageWarpFunc::RewriteFunc() {
  func_.eraseBody();
  func_.addEntryBlock();
  func_->setAttr(kTFImplements,
                 StringAttr::get(kImageWarping, func_.getContext()));

  OpBuilder builder(func_.getBody());
  auto op = builder.create<CustomOp>(
      func_.getLoc(), func_.getType().getResults(), func_.getArguments(),
      kImageWarping, CustomOption(&builder, /*content=*/""));
  builder.create<ReturnOp>(func_.getLoc(), op.getResults());

  return success();
}

LogicalResult ConvertDenseImageWarpFunc::VerifySignature() {
  // Verify high-level function signature.
  if (func_.getNumArguments() != 2) {
    return func_.emitError()
           << "Invalid number of arguments to " << kImageWarping << ": "
           << func_.getNumArguments();
  }
  if (func_.getType().getNumResults() != 1) {
    return func_.emitError()
           << "Invalid number of results from " << kImageWarping << ": "
           << func_.getType().getNumResults();
  }

  // Check types and shapes.
  auto image_type =
      func_.getType().getInput(0).dyn_cast_or_null<RankedTensorType>();
  if (!image_type || !image_type.getElementType().isF32() ||
      image_type.getRank() != 4) {
    return func_.emitError() << "Image should be a 4D float tensor";
  }

  auto flow_type =
      func_.getType().getInput(1).dyn_cast_or_null<RankedTensorType>();
  if (!flow_type || !flow_type.getElementType().isF32() ||
      flow_type.getRank() != 4) {
    return func_.emitError() << "Flow should be a 4D float tensor";
  }

  auto output_type =
      func_.getType().getResult(0).dyn_cast_or_null<RankedTensorType>();
  if (!output_type || !output_type.getElementType().isF32() ||
      output_type.getRank() != 4) {
    return func_.emitError() << "Output should be a 4D float tensor";
  }

  return success();
}

}  // namespace TFL
}  // namespace mlir

@@ -41,6 +41,19 @@ class ConvertMaxUnpoolingFunc {
  mlir::TF::FuncAttr attr_;
};

// Fuse DenseImageWarp ops annotated by tf.function to a TFLite custom op.
class ConvertDenseImageWarpFunc {
 public:
  explicit ConvertDenseImageWarpFunc(FuncOp func) : func_(func) {}

  LogicalResult RewriteFunc();

  LogicalResult VerifySignature();

 private:
  FuncOp func_;
};

}  // end namespace TFL
}  // end namespace mlir

@@ -96,7 +96,6 @@ py_library(

py_library(
    name = "test_util",
    testonly = 1,
    srcs = ["test_util.py"],
    srcs_version = "PY3",
    deps = [
@@ -239,6 +239,12 @@ class Options(object):
    self.multi_gen_state = None
    self.use_experimental_converter = False
    self.mlir_quantizer = False
    # The list of op names that should exist in the converted model.
    # This feature is currently only supported in the MLIR conversion path.
    # Examples of supported op names:
    # - "AVERAGE_POOL_2D" for a builtin op.
    # - "NumericVerify" for a custom op.
    self.expected_ops_in_converted_model = []


def _prepare_dir(options):
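A hedged illustration of the new `expected_ops_in_converted_model` option added above (the `Options()` construction and the surrounding zip-test harness are assumed, not shown in this diff): a test definition could require that the fused custom op actually appears in the converted model.

# Hypothetical test-side usage; only expected_ops_in_converted_model comes
# from this change, the rest of the setup is assumed.
options = Options()
options.use_experimental_converter = True  # only supported on the MLIR path
options.expected_ops_in_converted_model = ["DenseImageWarp"]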