MaxUnpooling task modified to be Metal compatible.

Added max_unpooling_test_util with unified tests. Added Metal max unpooling unit test. PiperOrigin-RevId: 353509117 Change-Id: If9a658f8be8786e731a4fb72ede6b2f023b601a9
2021-01-24 06:58:23 -08:00 · 2021-01-24 06:58:23 -08:00 · 1415cd282c
commit 1415cd282c
parent 2cff21749d
8 changed files with 137 additions and 56 deletions
--- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
@ -340,7 +340,7 @@ cc_test(
        ":cl_test",
        "//tensorflow/lite/delegates/gpu/common:operations",
        "//tensorflow/lite/delegates/gpu/common:status",
-        "//tensorflow/lite/delegates/gpu/common/tasks:max_unpooling",
+        "//tensorflow/lite/delegates/gpu/common/tasks:max_unpooling_test_util",
        "@com_google_googletest//:gtest_main",
    ],
 )
--- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc
@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/delegates/gpu/common/tasks/max_unpooling.h"
 #include <vector>
 #include <gmock/gmock.h>
@ -22,9 +20,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/kernels/cl_test.h"
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
-
+#include "tensorflow/lite/delegates/gpu/common/tasks/max_unpooling_test_util.h"
 using ::testing::FloatNear;
 using ::testing::Pointwise;
 namespace tflite {
 namespace gpu {
@ -32,40 +28,8 @@ namespace cl {
 namespace {
 TEST_F(OpenCLOperationTest, MaxUnpooling) {
-  TensorFloat32 src_tensor;
+  auto status = MaxUnpoolingTest(&exec_env_);
-  src_tensor.shape = BHWC(1, 2, 2, 1);
+  ASSERT_TRUE(status.ok()) << status.error_message();
  src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f};
  TensorFloat32 src_ind_tensor;
  src_ind_tensor.shape = BHWC(1, 2, 2, 1);
  src_ind_tensor.data = {0.1f, 1.1f, 2.1f, 3.1f};
  MaxUnpooling2DAttributes attr;
  attr.padding.prepended = HW(0, 0);
  attr.padding.appended = HW(0, 0);
  attr.strides = HW(2, 2);
  attr.kernel = HW(2, 2);
  for (auto storage : env_.GetSupportedStorages()) {
    for (auto precision : env_.GetSupportedPrecisions()) {
      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      GPUOperation operation = CreateMaxUnpooling(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(
          {src_tensor, src_ind_tensor}, creation_context_,
          absl::make_unique<GPUOperation>(std::move(operation)),
          BHWC(1, 4, 4, 1), &dst_tensor));
      EXPECT_THAT(dst_tensor.data,
                  Pointwise(FloatNear(eps),
                            {0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 0.0f, 0.0f, 3.0f}));
    }
  }
 }
 }  // namespace
--- a/tensorflow/lite/delegates/gpu/common/tasks/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/tasks/BUILD
@ -395,6 +395,19 @@ cc_library(
    ],
 )
 cc_library(
    name = "max_unpooling_test_util",
    testonly = 1,
    srcs = ["max_unpooling_test_util.cc"],
    hdrs = ["max_unpooling_test_util.h"],
    deps = [
        ":max_unpooling",
        "//tensorflow/lite/delegates/gpu/common:operations",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/common/task:testing_util",
    ],
 )
 cc_library(
    name = "mean_stddev_normalization",
    srcs = ["mean_stddev_normalization.cc"],
--- a/tensorflow/lite/delegates/gpu/common/tasks/max_unpooling.cc
+++ b/tensorflow/lite/delegates/gpu/common/tasks/max_unpooling.cc
@ -44,24 +44,23 @@ std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def,
  op->AddDstTensor("dst_tensor", dst_desc);
  std::string c;
-  c += "__kernel void main_function(\n";
+  c += "MAIN_FUNCTION($0) {\n";
-  c += "$0) {\n";
+  c += "  int X = GLOBAL_ID_0;\n";
  c += "  int X = get_global_id(0);\n";
  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
-    c += "  int linear_id_1 = get_global_id(1);\n";
+    c += "  int linear_id_1 = GLOBAL_ID_1;\n";
    c += "  int Y = linear_id_1 / args.dst_tensor.Depth();\n";
    c += "  int Z = linear_id_1 % args.dst_tensor.Depth();\n";
    c += "  int src_z = (Z + args.padding_z) / args.stride_z;\n";
  } else {
-    c += "  int Y = get_global_id(1);\n";
+    c += "  int Y = GLOBAL_ID_1;\n";
  }
-  c += "  int S = get_global_id(2);\n";
+  c += "  int S = GLOBAL_ID_2;\n";
  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
       "S >= args.dst_tensor.Slices()) { \n";
  c += "    return; \n";
  c += "  } \n";
  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) {
-    c += "  int linear_id_0 = get_global_id(0);\n";
+    c += "  int linear_id_0 = GLOBAL_ID_0;\n";
    c += "  int X0 = linear_id_0 / args.dst_tensor.Batch();\n";
    c += "  int B = linear_id_0 % args.dst_tensor.Batch();\n";
    c += "  int src_x0 = (X0 + args.padding_x * args.dst_tensor.Batch()) / "
@ -83,16 +82,17 @@ std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def,
      c += "  bool outside = src_x < 0 || src_y < 0 || src_x >= "
           "args.src_tensor.Width() || src_y >= args.src_tensor.Height();\n";
    }
-    c += "  FLT4 src = (FLT4)(0.0f);\n";
+    c += "  FLT4 src = INIT_FLT4(0.0f);\n";
-    c += "  int4 ind = (int4)(0);\n";
+    c += "  int4 ind = INIT_INT4v4(0, 0, 0, 0);\n";
    c += "  if (!outside) {\n";
    c += "    src = args.src_tensor.Read(" + src_args + ");\n";
-    c += "    ind = convert_int4(args.src_indices.Read(" + src_args + "));\n";
+    c +=
        "    ind = CONVERT_TO_INT4(args.src_indices.Read(" + src_args + "));\n";
    c += "  }\n";
  } else {
    c += "  FLT4 src = args.src_tensor.Read(" + src_args + ");\n";
-    c +=
+    c += "  int4 ind = CONVERT_TO_INT4(args.src_indices.Read(" + src_args +
-        "  int4 ind = convert_int4(args.src_indices.Read(" + src_args + "));\n";
+         "));\n";
  }
  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) {
    c += "  int t_x = X0 - (src_x0 * args.stride_x - args.padding_x * "
--- a/tensorflow/lite/delegates/gpu/common/tasks/max_unpooling_test_util.cc
+++ b/tensorflow/lite/delegates/gpu/common/tasks/max_unpooling_test_util.cc
@ -0,0 +1,67 @@
 /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/delegates/gpu/common/tasks/max_unpooling_test_util.h"
 #include <vector>
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/task/testing_util.h"
 #include "tensorflow/lite/delegates/gpu/common/tasks/max_unpooling.h"
 namespace tflite {
 namespace gpu {
 absl::Status MaxUnpoolingTest(TestExecutionEnvironment* env) {
  TensorFloat32 src_tensor;
  src_tensor.shape = BHWC(1, 2, 2, 1);
  src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f};
  TensorFloat32 src_ind_tensor;
  src_ind_tensor.shape = BHWC(1, 2, 2, 1);
  src_ind_tensor.data = {0.1f, 1.1f, 2.1f, 3.1f};
  MaxUnpooling2DAttributes attr;
  attr.padding.prepended = HW(0, 0);
  attr.padding.appended = HW(0, 0);
  attr.strides = HW(2, 2);
  attr.kernel = HW(2, 2);
  for (auto storage : env->GetSupportedStorages()) {
    for (auto precision : env->GetSupportedPrecisions()) {
      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      GPUOperation operation = CreateMaxUnpooling(op_def, attr);
      RETURN_IF_ERROR(env->ExecuteGPUOperation(
          {src_tensor, src_ind_tensor},
          absl::make_unique<GPUOperation>(std::move(operation)),
          BHWC(1, 4, 4, 1), &dst_tensor));
      RETURN_IF_ERROR(
          PointWiseNear({0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                         0.0f, 0.0f, 0.0f, 2.0f, 0.0f, 0.0f, 3.0f},
                        dst_tensor.data, eps));
    }
  }
  return absl::OkStatus();
 }
 }  // namespace gpu
 }  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/common/tasks/max_unpooling_test_util.h
+++ b/tensorflow/lite/delegates/gpu/common/tasks/max_unpooling_test_util.h
@ -0,0 +1,30 @@
 /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MAX_UNPOOLING_TEST_UTIL_H_
 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MAX_UNPOOLING_TEST_UTIL_H_
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/task/testing_util.h"
 namespace tflite {
 namespace gpu {
 absl::Status MaxUnpoolingTest(TestExecutionEnvironment* env);
 }  // namespace gpu
 }  // namespace tflite
 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MAX_UNPOOLING_TEST_UTIL_H_
--- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
@ -275,6 +275,7 @@ objc_library(
    deps = [
        ":max_unpooling",
        ":test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:max_unpooling_test_util",
    ],
 )
@ -765,6 +766,7 @@ objc_library(
        "//tensorflow/lite/delegates/gpu/common/tasks:concat_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:elementwise_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:lstm_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:max_unpooling_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:padding_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:prelu_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:quantize_and_dequantize_test_util",
--- a/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling_test.mm
@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/tasks/max_unpooling_test_util.h"
 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
 #include "tensorflow/lite/delegates/gpu/common/util.h"
 #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
@ -35,12 +36,11 @@ using ::tflite::gpu::TensorRef;
 using ::tflite::gpu::metal::CompareVectors;
 using ::tflite::gpu::metal::SingleOpModel;
-@interface MaxUnpoolingTest : XCTestCase
+@interface MaxUnpoolingMetalTest : XCTestCase
@end
-@implementation MaxUnpoolingTest
+@implementation MaxUnpoolingMetalTest {
- (void)setUp {
+  tflite::gpu::metal::MetalExecutionEnvironment exec_env_;
  [super setUp];
 }
 - (void)testKernel2x2Stride2x2 {
@ -76,4 +76,9 @@ using ::tflite::gpu::metal::SingleOpModel;
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
 }
 - (void)testMaxUnpooling {
  auto status = MaxUnpoolingTest(&exec_env_);
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
 }
@end