Support Mean operation in the most naive way.

PiperOrigin-RevId: 291009742 Change-Id: I13d0afa5287af5418f76058b0f4706e5f68e7a53
2020-01-22 12:42:45 -08:00 · 2020-01-22 12:42:45 -08:00 · de90b76101
commit de90b76101
parent 49055157ec
13 changed files with 550 additions and 5 deletions
--- a/tensorflow/lite/delegates/gpu/common/model_builder.cc
+++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc
@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@ -2385,6 +2385,51 @@ class Landmarks2TransformMatrixOperationParser : public TFLiteOperationParser {
 private:
 };

+class MeanOperationParser : public TFLiteOperationParser {
+ public:
+  Status IsSupported(const TfLiteContext* context,
+                     const TfLiteNode* tflite_node,
+                     const TfLiteRegistration* registration) final {
+    return CheckInputsOutputs(context, tflite_node, /*inputs=*/1,
+                              /*outputs=*/1);
+  }
+
+  Status Parse(const TfLiteNode* tflite_node,
+               const TfLiteRegistration* registration, GraphFloat32* graph,
+               ObjectReader* reader) final {
+    auto* node = graph->NewNode();
+    node->operation.type = ToString(OperationType::MEAN);
+    RETURN_IF_ERROR(reader->AddInput(node, 0));
+    RETURN_IF_ERROR(reader->AddOutputs(node));
+
+    MeanAttributes attr;
+    Tensor<Linear, DataType::INT32> channel;
+    RETURN_IF_ERROR(reader->ReadTensor(1, &channel));
+    for (int i = 0; i < channel.data.size(); i++) {
+      std::string unsupported;
+      switch (channel.data[i]) {
+        case 1:
+          attr.dims.insert(Axis::HEIGHT);
+          break;
+        case 2:
+          attr.dims.insert(Axis::WIDTH);
+          break;
+        case 0:
+          unsupported = unsupported.empty() ? "batch" : unsupported;
+          ABSL_FALLTHROUGH_INTENDED;
+        case 3:
+          unsupported = unsupported.empty() ? "channels" : unsupported;
+          ABSL_FALLTHROUGH_INTENDED;
+        default:
+          return UnimplementedError(
+              absl::StrCat("Unsupported mean dimension: ", unsupported));
+      }
+    }
+    node->operation.attributes = attr;
+    return OkStatus();
+  }
+};
+
 class UnsupportedOperationParser : public TFLiteOperationParser {
 public:
  Status IsSupported(const TfLiteContext* context,
@ -2433,6 +2478,8 @@ std::unique_ptr<TFLiteOperationParser> NewOperationParser(
      return absl::make_unique<LSTMOperationParser>();
    case kTfLiteBuiltinMaxPool2d:
      return absl::make_unique<Pooling2DOperationParser>(PoolingType::MAX);
+    case kTfLiteBuiltinMean:
+      return absl::make_unique<MeanOperationParser>();
    case kTfLiteBuiltinMirrorPad:
      return absl::make_unique<PadOperationParser>(/*mirror_pad=*/true);
    case kTfLiteBuiltinMul:
--- a/tensorflow/lite/delegates/gpu/common/operations.cc
+++ b/tensorflow/lite/delegates/gpu/common/operations.cc
@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@ -102,6 +102,8 @@ std::string ToString(enum OperationType op) {
      return "lstm";
    case OperationType::MAX_UNPOOLING_2D:
      return "max_unpooling";
+    case OperationType::MEAN:
+      return "mean";
    case OperationType::MUL:
      return "mul";
    case OperationType::MULTIPLY_SCALAR:
@ -171,6 +173,7 @@ OperationType OperationTypeFromString(const std::string& name) {
          {"log", OperationType::LOG},
          {"lstm", OperationType::LSTM},
          {"max_unpooling", OperationType::MAX_UNPOOLING_2D},
+          {"mean", OperationType::MEAN},
          {"mul", OperationType::MUL},
          {"multiply_scalar", OperationType::MULTIPLY_SCALAR},
          {"pad", OperationType::PAD},
--- a/tensorflow/lite/delegates/gpu/common/operations.h
+++ b/tensorflow/lite/delegates/gpu/common/operations.h
@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@ -50,6 +50,7 @@ enum class OperationType {
  LOG,
  LSTM,
  MAX_UNPOOLING_2D,
+  MEAN,
  MUL,
  MULTIPLY_SCALAR,
  PAD,
@ -166,6 +167,11 @@ struct MaxUnpooling3DAttributes {
  Padding3D padding;
 };

+struct MeanAttributes {
+  // The vector of dimensions to calculate mean along.
+  std::set<Axis> dims;
+};
+
 struct ConcatAttributes {
  // Defines axis by which to concat on.
  Axis axis = Axis::UNKNOWN;
--- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD
@ -303,6 +303,36 @@ cc_test(
    ],
 )

+cc_library(
+    name = "mean",
+    srcs = ["mean.cc"],
+    hdrs = ["mean.h"],
+    deps = [
+        "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "//tensorflow/lite/delegates/gpu/common:types",
+        "//tensorflow/lite/delegates/gpu/gl:node_shader",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_test(
+    name = "mean_test",
+    srcs = ["mean_test.cc"],
+    tags = [
+        "notap",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":mean",
+        ":test_util",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_library(
    name = "mul",
    srcs = ["mul.cc"],
@ -641,6 +671,7 @@ TFLITE_GPU_BINARY_RELEASE_OPERATORS = [
    "pooling",
    "prelu",
    "relu",
+    "mean",
    "reshape",
    "slice",
    "softmax",
--- a/tensorflow/lite/delegates/gpu/gl/kernels/mean.cc
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mean.cc
@ -0,0 +1,81 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/gl/kernels/mean.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/types.h"
+
+namespace tflite {
+namespace gpu {
+namespace gl {
+namespace {
+
+class Mean : public NodeShader {
+ public:
+  Status GenerateCode(const GenerationContext& ctx,
+                      GeneratedCode* generated_code) const final {
+    auto attr = absl::any_cast<MeanAttributes>(ctx.node->operation.attributes);
+    if (attr.dims != std::set<Axis>({Axis::HEIGHT, Axis::WIDTH})) {
+      return InvalidArgumentError(
+          "Mean calculation is supported only for height and width.");
+    }
+
+    auto input = ctx.graph->FindInputs(ctx.node->id)[0];
+
+    std::vector<Variable> parameters = {
+        {"input_data_0_h", input->tensor.shape.h},
+        {"input_data_0_w", input->tensor.shape.w}};
+
+    std::string source = R"(
+      vec4 sum = vec4(0.0);
+      float size = float($input_data_0_w$ * $input_data_0_h$);
+      for (int w = 0; w < $input_data_0_w$; w++) {
+        for (int h = 0; h < $input_data_0_h$; h++) {
+          sum += $input_data_0[w, h, gid.z]$;
+        }
+      }
+      value_0 = sum / size;
+    )";
+    *generated_code = {
+        /*parameters=*/std::move(parameters),
+        /*objects=*/{},
+        /*shared_variables=*/{},
+        /*workload=*/uint3(),
+        /*workgroup=*/uint3(1, 1, 4),
+        /*source_code=*/std::move(source),
+        /*input=*/IOStructure::ONLY_DEFINITIONS,
+        /*output=*/IOStructure::AUTO,
+    };
+    return OkStatus();
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<NodeShader> NewMeanNodeShader() {
+  return absl::make_unique<Mean>();
+}
+
+}  // namespace gl
+}  // namespace gpu
+}  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/gl/kernels/mean.h
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mean.h
@ -0,0 +1,34 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEAN_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEAN_H_
+
+#include <memory>
+
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/gl/node_shader.h"
+
+namespace tflite {
+namespace gpu {
+namespace gl {
+
+std::unique_ptr<NodeShader> NewMeanNodeShader();
+
+}  // namespace gl
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEAN_H_
--- a/tensorflow/lite/delegates/gpu/gl/kernels/mean_test.cc
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mean_test.cc
@ -0,0 +1,54 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/gl/kernels/mean.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/gl/kernels/test_util.h"
+
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+namespace tflite {
+namespace gpu {
+namespace gl {
+namespace {
+
+TEST(MeanTest, Smoke) {
+  TensorRef<BHWC> input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 2, 2, 1);
+
+  TensorRef<BHWC> output;
+  output.type = DataType::FLOAT32;
+  output.ref = 2;
+  output.shape = BHWC(1, 1, 1, 1);
+
+  MeanAttributes attr;
+  attr.dims = {Axis::HEIGHT, Axis::WIDTH};
+
+  SingleOpModel model({ToString(OperationType::MEAN), attr}, {input}, {output});
+  ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 3.0, 4.0}));
+  ASSERT_OK(model.Invoke(*NewMeanNodeShader()));
+  EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {2.5}));
+}
+
+}  // namespace
+}  // namespace gl
+}  // namespace gpu
+}  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc
@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@ -35,6 +35,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/gl/kernels/elementwise.h"
 #include "tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.h"
 #include "tensorflow/lite/delegates/gpu/gl/kernels/lstm.h"
+#include "tensorflow/lite/delegates/gpu/gl/kernels/mean.h"
 #include "tensorflow/lite/delegates/gpu/gl/kernels/mul.h"
 #include "tensorflow/lite/delegates/gpu/gl/kernels/pad.h"
 #include "tensorflow/lite/delegates/gpu/gl/kernels/pooling.h"
@ -80,6 +81,7 @@ class Registry : public NodeShader {
    insert_op(Type::DEPTHWISE_CONVOLUTION, NewDepthwiseConvolutionNodeShader);
    insert_op(Type::FULLY_CONNECTED, NewFullyConnectedNodeShader);
    insert_op(Type::LSTM, NewLstmNodeShader);
+    insert_op(Type::MEAN, NewMeanNodeShader);
    insert_op(Type::MULTIPLY_SCALAR, NewMultiplyScalarNodeShader);
    insert_op(Type::PAD, NewPadNodeShader);
    insert_op(Type::POOLING_2D, NewPoolingNodeShader);
--- a/tensorflow/lite/delegates/gpu/metal/api.cc
+++ b/tensorflow/lite/delegates/gpu/metal/api.cc
@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h"
 #include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h"
 #include "tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/mean.h"
 #include "tensorflow/lite/delegates/gpu/metal/kernels/mul.h"
 #include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h"
 #include "tensorflow/lite/delegates/gpu/metal/kernels/pooling.h"
@ -194,6 +195,10 @@ Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
          node_id, inputs[0], inputs[1], outputs[0],
          absl::any_cast<MaxUnpooling2DAttributes>(node->operation.attributes));
      break;
+    case OperationType::MEAN:
+      *tasks = Mean(node_id, inputs[0], outputs[0],
+                    absl::any_cast<MeanAttributes>(node->operation.attributes));
+      break;
    case OperationType::MULTIPLY_SCALAR:
      *tasks = Multiply(
          node_id, inputs[0], outputs[0],
--- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
@ -20,6 +20,7 @@ cc_library(
        ":elementwise",
        ":fully_connected",
        ":max_unpooling",
+        ":mean",
        ":mul",
        ":padding",
        ":pooling",
@ -313,6 +314,44 @@ ios_unit_test(
    deps = [":max_unpooling_test_lib"],
 )

+cc_library(
+    name = "mean",
+    srcs = ["mean.cc"],
+    hdrs = ["mean.h"],
+    deps = [
+        ":util",
+        "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:tensor",
+        "//tensorflow/lite/delegates/gpu/common:util",
+        "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor",
+        "//tensorflow/lite/delegates/gpu/metal:runtime_options",
+        "@com_google_absl//absl/types:variant",
+    ],
+)
+
+objc_library(
+    name = "mean_test_lib",
+    testonly = 1,
+    srcs = ["mean_test.mm"],
+    sdk_frameworks = ["XCTest"],
+    deps = [
+        ":mean",
+        ":test_util",
+    ],
+)
+
+ios_unit_test(
+    name = "mean_test",
+    testonly = 1,
+    minimum_os_version = "10.0",
+    tags = [
+        "notap",
+        "tflite_not_portable_android",
+    ],
+    deps = [":mean_test_lib"],
+)
+
 cc_library(
    name = "mul",
    srcs = ["mul.cc"],
--- a/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc
@ -0,0 +1,137 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/metal/kernels/mean.h"
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/substitute.h"
+#include "absl/types/variant.h"
+#include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+#include "tensorflow/lite/delegates/gpu/common/util.h"
+#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/util.h"
+#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
+
+namespace tflite {
+namespace gpu {
+namespace metal {
+
+std::string GetMeanCode() {
+  std::string shader_source = R"(
+    #include <metal_stdlib>
+    using namespace metal;
+    struct uniforms {
+      int4 src_size;
+      int4 dst_size;
+    };
+
+    $0
+    kernel void ComputeFunction(
+                                $1
+                                uint3 gid[[thread_position_in_grid]]) {
+      if (static_cast<int>(gid.x) >= params.dst_size.x ||
+          static_cast<int>(gid.y) >= params.dst_size.y ||
+          static_cast<int>(gid.z) >= params.dst_size.z) {
+        return;
+      }
+
+      float4 sum = float4(0.0);
+      float size = float( params.src_size.x * params.src_size.y);
+      for (int w = 0; w < params.src_size.x; w++) {
+        for (int h = 0; h < params.src_size.y; h++) {
+          const int buffer_index =
+            (gid.z * params.src_size.y + h) * params.src_size.x + w;
+          sum += src_buffer[buffer_index];
+        }
+      }
+      sum /= size;
+      const int linear_index =
+      (gid.z * params.dst_size.y + int(gid.y)) * params.dst_size.x + int(gid.x);
+
+      FLT4 value = FLT4(sum);
+      $2
+      output_buffer[linear_index] = value;
+    }
+  )";
+  return shader_source;
+}
+
+std::vector<ComputeTaskDescriptorPtr> Mean(int id, ValueId input_id,
+                                           ValueId output_id,
+                                           const MeanAttributes& attr) {
+  if (attr.dims != std::set<Axis>({Axis::HEIGHT, Axis::WIDTH})) {
+    // Mean calculation is supported only for height and width
+    return {};
+  }
+
+  auto desc = std::make_shared<ComputeTaskDescriptor>();
+  desc->id = id;
+  desc->is_linkable = false;
+  std::string code = GetMeanCode();
+  desc->shader_source = code;
+
+  desc->input_buffers = {
+      {input_id, "device FLT4* const src_buffer"},
+  };
+
+  desc->output_buffer = {output_id, "device FLT4* output_buffer",
+                         [input_id](const std::map<ValueId, BHWC>& buffers) {
+                           const auto& input_dimension =
+                               buffers.find(input_id)->second;
+                           return BHWC(1, 1, 1, input_dimension.c);
+                         }};
+  desc->uniform_buffers = {
+      {"constant uniforms& params",
+       [input_id, output_id](const std::map<ValueId, BHWC>& buffers) {
+         const auto& dimension = buffers.find(input_id)->second;
+         const auto& output_dimension = buffers.find(output_id)->second;
+         std::vector<int> uniform_params = {
+             dimension.w,
+             dimension.h,
+             IntegralDivideRoundUp(dimension.c, 4),
+             0,
+             output_dimension.w,
+             output_dimension.h,
+             IntegralDivideRoundUp(dimension.c, 4),
+             0};
+         return GetByteBuffer(uniform_params);
+       }},
+  };
+
+  desc->resize_function = [output_id](const std::map<ValueId, BHWC>& buffers) {
+    BHWC dst_shape = buffers.find(output_id)->second;
+    const uint3 grid =
+        uint3(dst_shape.w, dst_shape.h, IntegralDivideRoundUp(dst_shape.c, 4));
+    const uint3 groups_size = GetWorkGroupSizeForGrid(grid);
+    int groups_x = IntegralDivideRoundUp(grid.x, groups_size.x);
+    int groups_y = IntegralDivideRoundUp(grid.y, groups_size.y);
+    int groups_z = IntegralDivideRoundUp(grid.z, groups_size.z);
+    return std::make_pair(groups_size, uint3{groups_x, groups_y, groups_z});
+  };
+  return {desc};
+}
+
+}  // namespace metal
+}  // namespace gpu
+}  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/metal/kernels/mean.h
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/mean.h
@ -0,0 +1,36 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_MEAN_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_MEAN_H_
+
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
+#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
+
+namespace tflite {
+namespace gpu {
+namespace metal {
+
+std::vector<ComputeTaskDescriptorPtr> Mean(int id, ValueId input_id,
+                                           ValueId output_id,
+                                           const MeanAttributes& attr);
+
+}  // namespace metal
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_MEAN_H_
--- a/tensorflow/lite/delegates/gpu/metal/kernels/mean_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/mean_test.mm
@ -0,0 +1,70 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/metal/kernels/mean.h"
+
+#import <XCTest/XCTest.h>
+
+#include <vector>
+
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+#include "tensorflow/lite/delegates/gpu/common/util.h"
+#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
+#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
+
+using ::tflite::gpu::Axis;
+using ::tflite::gpu::BHWC;
+using ::tflite::gpu::DataType;
+using ::tflite::gpu::OperationType;
+using ::tflite::gpu::MeanAttributes;
+using ::tflite::gpu::TensorRef;
+using ::tflite::gpu::metal::CompareVectors;
+using ::tflite::gpu::metal::SingleOpModel;
+
+@interface MeanTest : XCTestCase
+@end
+
+@implementation MeanTest
+- (void)setUp {
+  [super setUp];
+}
+
+- (void)testMeanSmoke {
+  TensorRef<BHWC> input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 2, 2, 1);
+
+  TensorRef<BHWC> output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 1, 1, 1);
+
+  MeanAttributes attr;
+  attr.dims = {Axis::HEIGHT, Axis::WIDTH};
+
+  SingleOpModel model({ToString(OperationType::MEAN), attr}, {input}, {output});
+  XCTAssertTrue(model.PopulateTensor(0, {1.0, 2.0, 3.0, 4.0}));
+  auto status = model.Invoke();
+  XCTAssertTrue(status.ok(), @"%s", status.error_message().c_str());
+  status = CompareVectors({2.5}, model.GetOutput(0), 1e-6f);
+  XCTAssertTrue(status.ok(), @"%s", status.error_message().c_str());
+}
+
+@end