Support Global Average Pooling in XNNPACK delegate

- MEAN over spatial dimensions is converted as a Global Average Pooling PiperOrigin-RevId: 316031672 Change-Id: Icbecf2ccf2920c701ee2f6b04b6dcf9972b9ce0b
2020-06-11 20:19:12 -07:00 · 2020-06-11 20:19:12 -07:00 · ffc7592c82
commit ffc7592c82
parent 6642441bee
7 changed files with 700 additions and 5 deletions
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@ -172,6 +172,22 @@ cc_library(
    ],
 )
 # Test-only helper library built from reduce_tester.cc / reduce_tester.h.
 # The mean_test target below lists it in its deps.
 cc_library(
    name = "reduce_tester",
    testonly = 1,
    srcs = ["reduce_tester.cc"],
    hdrs = ["reduce_tester.h"],
    deps = [
        "//tensorflow/lite:framework",
        "//tensorflow/lite:schema_fbs_version",
        "//tensorflow/lite/c:common",
        "//tensorflow/lite/kernels:builtin_ops",
        "//tensorflow/lite/schema:schema_fbs",
        "@com_google_googletest//:gtest",
        "@flatbuffers",
    ],
 )
 cc_library(
    name = "softmax_tester",
    testonly = 1,
@ -429,6 +445,21 @@ cc_test(
    ],
 )
 # Test target built from mean_test.cc on top of the reduce_tester helper.
 # EMSCRIPTEN_LINKOPTS are applied only when the //tensorflow:emscripten
 # condition matches; every other configuration gets no extra link options.
 cc_test(
    name = "mean_test",
    srcs = ["mean_test.cc"],
    linkopts = select({
        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
        "//conditions:default": [],
    }),
    deps = [
        ":reduce_tester",
        ":test_main",
        ":xnnpack_delegate_test_mode",
        "@com_google_googletest//:gtest",
    ],
 )
 cc_test(
    name = "minimum_test",
    srcs = ["minimum_test.cc"],
--- a/tensorflow/lite/delegates/xnnpack/README.md
+++ b/tensorflow/lite/delegates/xnnpack/README.md
@ -165,6 +165,16 @@ Below is the list of current operators and limitations:
 * Inputs and outputs must be in 32-bit floating-point format.
 ### `MEAN`
 * The first input and the output must be 4D tensors in 32-bit
  floating-point format.
 * The second input (the input with the axes specification) must be static
  (use `kTfLiteMmapRo` allocation type).
 * Only [1, 2] or [2, 1] axes specification (i.e. reduction across spatial
  dimensions) is supported.
 * Only `keep_dims = True` parameter value is supported.
 ### `MINIMUM`
 * Inputs and outputs must be in 32-bit floating-point format.
--- a/tensorflow/lite/delegates/xnnpack/mean_test.cc
+++ b/tensorflow/lite/delegates/xnnpack/mean_test.cc
@ -0,0 +1,265 @@
 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <cstdint>
 #include <functional>
 #include <memory>
 #include <random>
 #include <gtest/gtest.h>
 #include "tensorflow/lite/delegates/xnnpack/reduce_tester.h"
 #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
 namespace tflite {
 namespace xnnpack {
 // MEAN over the batch axis (0) of a randomly shaped 4D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate only accepts reduction over axes 1 and 2
 // -- TODO confirm.
 TEST(Mean, DISABLED_4DReduceBatch) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t height = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, height, width, channels}).Axes({0});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over the height axis (1) only of a randomly shaped 4D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because a single-axis reduction is not delegated -- TODO confirm.
 TEST(Mean, DISABLED_4DReduceHeight) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t height = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, height, width, channels}).Axes({1});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over the width axis (2) only of a randomly shaped 4D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because a single-axis reduction is not delegated -- TODO confirm.
 TEST(Mean, DISABLED_4DReduceWidth) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t height = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, height, width, channels}).Axes({2});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over both spatial axes of a randomly shaped 4D tensor. The same shape
 // and delegate are exercised twice, with the axes listed as {1, 2} and then
 // as {2, 1}.
 TEST(Mean, 4DReduceHeightWidth) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t height = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester ascending_axes_tester;
  ascending_axes_tester.InputShape({batch, height, width, channels})
      .Axes({1, 2});
  ascending_axes_tester.Test(BuiltinOperator_MEAN, delegate.get());

  ReduceTester descending_axes_tester;
  descending_axes_tester.InputShape({batch, height, width, channels})
      .Axes({2, 1});
  descending_axes_tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over the channel axis (3) of a randomly shaped 4D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate only accepts reduction over axes 1 and 2
 // -- TODO confirm.
 TEST(Mean, DISABLED_4DReduceChannels) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t height = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, height, width, channels}).Axes({3});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over axis 0 of a randomly shaped 3D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate requires 4D tensors -- TODO confirm.
 TEST(Mean, DISABLED_3DReduceBatch) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, width, channels}).Axes({0});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over axis 1 of a randomly shaped 3D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate requires 4D tensors -- TODO confirm.
 TEST(Mean, DISABLED_3DReduceWidth) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, width, channels}).Axes({1});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over axis 2 of a randomly shaped 3D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate requires 4D tensors -- TODO confirm.
 TEST(Mean, DISABLED_3DReduceChannels) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, width, channels}).Axes({2});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over axis 0 of a randomly shaped 2D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate requires 4D tensors -- TODO confirm.
 TEST(Mean, DISABLED_2DReduceBatch) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Both dimensions are drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, channels}).Axes({0});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over axis 1 of a randomly shaped 2D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate requires 4D tensors -- TODO confirm.
 TEST(Mean, DISABLED_2DReduceChannels) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Both dimensions are drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, channels}).Axes({1});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over the only axis of a randomly sized 1D tensor.
 // The DISABLED_ prefix keeps gtest from running this test by default;
 // presumably because the delegate requires 4D tensors -- TODO confirm.
 TEST(Mean, DISABLED_1D) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // The single dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch}).Axes({0});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 // MEAN over the spatial axes {1, 2} of a randomly shaped 4D tensor, using a
 // delegate created from the default options with num_threads set to 2.
 TEST(Mean, MultiThreading) {
  TfLiteXNNPackDelegateOptions options = TfLiteXNNPackDelegateOptionsDefault();
  options.num_threads = 2;

  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  DelegatePtr delegate(TfLiteXNNPackDelegateCreate(&options),
                       TfLiteXNNPackDelegateDelete);

  // Every dimension is drawn from [2, 5] with a freshly seeded generator.
  std::random_device seed_source;
  std::mt19937 generator(seed_source());
  std::uniform_int_distribution<int32_t> dim_dist(2, 5);
  const int32_t batch = dim_dist(generator);
  const int32_t height = dim_dist(generator);
  const int32_t width = dim_dist(generator);
  const int32_t channels = dim_dist(generator);

  ReduceTester tester;
  tester.InputShape({batch, height, width, channels}).Axes({1, 2});
  tester.Test(BuiltinOperator_MEAN, delegate.get());
 }
 }  // namespace xnnpack
 }  // namespace tflite
--- a/tensorflow/lite/delegates/xnnpack/reduce_tester.cc
+++ b/tensorflow/lite/delegates/xnnpack/reduce_tester.cc
@ -0,0 +1,171 @@
 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/delegates/xnnpack/reduce_tester.h"
 #include <array>
 #include <cstdint>
 #include <functional>
 #include <numeric>
 #include <random>
 #include <vector>
 #include <gtest/gtest.h>
 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 #include "tensorflow/lite/version.h"
 namespace tflite {
 namespace xnnpack {
 // Builds a single-operator model for `reduce_op`, runs it once through an
 // interpreter modified with `delegate` and once through an unmodified
 // interpreter, and asserts that the two outputs agree element-wise.
 //
 // Both interpreters receive the same random input values, generated from a
 // uniform distribution constructed with bounds (-15.0f, 15.0f). Element i
 // passes when the outputs differ by at most
 //   epsilon * max(|reference[i]| * RelativeTolerance(), 1.0f)
 // where `reference` is the output of the unmodified interpreter.
 //
 // Failures are reported through gtest ASSERT_* macros, so this function
 // returns early on the first fatal failure.
 void ReduceTester::Test(tflite::BuiltinOperator reduce_op,
                        TfLiteDelegate* delegate) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto input_rng = std::bind(
      std::uniform_real_distribution<float>(-15.0f, 15.0f), std::ref(rng));

  // `buffer` owns the serialized model and must outlive both interpreters.
  std::vector<char> buffer = CreateTfLiteModel(reduce_op);
  const Model* model = GetModel(buffer.data());

  std::unique_ptr<Interpreter> delegate_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())(
          &delegate_interpreter),
      kTfLiteOk);
  std::unique_ptr<Interpreter> default_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())(
          &default_interpreter),
      kTfLiteOk);

  ASSERT_TRUE(delegate_interpreter);
  ASSERT_TRUE(default_interpreter);

  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
  ASSERT_EQ(default_interpreter->inputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
  ASSERT_EQ(default_interpreter->outputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);

  // Only the delegate interpreter is modified; the other one is the reference.
  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);

  // Fill the reference input with random data, then mirror it into the
  // delegate interpreter so both see identical values.
  float* default_input_data = default_interpreter->typed_tensor<float>(
      default_interpreter->inputs()[0]);
  std::generate(default_input_data, default_input_data + InputSize(),
                std::ref(input_rng));

  float* delegate_input_data = delegate_interpreter->typed_tensor<float>(
      delegate_interpreter->inputs()[0]);
  std::copy(default_input_data, default_input_data + InputSize(),
            delegate_input_data);

  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);

  float* default_output_data = default_interpreter->typed_tensor<float>(
      default_interpreter->outputs()[0]);
  float* delegate_output_data = delegate_interpreter->typed_tensor<float>(
      delegate_interpreter->outputs()[0]);

  // The index uses the same signed type as the bound to avoid a
  // signed/unsigned comparison.
  const int32_t output_size = OutputSize();
  for (int32_t i = 0; i < output_size; i++) {
    ASSERT_NEAR(
        default_output_data[i], delegate_output_data[i],
        std::numeric_limits<float>::epsilon() *
            std::max(std::abs(default_output_data[i]) * RelativeTolerance(),
                     1.0f));
  }
 }
 // Serializes a TFLite flatbuffer model containing a single `reduce_op`
 // operator configured with ReducerOptions built from KeepDims().
 //
 // Tensors: #0 is the FLOAT32 input with shape InputShape(), #1 is the INT32
 // axes tensor backed by buffer #1 (the raw bytes of Axes()), and #2 is the
 // FLOAT32 output with shape OutputShape(). Tensor #0 is the subgraph input
 // and tensor #2 is the subgraph output.
 //
 // Returns a copy of the finished flatbuffer bytes.
 std::vector<char> ReduceTester::CreateTfLiteModel(
    tflite::BuiltinOperator reduce_op) const {
  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<OperatorCode> operator_code =
      CreateOperatorCode(builder, reduce_op);
  // Buffer #0 is created from an empty vector; buffer #1 carries the axes
  // values reinterpreted as bytes.
  const std::array<flatbuffers::Offset<Buffer>, 2> buffers{{
      CreateBuffer(builder, builder.CreateVector({})),
      CreateBuffer(builder, builder.CreateVector(
                                reinterpret_cast<const uint8_t*>(Axes().data()),
                                sizeof(int32_t) * Axes().size())),
  }};
  const std::vector<int32_t> output_shape = OutputShape();
  // The axes tensor is 1D with one element per requested axis.
  const std::array<int32_t, 1> axes_shape{
      {static_cast<int32_t>(Axes().size())}};
  // Tensor order here defines the indices used by op_inputs / op_outputs and
  // subgraph_inputs / subgraph_outputs below. Only the axes tensor is given
  // an explicit buffer index.
  const std::array<flatbuffers::Offset<Tensor>, 3> tensors{{
      CreateTensor(builder,
                   builder.CreateVector<int32_t>(InputShape().data(),
                                                 InputShape().size()),
                   TensorType_FLOAT32),
      CreateTensor(
          builder,
          builder.CreateVector<int32_t>(axes_shape.data(), axes_shape.size()),
          TensorType_INT32, /*buffer=*/1),
      CreateTensor(builder,
                   builder.CreateVector<int32_t>(output_shape.data(),
                                                 output_shape.size()),
                   TensorType_FLOAT32),
  }};
  const flatbuffers::Offset<ReducerOptions> reducer_options =
      CreateReducerOptions(builder, KeepDims());
  // The operator consumes the data tensor and the axes tensor and produces
  // the output tensor.
  const std::array<int32_t, 2> op_inputs{{0, 1}};
  const std::array<int32_t, 1> op_outputs{{2}};
  flatbuffers::Offset<Operator> op = CreateOperator(
      builder, /*opcode_index=*/0,
      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
      tflite::BuiltinOptions_ReducerOptions, reducer_options.Union());
  // The axes tensor (#1) is not listed as a subgraph input.
  const std::array<int32_t, 1> subgraph_inputs{{0}};
  const std::array<int32_t, 1> subgraph_outputs{{2}};
  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
      builder, builder.CreateVector(tensors.data(), tensors.size()),
      builder.CreateVector<int32_t>(subgraph_inputs.data(),
                                    subgraph_inputs.size()),
      builder.CreateVector<int32_t>(subgraph_outputs.data(),
                                    subgraph_outputs.size()),
      builder.CreateVector(&op, 1));
  flatbuffers::Offset<flatbuffers::String> description =
      builder.CreateString("Reduce model");
  flatbuffers::Offset<Model> model_buffer = CreateModel(
      builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1),
      builder.CreateVector(&subgraph, 1), description,
      builder.CreateVector(buffers.data(), buffers.size()));
  builder.Finish(model_buffer);
  // Copy the bytes out so the result does not depend on `builder`'s lifetime.
  return std::vector<char>(builder.GetBufferPointer(),
                           builder.GetBufferPointer() + builder.GetSize());
 }
 // Returns the product of all entries of `shape`; an empty shape yields 1.
 int32_t ReduceTester::ComputeSize(const std::vector<int32_t>& shape) {
  int32_t element_count = 1;
  for (const int32_t dim : shape) {
    element_count *= dim;
  }
  return element_count;
 }
 }  // namespace xnnpack
 }  // namespace tflite
--- a/tensorflow/lite/delegates/xnnpack/reduce_tester.h
+++ b/tensorflow/lite/delegates/xnnpack/reduce_tester.h
@ -0,0 +1,117 @@
 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_REDUCE_TESTER_H_
 #define TENSORFLOW_LITE_DELEGATES_XNNPACK_REDUCE_TESTER_H_
 #include <cstdint>
 #include <unordered_set>
 #include <vector>
 #include <gtest/gtest.h>
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 namespace tflite {
 namespace xnnpack {
 // Builder-style helper for testing reduction operators against a delegate.
 //
 // Configure the instance with the chained setters (InputShape, Axes,
 // KeepDims, RelativeTolerance) and then call Test(). The class is
 // non-copyable.
 class ReduceTester {
 public:
  ReduceTester() = default;
  ReduceTester(const ReduceTester&) = delete;
  ReduceTester& operator=(const ReduceTester&) = delete;

  // Sets the input tensor shape and caches its element count. Every
  // dimension is expected to be positive; a violation is reported as a
  // non-fatal gtest failure.
  inline ReduceTester& InputShape(std::initializer_list<int32_t> shape) {
    for (const int32_t dim : shape) {
      EXPECT_GT(dim, 0);
    }
    input_shape_.assign(shape.begin(), shape.end());
    input_size_ = ReduceTester::ComputeSize(input_shape_);
    return *this;
  }

  inline const std::vector<int32_t>& InputShape() const { return input_shape_; }

  // Number of elements in the input tensor (product of InputShape()).
  inline int32_t InputSize() const { return input_size_; }

  // Sets the reduction axes. Every axis is expected to be non-negative; a
  // violation is reported as a non-fatal gtest failure.
  inline ReduceTester& Axes(std::initializer_list<int32_t> axes) {
    for (const int32_t axis : axes) {
      EXPECT_GE(axis, 0);
    }
    axes_.assign(axes.begin(), axes.end());
    return *this;
  }

  inline const std::vector<int32_t>& Axes() const { return axes_; }

  // Selects whether reduced dimensions are kept as size-1 dimensions in the
  // output shape (true, the default) or dropped (false).
  inline ReduceTester& KeepDims(bool keep_dims) {
    keep_dims_ = keep_dims;
    return *this;
  }

  inline bool KeepDims() const { return keep_dims_; }

  // Returns the output shape: dimensions listed in Axes() become 1 when
  // KeepDims() is set and are omitted otherwise; all other dimensions are
  // copied from InputShape().
  inline std::vector<int32_t> OutputShape() const {
    std::vector<int32_t> output_shape;
    output_shape.reserve(InputShape().size());
    const std::unordered_set<int32_t> axes_set(Axes().cbegin(), Axes().cend());
    // Signed dimension count so the index comparison is not signed/unsigned.
    const int32_t num_dims = static_cast<int32_t>(InputShape().size());
    for (int32_t i = 0; i < num_dims; i++) {
      if (axes_set.count(i) != 0) {
        if (KeepDims()) {
          output_shape.push_back(1);
        }
      } else {
        output_shape.push_back(InputShape()[i]);
      }
    }
    return output_shape;
  }

  // Returns the number of output elements: the product of the input
  // dimensions that are not reduced. Independent of KeepDims(), since kept
  // dimensions have size 1.
  inline int32_t OutputSize() const {
    int32_t output_size = 1;
    const std::unordered_set<int32_t> axes_set(Axes().cbegin(), Axes().cend());
    const int32_t num_dims = static_cast<int32_t>(InputShape().size());
    for (int32_t i = 0; i < num_dims; i++) {
      if (axes_set.count(i) == 0) {
        output_size *= InputShape()[i];
      }
    }
    return output_size;
  }

  // Sets the multiplier applied to the reference magnitude when Test()
  // computes the allowed difference between outputs.
  inline ReduceTester& RelativeTolerance(float relative_tolerance) {
    relative_tolerance_ = relative_tolerance;
    return *this;
  }

  inline float RelativeTolerance() const { return relative_tolerance_; }

  // Runs `reduce_op` with and without `delegate` and compares the outputs.
  void Test(tflite::BuiltinOperator reduce_op, TfLiteDelegate* delegate) const;

 private:
  std::vector<char> CreateTfLiteModel(tflite::BuiltinOperator reduce_op) const;

  static int32_t ComputeSize(const std::vector<int32_t>& shape);

  std::vector<int32_t> input_shape_;
  std::vector<int32_t> axes_;
  // Initialized to 1, the element count ComputeSize() gives for the default
  // empty shape, so InputSize() is well-defined before InputShape() is set.
  int32_t input_size_ = 1;
  bool keep_dims_ = true;
  float relative_tolerance_ = 10.0f;
 };
 }  // namespace xnnpack
 }  // namespace tflite
 #endif  // TENSORFLOW_LITE_DELEGATES_XNNPACK_REDUCE_TESTER_H_
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@ -150,8 +150,9 @@ class Subgraph {
      }
      switch (registration->builtin_code) {
        case kTfLiteBuiltinMean:
        case kTfLiteBuiltinPad:
-          // Ignore the second input (static padding), because it is
+          // Ignore the second input (static padding, or axes), because it is
          // represented as parameters of the XNNPACK operator rather than
          // extra input.
          {
@ -723,6 +724,20 @@ class Subgraph {
    return kTfLiteOk;
  }
  // Checks that the axes tensor has exactly one dimension. On a mismatch a
  // message naming `tensor_index` and `node_index` is passed to
  // TF_LITE_MAYBE_KERNEL_LOG and kTfLiteError is returned.
  static TfLiteStatus CheckAxesTensorShape(TfLiteContext* context,
                                           const TfLiteTensor& tensor,
                                           int tensor_index, int node_index) {
    if (tensor.dims->size == 1) {
      return kTfLiteOk;
    }

    TF_LITE_MAYBE_KERNEL_LOG(context,
                             "unexpected number of shape dimensions (%d) in "
                             "axes tensor #%d in node #%d: "
                             "expected a 1D tensor",
                             tensor.dims->size, tensor_index, node_index);
    return kTfLiteError;
  }
  static TfLiteStatus CheckTensorNonDynamicAllocation(
      TfLiteContext* context, const TfLiteTensor& tensor, int tensor_index,
      int node_index) {
@ -846,6 +861,13 @@ class Subgraph {
      case kTfLiteBuiltinMaximum:
        return VisitMaximumNode(subgraph, logging_context, node_index, node,
                                context->tensors, xnnpack_tensors);
      case kTfLiteBuiltinMean: {
        const TfLiteReducerParams* reducer_params =
            static_cast<const TfLiteReducerParams*>(node->builtin_data);
        return VisitMeanNode(subgraph, logging_context, node_index, node,
                             context->tensors, reducer_params, xnnpack_tensors);
      }
      case kTfLiteBuiltinMinimum:
        return VisitMinimumNode(subgraph, logging_context, node_index, node,
                                context->tensors, xnnpack_tensors);
@ -1723,6 +1745,85 @@ class Subgraph {
    return kTfLiteOk;
  }
  // Validates a MEAN node and, when `subgraph` is non-null, defines it in the
  // XNNPACK subgraph as a 2D global average pooling operator with an
  // unbounded output range.
  //
  // The node is accepted only if all of the following hold, checked in this
  // order:
  //   - it has 2 inputs and 1 output;
  //   - the input passes the float-type, 4D-shape and non-dynamic-allocation
  //     checks;
  //   - the axes tensor is a 1D, statically allocated kTfLiteInt32 tensor;
  //   - the output passes the same checks as the input;
  //   - `reducer_params->keep_dims` is set;
  //   - exactly two axes are given and they are 1 and 2, in either order.
  //
  // Returns kTfLiteOk on success and kTfLiteError otherwise.
  static TfLiteStatus VisitMeanNode(
      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
      TfLiteNode* node, const TfLiteTensor* tensors,
      const TfLiteReducerParams* reducer_params,
      const std::vector<uint32_t>& xnnpack_tensors) {
    TF_LITE_ENSURE_STATUS(
        CheckNumInputsAndOutputs(logging_context, node, 2, 1, node_index));

    // Tensor indices are read only after the input/output counts are checked.
    const int input_index = node->inputs->data[0];
    const int axes_index = node->inputs->data[1];
    const int output_index = node->outputs->data[0];

    const TfLiteTensor& input_tensor = tensors[input_index];
    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(logging_context, input_tensor,
                                               input_index, node_index));
    TF_LITE_ENSURE_STATUS(
        CheckTensorShape(logging_context, input_tensor, 4, input_index));
    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
        logging_context, input_tensor, input_index, node_index));

    const TfLiteTensor& axes_tensor = tensors[axes_index];
    TF_LITE_ENSURE_STATUS(CheckTensorType(logging_context, axes_tensor,
                                          kTfLiteInt32, axes_index, node_index));
    TF_LITE_ENSURE_STATUS(CheckAxesTensorShape(logging_context, axes_tensor,
                                               axes_index, node_index));
    TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation(
        logging_context, axes_tensor, axes_index, node_index));

    const TfLiteTensor& output_tensor = tensors[output_index];
    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(logging_context, output_tensor,
                                               output_index, node_index));
    TF_LITE_ENSURE_STATUS(
        CheckTensorShape(logging_context, output_tensor, 4, output_index));
    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
        logging_context, output_tensor, output_index, node_index));

    if (!reducer_params->keep_dims) {
      TF_LITE_MAYBE_KERNEL_LOG(
          logging_context,
          "unsupported MEAN reduction without keep_dims attributes in node %d",
          node_index);
      return kTfLiteError;
    }

    // The axes tensor was verified to be 1D above, so dims->data[0] is the
    // number of axes.
    const int num_axes = axes_tensor.dims->data[0];
    if (num_axes != 2) {
      TF_LITE_MAYBE_KERNEL_LOG(
          logging_context,
          "unsupported MEAN reduction along %d axes in node %d", num_axes,
          node_index);
      return kTfLiteError;
    }

    // Order-insensitive comparison: both [1, 2] and [2, 1] are accepted.
    const int32_t* axes_data =
        reinterpret_cast<const int32_t*>(axes_tensor.data.data);
    const int32_t min_axis = std::min(axes_data[0], axes_data[1]);
    const int32_t max_axis = std::max(axes_data[0], axes_data[1]);
    const bool reduces_spatial_dims = (min_axis == 1 && max_axis == 2);
    if (!reduces_spatial_dims) {
      TF_LITE_MAYBE_KERNEL_LOG(logging_context,
                               "unsupported MEAN reduction along non-spatial "
                               "axes %d and %d in node %d",
                               min_axis, max_axis, node_index);
      return kTfLiteError;
    }

    // With a null subgraph only the validation above is performed.
    if (subgraph == nullptr) {
      return kTfLiteOk;
    }

    const xnn_status status = xnn_define_global_average_pooling_2d(
        subgraph,
        /*output_min=*/-std::numeric_limits<float>::infinity(),
        /*output_max=*/+std::numeric_limits<float>::infinity(),
        /*input_id=*/xnnpack_tensors[input_index],
        /*output_id=*/xnnpack_tensors[output_index], /*flags=*/0);
    if (status != xnn_status_success) {
      TF_LITE_KERNEL_LOG(logging_context, "failed to delegate MEAN node #%d",
                         node_index);
      return kTfLiteError;
    }
    return kTfLiteOk;
  }
  static TfLiteStatus VisitMediaPipeDeconvolutionNode(
      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
      TfLiteNode* node, const TfLiteTensor* tensors,
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@ -164,11 +164,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
    tf_http_archive(
        name = "XNNPACK",
-        sha256 = "abdb7ec410e5ee5264178973665d0071362223699639dc08de37a4c3ca4b0a61",
+        sha256 = "7469a0a634bfa90395ed311d07a21b1d0003604b37b12745bad1cf17860984e1",
-        strip_prefix = "XNNPACK-af4524811a6d3123aa5fd603a232d97b6be2c7c9",
+        strip_prefix = "XNNPACK-a059b7da184954fb6c01db0e7959352ee805e9f3",
        urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/af4524811a6d3123aa5fd603a232d97b6be2c7c9.zip",
+            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/a059b7da184954fb6c01db0e7959352ee805e9f3.zip",
-            "https://github.com/google/XNNPACK/archive/af4524811a6d3123aa5fd603a232d97b6be2c7c9.zip",
+            "https://github.com/google/XNNPACK/archive/a059b7da184954fb6c01db0e7959352ee805e9f3.zip",
        ],
    )