diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD
index 0e6e6c98438..8b51bf28b87 100644
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@@ -75,6 +75,22 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "fully_connected_tester",
+    testonly = 1,
+    srcs = ["fully_connected_tester.cc"],
+    hdrs = ["fully_connected_tester.h"],
+    deps = [
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:schema_fbs_version",
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/schema:schema_fbs",
+        "@com_google_googletest//:gtest",
+        "@flatbuffers",
+    ],
+)
+
 cc_library(
     name = "pool_2d_tester",
     testonly = 1,
@@ -199,6 +215,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "fully_connected_test",
+    srcs = ["fully_connected_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":fully_connected_tester",
+        ":test_main",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "hard_swish_test",
     srcs = ["hard_swish_test.cc"],
diff --git a/tensorflow/lite/delegates/xnnpack/fully_connected_test.cc b/tensorflow/lite/delegates/xnnpack/fully_connected_test.cc
new file mode 100644
index 00000000000..a801ce141ed
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/fully_connected_test.cc
@@ -0,0 +1,326 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/fully_connected_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(FullyConnected, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, 1DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, 2DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
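+// 3DReshape leaves keep_dims at its default (false) and sets InputChannels()
+// to width * input_channels, so FULLY_CONNECTED must flatten the 3D input to
+// [batch, width * input_channels] before the matrix multiplication; this
+// exercises the delegate's implicit reshape-to-2D path.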
+TEST(FullyConnected, 3DReshape) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(width * input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, 3DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, height, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, 4DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, height, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, ReluActivation) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .ReluActivation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, Relu6Activation) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Relu6Activation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(FullyConnected, ReluMinus1To1Activation) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .ReluMinus1To1Activation()
+      .Test(xnnpack_delegate.get());
+}
+
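+// Unlike the tests above, MultiThreading creates the delegate with explicit
+// options (num_threads = 2) to verify that delegated execution on a thread
+// pool still matches the single-threaded reference kernels.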
+TEST(FullyConnected, MultiThreading) {
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  FullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc b/tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc
new file mode 100644
index 00000000000..05716bf18fb
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc
@@ -0,0 +1,219 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/xnnpack/fully_connected_tester.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/version.h"
+
+namespace tflite {
+namespace xnnpack {
+
+std::vector<int32_t> FullyConnectedTester::OutputShape() const {
+  EXPECT_NE(input_shape_.size(), 0);
+  if (KeepDims()) {
+    std::vector<int32_t> output_shape(input_shape_.cbegin(),
+                                      input_shape_.cend() - 1);
+    output_shape.push_back(OutputChannels());
+    return output_shape;
+  } else {
+    EXPECT_EQ(InputSize() % InputChannels(), 0);
+    return std::vector<int32_t>(
+        {InputSize() / InputChannels(), OutputChannels()});
+  }
+}
+
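+// Builds the single-operator FULLY_CONNECTED model once, then runs it through
+// two interpreters on identical random inputs: one with the XNNPACK delegate
+// applied and one with the default TFLite kernels as the reference. Outputs
+// are compared element-wise at the end.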
+void FullyConnectedTester::Test(TfLiteDelegate* delegate) const {
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto input_rng =
+      std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
+
+  std::vector<char> buffer = CreateTfLiteModel();
+  const Model* model = GetModel(buffer.data());
+
+  std::unique_ptr<Interpreter> delegate_interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())(
+          &delegate_interpreter),
+      kTfLiteOk);
+  std::unique_ptr<Interpreter> default_interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())(
+          &default_interpreter),
+      kTfLiteOk);
+
+  ASSERT_TRUE(delegate_interpreter);
+  ASSERT_TRUE(default_interpreter);
+
+  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
+  ASSERT_EQ(default_interpreter->inputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
+  ASSERT_EQ(default_interpreter->outputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
+  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);
+
+  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);
+
+  float* default_input_data = default_interpreter->typed_tensor<float>(
+      default_interpreter->inputs()[0]);
+  std::generate(default_input_data, default_input_data + InputSize(),
+                std::ref(input_rng));
+
+  float* delegate_input_data = delegate_interpreter->typed_tensor<float>(
+      delegate_interpreter->inputs()[0]);
+  std::copy(default_input_data, default_input_data + InputSize(),
+            delegate_input_data);
+
+  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
+  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);
+
+  float* default_output_data = default_interpreter->typed_tensor<float>(
+      default_interpreter->outputs()[0]);
+  float* delegate_output_data = delegate_interpreter->typed_tensor<float>(
+      delegate_interpreter->outputs()[0]);
+
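+  // The tolerance scales with the reference output: roughly 10 ulp of
+  // relative error for large magnitudes, with an absolute floor of one
+  // machine epsilon for outputs near zero.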
+  for (size_t i = 0; i < ComputeSize(OutputShape()); i++) {
+    ASSERT_NEAR(default_output_data[i], delegate_output_data[i],
+                std::numeric_limits<float>::epsilon() *
+                    std::max(std::abs(default_output_data[i]) * 10.0f, 1.0f));
+  }
+}
+
+std::vector<char> FullyConnectedTester::CreateTfLiteModel() const {
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+
+  auto range_rng = std::bind(
+      std::uniform_real_distribution<float>(-25.0f, 25.0f), std::ref(rng));
+
+  flatbuffers::FlatBufferBuilder builder;
+  flatbuffers::Offset<OperatorCode> operator_code =
+      CreateOperatorCode(builder, BuiltinOperator_FULLY_CONNECTED);
+
+  std::vector<float> filter_data(InputChannels() * OutputChannels());
+  std::vector<float> bias_data(OutputChannels());
+
+  for (int32_t oc = 0; oc < OutputChannels(); oc++) {
+    // Use the same range of all-positive or all-negative values to generate
+    // all filter & bias weights within the same channel, but different ranges
+    // for different output channels. This ensures that no catastrophic
+    // cancellation occurs, while the test still covers both positive and
+    // negative inputs.
+    const float range = range_rng();
+    auto value_rng =
+        std::bind(std::uniform_real_distribution<float>(std::min(range, 0.0f),
+                                                        std::max(range, 0.0f)),
+                  std::ref(rng));
+
+    bias_data[oc] = value_rng();
+    for (int32_t ic = 0; ic < InputChannels(); ic++) {
+      filter_data[oc * InputChannels() + ic] = value_rng();
+    }
+  }
+
+  std::array<flatbuffers::Offset<Buffer>, 3> buffers{{
+      CreateBuffer(builder, builder.CreateVector<uint8_t>({})),
+      CreateBuffer(builder,
+                   builder.CreateVector(
+                       reinterpret_cast<const uint8_t*>(filter_data.data()),
+                       sizeof(float) * filter_data.size())),
+      CreateBuffer(builder,
+                   builder.CreateVector(
+                       reinterpret_cast<const uint8_t*>(bias_data.data()),
+                       sizeof(float) * bias_data.size())),
+  }};
+
+  const std::array<int32_t, 2> filter_shape(
+      {OutputChannels(), InputChannels()});
+  const std::array<int32_t, 1> bias_shape({OutputChannels()});
+
+  const std::vector<int32_t> output_shape = OutputShape();
+  const std::array<flatbuffers::Offset<Tensor>, 4> tensors{{
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(InputShape().data(),
+                                                 InputShape().size()),
+                   TensorType_FLOAT32),
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(filter_shape.data(),
+                                                 filter_shape.size()),
+                   TensorType_FLOAT32, /*buffer=*/1),
+      CreateTensor(
+          builder,
+          builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
+          TensorType_FLOAT32, /*buffer=*/2),
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(output_shape.data(),
+                                                 output_shape.size()),
+                   TensorType_FLOAT32),
+  }};
+
+  flatbuffers::Offset<FullyConnectedOptions> fully_connected_options =
+      CreateFullyConnectedOptions(builder, Activation(),
+                                  FullyConnectedOptionsWeightsFormat_DEFAULT,
+                                  KeepDims());
+
+  const std::array<int32_t, 3> op_inputs{{0, 1, 2}};
+  const std::array<int32_t, 1> op_outputs{{3}};
+  flatbuffers::Offset<Operator> op = CreateOperator(
+      builder, /*opcode_index=*/0,
+      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
+      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
+      BuiltinOptions_FullyConnectedOptions, fully_connected_options.Union());
+
+  const std::array<int32_t, 1> subgraph_inputs{{0}};
+  const std::array<int32_t, 1> subgraph_outputs{{3}};
+  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
+      builder, builder.CreateVector(tensors.data(), tensors.size()),
+      builder.CreateVector<int32_t>(subgraph_inputs.data(),
+                                    subgraph_inputs.size()),
+      builder.CreateVector<int32_t>(subgraph_outputs.data(),
+                                    subgraph_outputs.size()),
+      builder.CreateVector(&op, 1));
+
+  flatbuffers::Offset<flatbuffers::String> description =
+      builder.CreateString("Fully Connected model");
+
+  flatbuffers::Offset<Model> model_buffer = CreateModel(
+      builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1),
+      builder.CreateVector(&subgraph, 1), description,
+      builder.CreateVector(buffers.data(), buffers.size()));
+
+  builder.Finish(model_buffer);
+
+  return std::vector<char>(builder.GetBufferPointer(),
+                           builder.GetBufferPointer() + builder.GetSize());
+}
+
+int32_t FullyConnectedTester::ComputeSize(const std::vector<int32_t>& shape) {
+  return std::accumulate(shape.cbegin(), shape.cend(), 1,
+                         std::multiplies<int32_t>());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/fully_connected_tester.h b/tensorflow/lite/delegates/xnnpack/fully_connected_tester.h
new file mode 100644
index 00000000000..1c8e3d5d60c
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/fully_connected_tester.h
@@ -0,0 +1,114 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_FULLY_CONNECTED_TESTER_H_
+#define TENSORFLOW_LITE_DELEGATES_XNNPACK_FULLY_CONNECTED_TESTER_H_
+
+#include <cstdint>
+#include <initializer_list>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace xnnpack {
+
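+// Builder-style helper for FULLY_CONNECTED delegation tests. Typical use,
+// as in fully_connected_test.cc:
+//
+//   FullyConnectedTester()
+//       .InputShape({batch, input_channels})
+//       .InputChannels(input_channels)
+//       .OutputChannels(output_channels)
+//       .Test(xnnpack_delegate.get());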
+class FullyConnectedTester {
+ public:
+  FullyConnectedTester() = default;
+  FullyConnectedTester(const FullyConnectedTester&) = delete;
+  FullyConnectedTester& operator=(const FullyConnectedTester&) = delete;
+
+  inline FullyConnectedTester& InputShape(
+      std::initializer_list<int32_t> shape) {
+    for (auto it = shape.begin(); it != shape.end(); ++it) {
+      EXPECT_GT(*it, 0);
+    }
+    input_shape_ = std::vector<int32_t>(shape.begin(), shape.end());
+    input_size_ = ComputeSize(input_shape_);
+    return *this;
+  }
+
+  inline const std::vector<int32_t>& InputShape() const {
+    return input_shape_;
+  }
+
+  inline int32_t InputSize() const { return input_size_; }
+
+  inline FullyConnectedTester& InputChannels(int32_t input_channels) {
+    EXPECT_GT(input_channels, 0);
+    input_channels_ = input_channels;
+    return *this;
+  }
+
+  inline int32_t InputChannels() const { return input_channels_; }
+
+  inline FullyConnectedTester& OutputChannels(int32_t output_channels) {
+    EXPECT_GT(output_channels, 0);
+    output_channels_ = output_channels;
+    return *this;
+  }
+
+  inline int32_t OutputChannels() const { return output_channels_; }
+
+  std::vector<int32_t> OutputShape() const;
+
+  inline FullyConnectedTester& KeepDims(bool keep_dims) {
+    keep_dims_ = keep_dims;
+    return *this;
+  }
+
+  inline bool KeepDims() const { return keep_dims_; }
+
+  inline FullyConnectedTester& ReluActivation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU;
+    return *this;
+  }
+
+  inline FullyConnectedTester& Relu6Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU6;
+    return *this;
+  }
+
+  inline FullyConnectedTester& ReluMinus1To1Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU_N1_TO_1;
+    return *this;
+  }
+
+  void Test(TfLiteDelegate* delegate) const;
+
+ private:
+  std::vector<char> CreateTfLiteModel() const;
+
+  inline ::tflite::ActivationFunctionType Activation() const {
+    return activation_;
+  }
+
+  static int32_t ComputeSize(const std::vector<int32_t>& shape);
+
+  std::vector<int32_t> input_shape_;
+  int32_t input_size_ = 1;
+  int32_t input_channels_ = 1;
+  int32_t output_channels_ = 1;
+  bool keep_dims_ = false;
+  ::tflite::ActivationFunctionType activation_ =
+      ::tflite::ActivationFunctionType_NONE;
+};
+
+}  // namespace xnnpack
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_XNNPACK_FULLY_CONNECTED_TESTER_H_
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
index dc660e4128a..925ef82ddcb 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@@ -397,6 +397,21 @@ class Subgraph {
     return kTfLiteOk;
   }
 
+  static TfLiteStatus CheckFullyConnectedParams(
+      TfLiteContext* context, const TfLiteFullyConnectedParams* params,
+      int node_index) {
+    if (params->weights_format != kTfLiteFullyConnectedWeightsFormatDefault) {
+      if (context != nullptr) {
+        TF_LITE_KERNEL_LOG(
+            context, "unsupported non-default weights format in node #%d",
+            node_index);
+      }
+      return kTfLiteError;
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus CheckPoolingParams(TfLiteContext* context,
                                          const TfLitePoolParams* params,
                                          int node_index) {
@@ -493,8 +508,11 @@ class Subgraph {
     }
     for (int i = 0; i < tensor.dims->size; i++) {
       if (tensor.dims->data[i] <= 0) {
-        TF_LITE_KERNEL_LOG(context, "invalid dimension #%d (%d) in tensor #%d",
-                           i, tensor.dims->data[i], tensor_index);
+        if (context != nullptr) {
+          TF_LITE_KERNEL_LOG(context,
+                             "invalid dimension #%d (%d) in tensor #%d", i,
+                             tensor.dims->data[i], tensor_index);
+        }
         return kTfLiteError;
       }
     }
@@ -604,6 +622,14 @@ class Subgraph {
             node, context->tensors, dwconv_params, xnnpack_tensors);
       }
+      case kTfLiteBuiltinFullyConnected: {
+        const TfLiteFullyConnectedParams* fc_params =
+            static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
+
+        return VisitFullyConnectedNode(subgraph, logging_context, node_index,
+                                       node, context->tensors, fc_params,
+                                       xnnpack_tensors);
+      }
       case kTfLiteBuiltinHardSwish:
         return VisitHardSwishNode(subgraph, logging_context, node_index, node,
                                   context->tensors, xnnpack_tensors);
@@ -934,6 +960,156 @@ class Subgraph {
     return kTfLiteOk;
   }
 
+  static TfLiteStatus VisitFullyConnectedNode(
+      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
+      TfLiteNode* node, const TfLiteTensor* tensors,
+      const TfLiteFullyConnectedParams* fc_params,
+      const std::vector<uint32_t>& xnnpack_tensors) {
+    TF_LITE_ENSURE_STATUS(
+        CheckFullyConnectedParams(logging_context, fc_params, node_index));
+
+    TF_LITE_ENSURE_STATUS(
+        CheckNumInputsAndOutputs(logging_context, node, 3, 1, node_index));
+
+    const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+
+    const TfLiteTensor& filter_tensor = tensors[node->inputs->data[1]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, filter_tensor, node->inputs->data[1], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, filter_tensor, 2,
+                                           node->inputs->data[1]));
+    TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation(
+        logging_context, filter_tensor, node->inputs->data[1], node_index));
+
+    const TfLiteTensor& bias_tensor = tensors[node->inputs->data[2]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, bias_tensor, node->inputs->data[2], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, bias_tensor, 1,
+                                           node->inputs->data[2]));
+    TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation(
+        logging_context, bias_tensor, node->inputs->data[2], node_index));
+
+    const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+
+    const int32_t output_channels = filter_tensor.dims->data[0];
+    const int32_t input_channels = filter_tensor.dims->data[1];
+
+    if (input_tensor.dims->size == 0) {
+      if (logging_context != nullptr) {
+        TF_LITE_KERNEL_LOG(
+            logging_context,
+            "unexpected number of shape dimensions %d in tensor #%d",
+            input_tensor.dims->size, node->inputs->data[0]);
+      }
+      return kTfLiteError;
+    }
+
+    int32_t num_input_elements = 1;
+    for (int i = 0; i < input_tensor.dims->size; i++) {
+      if (input_tensor.dims->data[i] <= 0) {
+        if (logging_context != nullptr) {
+          TF_LITE_KERNEL_LOG(logging_context,
+                             "invalid dimension #%d (%d) in tensor #%d", i,
+                             input_tensor.dims->data[i],
+                             node->inputs->data[0]);
+        }
+        return kTfLiteError;
+      }
+      num_input_elements *= input_tensor.dims->data[i];
+    }
+
+    if (fc_params->keep_num_dims) {
+      TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, output_tensor,
+                                             input_tensor.dims->size,
+                                             node->outputs->data[0]));
+
+      for (int i = 0; i < input_tensor.dims->size - 1; i++) {
+        if (input_tensor.dims->data[i] != output_tensor.dims->data[i]) {
+          if (logging_context != nullptr) {
+            TF_LITE_KERNEL_LOG(
+                logging_context,
+                "mismatch in shape dimension %d (%d != %d) in input and "
+                "output tensors of FULLY_CONNECTED operator #%d",
+                i, input_tensor.dims->data[i], output_tensor.dims->data[i],
+                node_index);
+          }
+          return kTfLiteError;
+        }
+      }
+    } else {
+      if (num_input_elements % input_channels != 0) {
+        if (logging_context != nullptr) {
+          TF_LITE_KERNEL_LOG(
+              logging_context,
+              "number of elements in input tensor #%d in FULLY_CONNECTED "
+              "operator is not divisible by input channels (%d)",
+              node->inputs->data[0], input_channels);
+        }
+        return kTfLiteError;
+      }
+
+      TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, output_tensor,
+                                             2, node->outputs->data[0]));
+
+      if (output_tensor.dims->data[0] != num_input_elements / input_channels) {
+        if (logging_context != nullptr) {
+          TF_LITE_KERNEL_LOG(
+              logging_context,
+              "batch size %d in output tensor #%d in FULLY_CONNECTED "
+              "operator does not match batch size %d in reshaped input "
+              "tensor #%d",
+              output_tensor.dims->data[0], node->outputs->data[0],
+              num_input_elements / input_channels, node->inputs->data[0]);
+        }
+        return kTfLiteError;
+      }
+    }
+
+    if (output_tensor.dims->data[output_tensor.dims->size - 1] !=
+        output_channels) {
+      if (logging_context != nullptr) {
+        TF_LITE_KERNEL_LOG(
+            logging_context,
+            "number of channels %d in output tensor #%d does not match "
+            "output channels %d in filter tensor #%d",
+            output_tensor.dims->data[output_tensor.dims->size - 1],
+            node->outputs->data[0], output_channels, node->inputs->data[1]);
+      }
+      return kTfLiteError;
+    }
+
+    float output_min = -std::numeric_limits<float>::infinity();
+    float output_max = +std::numeric_limits<float>::infinity();
+    TF_LITE_ENSURE_STATUS(ConvertActivationToOutputRange(
+        logging_context, node_index, fc_params->activation, &output_min,
+        &output_max));
+
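+    // subgraph may be null here: in that case the call has only validated
+    // that the node is delegatable, and the XNNPACK definition happens on a
+    // later pass with a live subgraph. keep_num_dims == false corresponds to
+    // TFLite's implicit reshape-to-2D behavior, which XNNPACK expresses with
+    // XNN_FLAG_TENSORFLOW_RESHAPE_2D.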
+    if (subgraph != nullptr) {
+      const xnn_status status = xnn_define_fully_connected(
+          subgraph, output_min, output_max,
+          /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
+          /*filter_id=*/xnnpack_tensors[node->inputs->data[1]],
+          /*bias_id=*/xnnpack_tensors[node->inputs->data[2]],
+          /*output_id=*/xnnpack_tensors[node->outputs->data[0]],
+          /*flags=*/fc_params->keep_num_dims ? 0
+                                             : XNN_FLAG_TENSORFLOW_RESHAPE_2D);
+      if (status != xnn_status_success) {
+        TF_LITE_KERNEL_LOG(logging_context,
+                           "failed to delegate FULLY_CONNECTED node #%d",
+                           node_index);
+        return kTfLiteError;
+      }
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus VisitHardSwishNode(
       xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
       TfLiteNode* node, const TfLiteTensor* tensors,
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 3814e9c0971..6d642f15ef5 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -164,11 +164,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "XNNPACK",
-        sha256 = "c002c961fd73b87b68074f9fda49d0dcbd0627c783e487a445da16bcd8dfdee6",
-        strip_prefix = "XNNPACK-10a38087936d84ab2879a2e39fc7e204757ff3e8",
+        sha256 = "f6eb0f1759eca187d922a72a3a12dfe1593bd09783aa4b67bee70630985eb832",
+        strip_prefix = "XNNPACK-38c07ec51af0cbacb255922fb6219df80c06df59",
         urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/10a38087936d84ab2879a2e39fc7e204757ff3e8.zip",
-            "https://github.com/google/XNNPACK/archive/10a38087936d84ab2879a2e39fc7e204757ff3e8.zip",
+            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/38c07ec51af0cbacb255922fb6219df80c06df59.zip",
+            "https://github.com/google/XNNPACK/archive/38c07ec51af0cbacb255922fb6219df80c06df59.zip",
         ],
     )