Support DEPTH_TO_SPACE in XNNPACK delegate

PiperOrigin-RevId: 344815817
Change-Id: I20179c0a6fe0db730dc0f2d6fc1d1825cb1bdbfa
This commit is contained in:
Marat Dukhan 2020-11-30 08:20:22 -08:00 committed by TensorFlower Gardener
parent 4f1baf0219
commit 634864312d
8 changed files with 473 additions and 5 deletions

View File

@ -102,6 +102,23 @@ cc_library(
],
)
# Test-only library holding the DepthToSpaceTester helper
# (depth_to_space_tester.h / depth_to_space_tester.cc). The deps cover the
# TFLite framework, builtin ops, schema/flatbuffers, and googletest.
cc_library(
name = "depth_to_space_tester",
testonly = 1,
srcs = ["depth_to_space_tester.cc"],
hdrs = ["depth_to_space_tester.h"],
deps = [
"//tensorflow/lite:framework",
"//tensorflow/lite:schema_fbs_version",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/schema:schema_conversion_utils",
"//tensorflow/lite/schema:schema_fbs",
"@com_google_googletest//:gtest",
"@flatbuffers",
],
)
cc_library(
name = "depthwise_conv_2d_tester",
testonly = 1,
@ -381,6 +398,21 @@ cc_test(
],
)
# Test target built from depth_to_space_test.cc on top of the
# :depth_to_space_tester helper. EMSCRIPTEN_LINKOPTS are applied only under the
# //tensorflow:emscripten configuration; every other configuration adds no
# extra link options.
cc_test(
name = "depth_to_space_test",
srcs = ["depth_to_space_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":depth_to_space_tester",
":test_main",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "depthwise_conv_2d_test",
srcs = ["depthwise_conv_2d_test.cc"],

View File

@ -154,6 +154,11 @@ Below is the list of current operators and limitations:
* Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported,
but fused `TANH` and `SIGN_BIT` activations are not.
### `DEPTH_TO_SPACE`
* Inputs and outputs must be in 32-bit floating-point format.
* Block size must be greater than 1.
### `DEPTHWISE_CONV_2D`
* Inputs and outputs must be in 32-bit floating-point format.

View File

@ -0,0 +1,156 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/depth_to_space_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
namespace tflite {
namespace xnnpack {
// DEPTH_TO_SPACE on a 1x1 input image. Batch size is drawn from [2, 4],
// block size from [2, 3], and output channels from [2, 16].
TEST(DepthToSpace, SinglePixel) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  // Delegate is created with null options and released by the deleter.
  DelegatePtr xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
                               TfLiteXNNPackDelegateDelete);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int32_t> batch_dist(2, 4);
  std::uniform_int_distribution<int32_t> block_dist(2, 3);
  std::uniform_int_distribution<int32_t> channel_dist(2, 16);

  // Draw order: batch, output channels, block size.
  const int32_t batch_size = batch_dist(rng);
  const int32_t output_channels = channel_dist(rng);
  const int32_t block_size = block_dist(rng);

  DepthToSpaceTester tester;
  tester.BatchSize(batch_size);
  tester.InputHeight(1);
  tester.InputWidth(1);
  tester.OutputChannels(output_channels);
  tester.BlockSize(block_size);
  tester.Test(xnnpack_delegate.get());
}
// DEPTH_TO_SPACE on an input that is one pixel tall. Batch size is drawn from
// [2, 4], width from [5, 25], block size from [2, 3], and output channels
// from [2, 16].
TEST(DepthToSpace, SingleRow) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  // Delegate is created with null options and released by the deleter.
  DelegatePtr xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
                               TfLiteXNNPackDelegateDelete);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int32_t> batch_dist(2, 4);
  std::uniform_int_distribution<int32_t> width_dist(5, 25);
  std::uniform_int_distribution<int32_t> block_dist(2, 3);
  std::uniform_int_distribution<int32_t> channel_dist(2, 16);

  // Draw order: batch, width, output channels, block size.
  const int32_t batch_size = batch_dist(rng);
  const int32_t input_width = width_dist(rng);
  const int32_t output_channels = channel_dist(rng);
  const int32_t block_size = block_dist(rng);

  DepthToSpaceTester tester;
  tester.BatchSize(batch_size);
  tester.InputHeight(1);
  tester.InputWidth(input_width);
  tester.OutputChannels(output_channels);
  tester.BlockSize(block_size);
  tester.Test(xnnpack_delegate.get());
}
// DEPTH_TO_SPACE on an input that is one pixel wide. Batch size is drawn from
// [2, 4], height from [5, 25], block size from [2, 3], and output channels
// from [2, 16].
TEST(DepthToSpace, SingleColumn) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  // Delegate is created with null options and released by the deleter.
  DelegatePtr xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
                               TfLiteXNNPackDelegateDelete);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int32_t> batch_dist(2, 4);
  std::uniform_int_distribution<int32_t> height_dist(5, 25);
  std::uniform_int_distribution<int32_t> block_dist(2, 3);
  std::uniform_int_distribution<int32_t> channel_dist(2, 16);

  // Draw order: batch, height, output channels, block size.
  const int32_t batch_size = batch_dist(rng);
  const int32_t input_height = height_dist(rng);
  const int32_t output_channels = channel_dist(rng);
  const int32_t block_size = block_dist(rng);

  DepthToSpaceTester tester;
  tester.BatchSize(batch_size);
  tester.InputHeight(input_height);
  tester.InputWidth(1);
  tester.OutputChannels(output_channels);
  tester.BlockSize(block_size);
  tester.Test(xnnpack_delegate.get());
}
// DEPTH_TO_SPACE on a full 2D input. Batch size is drawn from [2, 4], height
// and width independently from [5, 25], block size from [2, 3], and output
// channels from [2, 16].
TEST(DepthToSpace, FullImage) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;
  // Delegate is created with null options and released by the deleter.
  DelegatePtr xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
                               TfLiteXNNPackDelegateDelete);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int32_t> batch_dist(2, 4);
  std::uniform_int_distribution<int32_t> size_dist(5, 25);
  std::uniform_int_distribution<int32_t> block_dist(2, 3);
  std::uniform_int_distribution<int32_t> channel_dist(2, 16);

  // Draw order: batch, height, width, output channels, block size.
  const int32_t batch_size = batch_dist(rng);
  const int32_t input_height = size_dist(rng);
  const int32_t input_width = size_dist(rng);
  const int32_t output_channels = channel_dist(rng);
  const int32_t block_size = block_dist(rng);

  DepthToSpaceTester tester;
  tester.BatchSize(batch_size);
  tester.InputHeight(input_height);
  tester.InputWidth(input_width);
  tester.OutputChannels(output_channels);
  tester.BlockSize(block_size);
  tester.Test(xnnpack_delegate.get());
}
// Same shape ranges as the full-image case, but the delegate is created from
// default options with num_threads overridden to 2.
TEST(DepthToSpace, MultiThreading) {
  using DelegatePtr =
      std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>;

  TfLiteXNNPackDelegateOptions delegate_options =
      TfLiteXNNPackDelegateOptionsDefault();
  delegate_options.num_threads = 2;
  DelegatePtr xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
                               TfLiteXNNPackDelegateDelete);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int32_t> batch_dist(2, 4);
  std::uniform_int_distribution<int32_t> size_dist(5, 25);
  std::uniform_int_distribution<int32_t> block_dist(2, 3);
  std::uniform_int_distribution<int32_t> channel_dist(2, 16);

  // Draw order: batch, height, width, output channels, block size.
  const int32_t batch_size = batch_dist(rng);
  const int32_t input_height = size_dist(rng);
  const int32_t input_width = size_dist(rng);
  const int32_t output_channels = channel_dist(rng);
  const int32_t block_size = block_dist(rng);

  DepthToSpaceTester tester;
  tester.BatchSize(batch_size);
  tester.InputHeight(input_height);
  tester.InputWidth(input_width);
  tester.OutputChannels(output_channels);
  tester.BlockSize(block_size);
  tester.Test(xnnpack_delegate.get());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -0,0 +1,171 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/xnnpack/depth_to_space_tester.h"
#include <array>
#include <cstdint>
#include <functional>
#include <numeric>
#include <random>
#include <vector>
#include <gtest/gtest.h>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
namespace tflite {
namespace xnnpack {
// Builds the single-operator DEPTH_TO_SPACE model into two interpreters,
// applies `delegate` to one of them, feeds both the same random float input,
// and asserts that every output element is exactly equal between the two.
//
// Any failed ASSERT_* returns from this function early.
void DepthToSpaceTester::Test(TfLiteDelegate* delegate) const {
  using Resolver =
      ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates;

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> f32dist;
  auto next_f32 = [&rng, &f32dist]() { return f32dist(rng); };

  const std::vector<char> model_data = CreateTfLiteModel();
  const Model* model = GetModel(model_data.data());

  // Interpreter that will have the delegate applied.
  std::unique_ptr<Interpreter> delegate_interpreter;
  const TfLiteStatus delegate_build_status =
      InterpreterBuilder(model, Resolver())(&delegate_interpreter);
  ASSERT_EQ(delegate_build_status, kTfLiteOk);

  // Reference interpreter running the builtin kernels only.
  std::unique_ptr<Interpreter> default_interpreter;
  const TfLiteStatus default_build_status =
      InterpreterBuilder(model, Resolver())(&default_interpreter);
  ASSERT_EQ(default_build_status, kTfLiteOk);

  ASSERT_TRUE(delegate_interpreter);
  ASSERT_TRUE(default_interpreter);

  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
  ASSERT_EQ(default_interpreter->inputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
  ASSERT_EQ(default_interpreter->outputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);

  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);

  // Fill the reference input with random values, then mirror it into the
  // delegate interpreter so both see identical data.
  const int32_t input_elements =
      BatchSize() * InputHeight() * InputWidth() * InputChannels();

  float* default_input_data = default_interpreter->typed_tensor<float>(
      default_interpreter->inputs()[0]);
  float* default_input_end = default_input_data + input_elements;
  std::generate(default_input_data, default_input_end, next_f32);

  float* delegate_input_data = delegate_interpreter->typed_tensor<float>(
      delegate_interpreter->inputs()[0]);
  std::copy(default_input_data, default_input_end, delegate_input_data);

  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);

  float* default_output_data = default_interpreter->typed_tensor<float>(
      default_interpreter->outputs()[0]);
  float* delegate_output_data = delegate_interpreter->typed_tensor<float>(
      delegate_interpreter->outputs()[0]);

  const int32_t batch_size = BatchSize();
  const int32_t output_height = OutputHeight();
  const int32_t output_width = OutputWidth();
  const int32_t output_channels = OutputChannels();

  // Walk the output as [batch, y, x, channel] so a mismatch is reported with
  // its full coordinates.
  for (int32_t i = 0; i < batch_size; ++i) {
    for (int32_t y = 0; y < output_height; ++y) {
      const int32_t row_offset = (i * output_height + y) * output_width;
      for (int32_t x = 0; x < output_width; ++x) {
        const int32_t pixel_offset = (row_offset + x) * output_channels;
        for (int32_t c = 0; c < output_channels; ++c) {
          const int32_t index = pixel_offset + c;
          ASSERT_EQ(default_output_data[index], delegate_output_data[index])
              << "batch " << i << " / " << batch_size << ", y position " << y
              << " / " << output_height << ", x position " << x << " / "
              << output_width << ", channel " << c << " / "
              << output_channels;
        }
      }
    }
  }
}
std::vector<char> DepthToSpaceTester::CreateTfLiteModel() const {
flatbuffers::FlatBufferBuilder builder;
flatbuffers::Offset<OperatorCode> operator_code =
CreateOperatorCode(builder, BuiltinOperator_DEPTH_TO_SPACE, 0);
const std::array<flatbuffers::Offset<Buffer>, 1> buffers{{
CreateBuffer(builder, builder.CreateVector({})),
}};
const std::array<int32_t, 4> input_shape{
{BatchSize(), InputHeight(), InputWidth(), InputChannels()}};
const std::array<int32_t, 4> output_shape{
{BatchSize(), OutputHeight(), OutputWidth(), OutputChannels()}};
const std::array<flatbuffers::Offset<Tensor>, 2> tensors{{
CreateTensor(
builder,
builder.CreateVector<int32_t>(input_shape.data(), input_shape.size()),
TensorType_FLOAT32),
CreateTensor(builder,
builder.CreateVector<int32_t>(output_shape.data(),
output_shape.size()),
TensorType_FLOAT32),
}};
const std::array<int32_t, 1> op_inputs{{0}};
const std::array<int32_t, 1> op_outputs{{1}};
const flatbuffers::Offset<Operator> op = CreateOperator(
builder, /*opcode_index=*/0,
builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
tflite::BuiltinOptions_DepthToSpaceOptions,
CreateDepthToSpaceOptions(builder, BlockSize()).Union());
const std::array<int32_t, 1> subgraph_inputs{{op_inputs.front()}};
const std::array<int32_t, 1> subgraph_outputs{{op_outputs.front()}};
flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
builder, builder.CreateVector(tensors.data(), tensors.size()),
builder.CreateVector<int32_t>(subgraph_inputs.data(),
subgraph_inputs.size()),
builder.CreateVector<int32_t>(subgraph_outputs.data(),
subgraph_outputs.size()),
builder.CreateVector(&op, 1));
const flatbuffers::Offset<Model> model_buffer = CreateModel(
builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1),
builder.CreateVector(&subgraph, 1),
builder.CreateString("Depth-To-Space model"),
builder.CreateVector(buffers.data(), buffers.size()));
builder.Finish(model_buffer);
return std::vector<char>(builder.GetBufferPointer(),
builder.GetBufferPointer() + builder.GetSize());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -0,0 +1,97 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_DEPTH_TO_SPACE_TESTER_H_
#define TENSORFLOW_LITE_DELEGATES_XNNPACK_DEPTH_TO_SPACE_TESTER_H_
#include <cstdint>
#include <vector>
#include <gtest/gtest.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace xnnpack {
// Builder-style test helper for DEPTH_TO_SPACE.
//
// Configure the shape through the chained setters, then call Test() with the
// delegate under test. Each setter records a non-fatal gtest expectation on
// its argument and still stores the value. Defaults: batch 1, 1x1 input,
// 1 output channel, block size 2.
class DepthToSpaceTester {
 public:
  DepthToSpaceTester() = default;
  DepthToSpaceTester(const DepthToSpaceTester&) = delete;
  DepthToSpaceTester& operator=(const DepthToSpaceTester&) = delete;

  // Batch dimension; expected to be positive.
  DepthToSpaceTester& BatchSize(int32_t batch_size) {
    EXPECT_GT(batch_size, 0);
    batch_size_ = batch_size;
    return *this;
  }

  int32_t BatchSize() const { return batch_size_; }

  // Derived: output channels multiplied by the square of the block size.
  int32_t InputChannels() const {
    return OutputChannels() * BlockSize() * BlockSize();
  }

  // Channel count of the output tensor; expected to be positive.
  DepthToSpaceTester& OutputChannels(int32_t output_channels) {
    EXPECT_GT(output_channels, 0);
    output_channels_ = output_channels;
    return *this;
  }

  int32_t OutputChannels() const { return output_channels_; }

  // Height of the input tensor; expected to be positive.
  DepthToSpaceTester& InputHeight(int32_t input_height) {
    EXPECT_GT(input_height, 0);
    input_height_ = input_height;
    return *this;
  }

  int32_t InputHeight() const { return input_height_; }

  // Width of the input tensor; expected to be positive.
  DepthToSpaceTester& InputWidth(int32_t input_width) {
    EXPECT_GT(input_width, 0);
    input_width_ = input_width;
    return *this;
  }

  int32_t InputWidth() const { return input_width_; }

  // Derived: input width scaled by the block size.
  int32_t OutputWidth() const { return InputWidth() * BlockSize(); }

  // Derived: input height scaled by the block size.
  int32_t OutputHeight() const { return InputHeight() * BlockSize(); }

  // Block size of the operator; expected to be greater than 1.
  DepthToSpaceTester& BlockSize(int32_t block_size) {
    EXPECT_GT(block_size, 1);
    block_size_ = block_size;
    return *this;
  }

  int32_t BlockSize() const { return block_size_; }

  // Runs the configured case against `delegate`; defined out of line.
  void Test(TfLiteDelegate* delegate) const;

 private:
  // Produces the serialized model for the current configuration; defined out
  // of line.
  std::vector<char> CreateTfLiteModel() const;

  int32_t batch_size_{1};
  int32_t input_height_{1};
  int32_t input_width_{1};
  int32_t output_channels_{1};
  int32_t block_size_{2};
};
} // namespace xnnpack
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_DEPTH_TO_SPACE_TESTER_H_

View File

@ -1420,6 +1420,13 @@ class Subgraph {
TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
logging_context, output_tensor, node->outputs->data[0], node_index));
if (depth_to_space_params->block_size <= 1) {
TF_LITE_MAYBE_KERNEL_LOG(
logging_context, "invalid block size (%d) in DEPTH_TO_SPACE node #%d",
depth_to_space_params->block_size, node_index);
return kTfLiteError;
}
if (subgraph != nullptr) {
const xnn_status status = xnn_define_depth_to_space(
subgraph,

View File

@ -22,7 +22,7 @@ include(FetchContent)
OverridableFetchContent_Declare(
xnnpack
GIT_REPOSITORY https://github.com/google/xnnpack
GIT_TAG bbe85068bb7aa6249a4e915462014016373c945f
GIT_TAG 0a9c1200ccb49bba0170a46a62044b13714f39a3
GIT_PROGRESS TRUE
PREFIX "${CMAKE_BINARY_DIR}"
SOURCE_DIR "${CMAKE_BINARY_DIR}/xnnpack"

View File

@ -135,11 +135,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
# and update the sha256 with the result.
tf_http_archive(
name = "XNNPACK",
sha256 = "22c065f68df9a7a6321c4e9ee1f2d3cbfb471785804fb4fffa0fb2858d847e7f",
strip_prefix = "XNNPACK-bbe85068bb7aa6249a4e915462014016373c945f",
sha256 = "eb087959b684d2d3965f8914075032e3995e4726ac8ce9c09a367863ff184b99",
strip_prefix = "XNNPACK-0a9c1200ccb49bba0170a46a62044b13714f39a3",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/bbe85068bb7aa6249a4e915462014016373c945f.zip",
"https://github.com/google/XNNPACK/archive/bbe85068bb7aa6249a4e915462014016373c945f.zip",
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/0a9c1200ccb49bba0170a46a62044b13714f39a3.zip",
"https://github.com/google/XNNPACK/archive/0a9c1200ccb49bba0170a46a62044b13714f39a3.zip",
],
)