From 77f6f8ccdeeca06a05b3513842f7e4e12febbe1b Mon Sep 17 00:00:00 2001 From: Juhyun Lee Date: Tue, 9 Jul 2019 14:50:05 -0700 Subject: [PATCH] TFLite GPU: Implement HARD_SWISH for MobileNet v3. PiperOrigin-RevId: 257276206 --- .../delegates/gpu/common/model_builder.cc | 20 ++ .../lite/delegates/gpu/common/operations.cc | 43 +-- .../lite/delegates/gpu/common/operations.h | 3 +- .../delegates/gpu/gl/kernels/elementwise.cc | 89 +++--- .../gpu/gl/kernels/elementwise_test.cc | 270 ++++++++++-------- .../lite/delegates/gpu/gl/kernels/registry.cc | 15 +- tensorflow/lite/delegates/gpu/metal/api.cc | 4 + .../lite/delegates/gpu/metal/kernels/BUILD | 13 + .../delegates/gpu/metal/kernels/hard_swish.cc | 47 +++ .../delegates/gpu/metal/kernels/hard_swish.h | 37 +++ 10 files changed, 344 insertions(+), 197 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.cc create mode 100644 tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 253f792b0fd..f762a0fe0a3 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -816,6 +816,24 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser { } }; +class HardSwishOperationParser : public TFLiteOperationParser { + public: + Status IsSupported(const TfLiteContext* context, + const TfLiteNode* tflite_node, + const TfLiteRegistration*) final { + return CheckInputsOutputs(context, tflite_node, /*inputs=*/1, + /*outputs=*/1); + } + + Status Parse(const TfLiteNode*, const TfLiteRegistration*, + GraphFloat32* graph, ObjectReader* reader) final { + Node* node = graph->NewNode(); + node->operation.type = ToString(OperationType::HARD_SWISH); + RETURN_IF_ERROR(reader->AddInput(node, 0)); + return reader->AddOutputs(node); + } +}; + class ReshapeOperationParser : public TFLiteOperationParser { 
public: Status IsSupported(const TfLiteContext* context, @@ -2003,6 +2021,8 @@ std::unique_ptr<TFLiteOperationParser> NewOperationParser( return make_unique<ElementwiseOperationParser>(OperationType::DIV); case kTfLiteBuiltinFullyConnected: return make_unique<FullyConnectedOperationParser>(); + case kTfLiteBuiltinHardSwish: + return make_unique<HardSwishOperationParser>(); case kTfLiteBuiltinLogistic: return make_unique<ElementwiseOperationParser>(OperationType::SIGMOID); case kTfLiteBuiltinLog: diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index f7f9d1b7351..eb1f01804df 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -46,50 +46,58 @@ Padding2D& Padding2D::operator-(const Padding2D& value) { std::string ToString(enum OperationType op) { switch (op) { - case OperationType::UNKNOWN: - break; case OperationType::ABS: return "abs"; case OperationType::ADD: return "add"; case OperationType::APPLY_MASK: return "apply_mask"; - case OperationType::BATCH_TO_SPACE: - return "batch_to_space"; - case OperationType::POOLING_2D: - return "pooling_2d"; - case OperationType::MAX_UNPOOLING_2D: - return "max_unpooling"; case OperationType::BATCH_NORMALIZATION: return "batch_normalization"; + case OperationType::BATCH_TO_SPACE: + return "batch_to_space"; case OperationType::CONCAT: return "concat"; case OperationType::CONST: return "const"; case OperationType::CONVOLUTION_2D: return "convolution_2d"; + case OperationType::CONVOLUTION_TRANSPOSED: + return "convolution_transposed"; case OperationType::COS: return "cos"; case OperationType::DEPTHWISE_CONVOLUTION: return "depthwise_convolution"; case OperationType::DIV: return "div"; + case OperationType::FULLY_CONNECTED: + return "fully_connected"; + case OperationType::HARD_SWISH: + return "hard_swish"; case OperationType::LOG: return "log"; + case OperationType::LSTM: + return "lstm"; + case OperationType::MAX_UNPOOLING_2D: + return "max_unpooling"; case OperationType::MUL: return "mul"; + case
OperationType::MULTIPLY_SCALAR: + return "multiply_scalar"; case OperationType::PAD: return "pad"; + case OperationType::POOLING_2D: + return "pooling_2d"; case OperationType::POW: return "pow"; case OperationType::PRELU: return "prelu"; case OperationType::RELU: return "relu"; - case OperationType::RESIZE: - return "resize"; case OperationType::RESHAPE: return "reshape"; + case OperationType::RESIZE: + return "resize"; case OperationType::RSQRT: return "rsqrt"; case OperationType::SIGMOID: @@ -110,18 +118,12 @@ std::string ToString(enum OperationType op) { return "squared_diff"; case OperationType::SUB: return "subtract"; - case OperationType::UPSAMPLE_2D: - return "upsample_2d"; - case OperationType::CONVOLUTION_TRANSPOSED: - return "convolution_transposed"; - case OperationType::MULTIPLY_SCALAR: - return "multiply_scalar"; - case OperationType::FULLY_CONNECTED: - return "fully_connected"; case OperationType::TANH: return "tanh"; - case OperationType::LSTM: - return "lstm"; + case OperationType::UPSAMPLE_2D: + return "upsample_2d"; + default: + break; } return "unknown_operation"; } @@ -140,6 +142,7 @@ OperationType OperationTypeFromString(const std::string& name) { {"cos", OperationType::COS}, {"depthwise_convolution", OperationType::DEPTHWISE_CONVOLUTION}, {"fully_connected", OperationType::FULLY_CONNECTED}, + {"hard_swish", OperationType::HARD_SWISH}, {"log", OperationType::LOG}, {"lstm", OperationType::LSTM}, {"max_unpooling", OperationType::MAX_UNPOOLING_2D}, diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index ef825376b31..5e564f6763c 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -46,14 +46,15 @@ enum class OperationType { DEPTHWISE_CONVOLUTION, DIV, FULLY_CONNECTED, + HARD_SWISH, LOG, LSTM, MAX_UNPOOLING_2D, MUL, MULTIPLY_SCALAR, + PAD, POOLING_2D, POW, - PAD, PRELU, RELU, RESHAPE, diff --git 
a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc index 37ee322ac8a..8ad2679e62e 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc @@ -34,60 +34,56 @@ class ElementwiseOneArgument : public NodeShader { GeneratedCode* generated_code) const final { std::string source; switch (operation_type_) { - case OperationType::ABS: { + case OperationType::ABS: source = "value_0 = abs(value_0);"; break; - } - case OperationType::SIN: { - source = "value_0 = sin(value_0);"; - break; - } - case OperationType::COS: { + case OperationType::COS: source = "value_0 = cos(value_0);"; break; - } - case OperationType::LOG: { + case OperationType::HARD_SWISH: + source = + "value_0 *= clamp(value_0 / 6.0 + vec4(0.5), vec4(0.0), " + "vec4(1.0));"; + break; + case OperationType::LOG: source = R"( - const float nan = normalize(vec4(0,0,0,0)).x; - value_0.x = value_0.x > 0.0 ? log(value_0.x) : nan; - value_0.y = value_0.y > 0.0 ? log(value_0.y) : nan; - value_0.z = value_0.z > 0.0 ? log(value_0.z) : nan; - value_0.w = value_0.w > 0.0 ? log(value_0.w) : nan; - )"; + const float nan = normalize(vec4(0, 0, 0, 0)).x; + value_0.x = value_0.x > 0.0 ? log(value_0.x) : nan; + value_0.y = value_0.y > 0.0 ? log(value_0.y) : nan; + value_0.z = value_0.z > 0.0 ? log(value_0.z) : nan; + value_0.w = value_0.w > 0.0 ? log(value_0.w) : nan; + )"; break; - } - case OperationType::SQRT: { + case OperationType::RSQRT: source = R"( - const float nan = normalize(vec4(0,0,0,0)).x; - value_0.x = value_0.x >= 0.0 ? sqrt(value_0.x) : nan; - value_0.y = value_0.y >= 0.0 ? sqrt(value_0.y) : nan; - value_0.z = value_0.z >= 0.0 ? sqrt(value_0.z) : nan; - value_0.w = value_0.w >= 0.0 ? sqrt(value_0.w) : nan; - )"; + const float nan = normalize(vec4(0, 0, 0, 0)).x; + value_0.x = value_0.x >= 0.0 ? 1.0 / sqrt(value_0.x) : nan; + value_0.y = value_0.y >= 0.0 ? 
1.0 / sqrt(value_0.y) : nan; + value_0.z = value_0.z >= 0.0 ? 1.0 / sqrt(value_0.z) : nan; + value_0.w = value_0.w >= 0.0 ? 1.0 / sqrt(value_0.w) : nan; + )"; break; - } - case OperationType::RSQRT: { - source = R"( - const float nan = normalize(vec4(0,0,0,0)).x; - value_0.x = value_0.x >= 0.0 ? 1.0 / sqrt(value_0.x) : nan; - value_0.y = value_0.y >= 0.0 ? 1.0 / sqrt(value_0.y) : nan; - value_0.z = value_0.z >= 0.0 ? 1.0 / sqrt(value_0.z) : nan; - value_0.w = value_0.w >= 0.0 ? 1.0 / sqrt(value_0.w) : nan; - )"; - break; - } - case OperationType::SQUARE: { - source = "value_0 = value_0 * value_0;"; - break; - } - case OperationType::SIGMOID: { + case OperationType::SIGMOID: source = "value_0 = 1.0 / (1.0 + exp(-1.0 * value_0));"; break; - } - case OperationType::TANH: { + case OperationType::SIN: + source = "value_0 = sin(value_0);"; + break; + case OperationType::SQRT: + source = R"( + const float nan = normalize(vec4(0, 0, 0, 0)).x; + value_0.x = value_0.x >= 0.0 ? sqrt(value_0.x) : nan; + value_0.y = value_0.y >= 0.0 ? sqrt(value_0.y) : nan; + value_0.z = value_0.z >= 0.0 ? sqrt(value_0.z) : nan; + value_0.w = value_0.w >= 0.0 ? 
sqrt(value_0.w) : nan; + )"; + break; + case OperationType::SQUARE: + source = "value_0 = value_0 * value_0;"; + break; + case OperationType::TANH: source = "value_0 = tanh(value_0);"; break; - } default: return InvalidArgumentError("Incorrect elementwise operation type."); } @@ -183,19 +179,20 @@ std::unique_ptr<NodeShader> NewElementwiseNodeShader( OperationType operation_type) { switch (operation_type) { case OperationType::ABS: - case OperationType::SIN: case OperationType::COS: case OperationType::LOG: - case OperationType::SQRT: + case OperationType::HARD_SWISH: case OperationType::RSQRT: - case OperationType::SQUARE: case OperationType::SIGMOID: + case OperationType::SIN: + case OperationType::SQRT: + case OperationType::SQUARE: case OperationType::TANH: return absl::make_unique<ElementwiseOneArgument>(operation_type); - case OperationType::SUB: case OperationType::DIV: case OperationType::POW: case OperationType::SQUARED_DIFF: + case OperationType::SUB: return absl::make_unique<ElementwiseTwoArguments>(operation_type); default: return nullptr; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc index a0d088dbe48..6743664f7e2 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc @@ -28,139 +28,45 @@ namespace gpu { namespace gl { namespace { -class ElementwiseOneArgumentTest : public ::testing::Test { - public: - ElementwiseOneArgumentTest() = default; - ~ElementwiseOneArgumentTest() override = default; +TensorRef GetTensorRef(int ref, const BHWC& shape) { + TensorRef tensor_ref; + tensor_ref.type = DataType::FLOAT32; + tensor_ref.ref = ref; + tensor_ref.shape = shape; + return tensor_ref; +} - TensorRef GetTensorRef(int ref) { - TensorRef tensor_ref; - tensor_ref.type = DataType::FLOAT32; - tensor_ref.ref = ref; - tensor_ref.shape = BHWC(1, 2, 2, 1); - return tensor_ref; - } -}; - -TEST_F(ElementwiseOneArgumentTest, Abs) { +TEST(ElementwiseTest,
Abs) { OperationType op_type = OperationType::ABS; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {0.0, 6.2, 2.0, 4.0})); } -TEST_F(ElementwiseOneArgumentTest, Sin) { - OperationType op_type = OperationType::SIN; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, 3.1415926, -3.1415926, 1.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, 0.0, 0.0, 0.841471})); -} - -TEST_F(ElementwiseOneArgumentTest, Cos) { +TEST(ElementwiseTest, Cos) { OperationType op_type = OperationType::COS; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, 3.1415926, -3.1415926, 1})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1.0, -1.0, -1.0, 0.540302})); } -TEST_F(ElementwiseOneArgumentTest, Log) { - OperationType op_type = OperationType::LOG; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {1.0, 3.1415926, 1.0, 1.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, 1.14473, 0.0, 0.0})); -} - -TEST_F(ElementwiseOneArgumentTest, Sqrt) 
{ - OperationType op_type = OperationType::SQRT; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, 1.0, 2.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, 1.0, 1.414213, 2.0})); -} - -TEST_F(ElementwiseOneArgumentTest, Rsqrt) { - OperationType op_type = OperationType::RSQRT; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 4.0, 9.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {1.0, 0.707106, 0.5, 0.333333})); -} - -TEST_F(ElementwiseOneArgumentTest, Square) { - OperationType op_type = OperationType::SQUARE; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 0.5, -3.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {1.0, 4.0, 0.25, 9.0})); -} - -TEST_F(ElementwiseOneArgumentTest, Sigmoid) { - OperationType op_type = OperationType::SIGMOID; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.5, 0.002473, 0.880797, 0.982014})); -} - -TEST_F(ElementwiseOneArgumentTest, Tanh) { - OperationType op_type = OperationType::TANH; - SingleOpModel model({ToString(op_type), {}}, {GetTensorRef(0)}, - {GetTensorRef(1)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {0.0, -0.999987, 0.964027, 0.999329})); -} - 
-class ElementwiseTwoArgumentsTest : public ::testing::Test { - public: - ElementwiseTwoArgumentsTest() = default; - ~ElementwiseTwoArgumentsTest() override = default; - - TensorRef GetTensorRef(int ref) { - TensorRef tensor_ref; - tensor_ref.type = DataType::FLOAT32; - tensor_ref.ref = ref; - tensor_ref.shape = BHWC(1, 2, 2, 1); - return tensor_ref; - } -}; - -TEST_F(ElementwiseTwoArgumentsTest, Sub) { - OperationType op_type = OperationType::SUB; - SingleOpModel model({ToString(op_type), {}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); - ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); - ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, 4.0})); - ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); - EXPECT_THAT(model.GetOutput(0), - Pointwise(FloatNear(1e-6), {-1.0, -8.2, -1.0, 0.0})); -} - -TEST_F(ElementwiseTwoArgumentsTest, Div) { +TEST(ElementwiseTest, Div) { OperationType op_type = OperationType::DIV; - SingleOpModel model({ToString(op_type), {}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, -0.5, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); @@ -168,10 +74,39 @@ TEST_F(ElementwiseTwoArgumentsTest, Div) { Pointwise(FloatNear(1e-6), {0.0, -3.1, -4.0, 1.0})); } -TEST_F(ElementwiseTwoArgumentsTest, Pow) { +TEST(ElementwiseTest, HardSwish) { + OperationType op_type = OperationType::HARD_SWISH; + const BHWC shape(1, 1, 1, 7); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE( + model.PopulateTensor(0, {-4.5f, -3.0f, -1.5f, 0.0f, 1.5f, 3.0f, 4.5f})); + 
ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6f), + {0.0f, 0.0f, -0.375f, 0.0f, 1.125f, 3.f, 4.5f})); +} + +TEST(ElementwiseTest, Log) { + OperationType op_type = OperationType::LOG; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {1.0, 3.1415926, 1.0, 1.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, 1.14473, 0.0, 0.0})); +} + +TEST(ElementwiseTest, Pow) { OperationType op_type = OperationType::POW; - SingleOpModel model({ToString(op_type), {}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, 1.0, 2.0, 4.0})); ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); @@ -179,10 +114,73 @@ TEST_F(ElementwiseTwoArgumentsTest, Pow) { Pointwise(FloatNear(1e-6), {0.0, 1.0, 8.0, 256.0})); } -TEST_F(ElementwiseTwoArgumentsTest, SquaredDiff) { +TEST(ElementwiseTest, Rsqrt) { + OperationType op_type = OperationType::RSQRT; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 4.0, 9.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {1.0, 0.707106, 0.5, 0.333333})); +} + +TEST(ElementwiseTest, Sigmoid) { + OperationType op_type = OperationType::SIGMOID; + const BHWC 
shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.5, 0.002473, 0.880797, 0.982014})); +} + +TEST(ElementwiseTest, Sin) { + OperationType op_type = OperationType::SIN; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, 3.1415926, -3.1415926, 1.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, 0.0, 0.0, 0.841471})); +} + +TEST(ElementwiseTest, Sqrt) { + OperationType op_type = OperationType::SQRT; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, 1.0, 2.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, 1.0, 1.414213, 2.0})); +} + +TEST(ElementwiseTest, Square) { + OperationType op_type = OperationType::SQUARE; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {1.0, 2.0, 0.5, -3.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {1.0, 4.0, 0.25, 9.0})); +} + +TEST(ElementwiseTest, SquaredDiff) { OperationType op_type = OperationType::SQUARED_DIFF; - SingleOpModel model({ToString(op_type), 
{}}, - {GetTensorRef(0), GetTensorRef(1)}, {GetTensorRef(2)}); + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); ASSERT_TRUE(model.PopulateTensor(0, {0.0, 2.0, 2.0, 4.0})); ASSERT_TRUE(model.PopulateTensor(1, {1.0, 1.0, 5.0, 4.0})); ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); @@ -190,6 +188,32 @@ TEST_F(ElementwiseTwoArgumentsTest, SquaredDiff) { Pointwise(FloatNear(1e-6), {1.0, 1.0, 9.0, 0.0})); } +TEST(ElementwiseTest, Sub) { + OperationType op_type = OperationType::SUB; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); + ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {-1.0, -8.2, -1.0, 0.0})); +} + +TEST(ElementwiseTest, Tanh) { + OperationType op_type = OperationType::TANH; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.0, 2.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, -0.999987, 0.964027, 0.999329})); +} + } // namespace } // namespace gl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc index 2201d0018dd..7c93ebd1caf 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc @@ -60,10 +60,10 @@ class 
Registry : public NodeShader { using Type = OperationType; using NewShaderFunc = std::function<std::unique_ptr<NodeShader>()>; - auto insert_op = [&](Type type, NewShaderFunc func) { + const auto insert_op = [&](Type type, NewShaderFunc func) { shaders_[ToString(type)].push_back(func()); }; - auto insert_elementwise_op = [&](Type operation_type) { + const auto insert_elementwise_op = [&](Type operation_type) { shaders_[ToString(operation_type)].push_back( NewElementwiseNodeShader(operation_type)); }; @@ -82,26 +82,27 @@ class Registry : public NodeShader { insert_op(Type::MULTIPLY_SCALAR, NewMultiplyScalarNodeShader); insert_op(Type::PAD, NewPadNodeShader); insert_op(Type::POOLING_2D, NewPoolingNodeShader); + insert_op(Type::PRELU, NewPReLUNodeShader); insert_op(Type::RELU, NewReLUNodeShader); insert_op(Type::RESHAPE, NewReshapeNodeShader); - insert_op(Type::PRELU, NewPReLUNodeShader); insert_op(Type::SLICE, NewSliceNodeShader); insert_op(Type::SOFT_MAX, NewSoftMaxNodeShader); insert_op(Type::UPSAMPLE_2D, NewUpsamplingNodeShader); insert_elementwise_op(Type::ABS); insert_elementwise_op(Type::COS); + insert_elementwise_op(Type::DIV); + insert_elementwise_op(Type::HARD_SWISH); insert_elementwise_op(Type::LOG); + insert_elementwise_op(Type::POW); insert_elementwise_op(Type::RSQRT); insert_elementwise_op(Type::SIGMOID); insert_elementwise_op(Type::SIN); insert_elementwise_op(Type::SQRT); insert_elementwise_op(Type::SQUARE); - insert_elementwise_op(Type::TANH); - insert_elementwise_op(Type::SUB); - insert_elementwise_op(Type::DIV); - insert_elementwise_op(Type::POW); insert_elementwise_op(Type::SQUARED_DIFF); + insert_elementwise_op(Type::SUB); + insert_elementwise_op(Type::TANH); #ifndef TFLITE_GPU_BINARY_RELEASE insert_op(Type::MAX_UNPOOLING_2D, NewMaxUnpoolingNodeShader); diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index 3588cd97169..856939eb9b2 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++
b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h" +#include "tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/mul.h" #include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h" @@ -172,6 +173,9 @@ Status Compile(const GraphFloat32& graph, const RuntimeOptions& options, node->operation.attributes), options); break; + case OperationType::HARD_SWISH: + tasks = HardSwish(node_id, inputs[0], outputs[0], options); + break; case OperationType::MAX_UNPOOLING_2D: tasks = MaxUnpooling(node_id, inputs[0], inputs[1], outputs[0], absl::any_cast<MaxUnpooling2DAttributes>( diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 48ff3632a02..c1b57bd4fc0 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -12,6 +12,7 @@ cc_library( ":depthwise_conv", ":elementwise", ":fully_connected", + ":hard_swish", ":max_unpooling", ":mul", ":padding", @@ -122,6 +123,18 @@ cc_library( ], ) +cc_library( + name = "hard_swish", + srcs = ["hard_swish.cc"], + hdrs = ["hard_swish.h"], + deps = [ + "//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:types", + "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", + "//tensorflow/lite/delegates/gpu/metal:runtime_options", + ], +) + cc_library( name = "max_unpooling", srcs = ["max_unpooling.cc"], diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.cc b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.cc new file mode 100644 index 00000000000..fbf2be92627 --- /dev/null +++
b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.cc @@ -0,0 +1,47 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h" + +#include <memory> +#include <vector> + +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" +#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" + +namespace tflite { +namespace gpu { +namespace metal { + +std::vector<ComputeTaskDescriptorPtr> HardSwish(int id, ValueId input_id, + ValueId output_id, + const RuntimeOptions& options) { + auto desc = std::make_shared<ComputeTaskDescriptor>(); + desc->id = id; + desc->is_linkable = true; + desc->shader_source = R"( + FLT4 linkable$0(FLT4 value, int linear_index, uint3 gid) { + return value * clamp(value / 6.0f + FLT4(0.5f), FLT4(0.0f), FLT4(1.0f)); + } + )"; + desc->input_buffers = {{input_id}}; + desc->output_buffer = {output_id}; + return {desc}; +} + +} // namespace metal +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h new file mode 100644 index 00000000000..fa040ebcb97 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/metal/kernels/hard_swish.h @@ -0,0 +1,37 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_HARD_SWISH_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_HARD_SWISH_H_ + +#include <vector> + +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" +#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" + +namespace tflite { +namespace gpu { +namespace metal { + +std::vector<ComputeTaskDescriptorPtr> HardSwish(int id, ValueId input_id, + ValueId output_id, + const RuntimeOptions& options); + +} // namespace metal +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_HARD_SWISH_H_