Inline and optimize the ApplyActivationToVector function.

Also implement missing activations.

PiperOrigin-RevId: 282848506
Change-Id: Iddba8efd6d28b2777347ebccdf8e410f6f65adb2
parent a6b7271e73
commit 9e2427c588
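For reviewers skimming the diff: the old path constructed an `ActivationFunctor` and re-evaluated a `switch` for every element, while the new header dispatches once per vector and gives each activation its own tight, inlinable loop. Below is a minimal standalone sketch of that dispatch change; the enum and function names are stand-ins for illustration, not the real TfLite API.

```cpp
#include <algorithm>
#include <cmath>

// Stand-in enum; the real code switches on TfLiteFusedActivation.
enum Act { kActNone, kActRelu, kActTanh };

// Before: one switch per element, evaluated v_size times.
inline float ActivateOne(Act act, float a) {
  switch (act) {
    case kActRelu: return std::max(0.0f, a);
    case kActTanh: return std::tanh(a);
    default:       return a;
  }
}

// After: one switch per vector; each case is a simple loop the
// compiler can inline and auto-vectorize.
inline void ApplyRelu(const float* in, int n, float* out) {
  for (int i = 0; i < n; ++i) out[i] = std::max(0.0f, in[i]);
}
inline void ApplyTanh(const float* in, int n, float* out) {
  for (int i = 0; i < n; ++i) out[i] = std::tanh(in[i]);
}
inline void Apply(const float* in, int n, Act act, float* out) {
  switch (act) {
    case kActRelu: return ApplyRelu(in, n, out);
    case kActTanh: return ApplyTanh(in, n, out);
    case kActNone: return;  // mirrors the new header: nothing written for None
  }
}

int main() {
  const float in[4] = {-2.0f, -0.5f, 0.5f, 2.0f};
  float out[4] = {};
  Apply(in, 4, kActRelu, out);  // out == {0, 0, 0.5, 2}
  return 0;
}
```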
@@ -354,17 +354,6 @@ cc_test(
     ],
 )
 
-cc_library(
-    name = "activation_functor",
-    hdrs = [
-        "activation_functor.h",
-    ],
-    copts = tflite_copts(),
-    deps = [
-        "//tensorflow/lite/c:common",
-    ],
-)
-
 cc_library(
     name = "op_macros",
     hdrs = [
@@ -1,58 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_ACTIVATION_FUNCTOR_H_
-#define TENSORFLOW_LITE_KERNELS_ACTIVATION_FUNCTOR_H_
-
-#include <algorithm>
-#include <cmath>
-#include <cstdlib>
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-
-namespace tflite {
-
-// Dynamic (non-fused) activation functor. perhaps it is worth having
-// template instantiation?
-// TODO(aselle): Make this more efficient by pulling the switch to conv_eval
-// using template inlining.
-class ActivationFunctor {
- public:
-  explicit ActivationFunctor(TfLiteFusedActivation act) : act_(act) {}
-
-  float operator()(float a) const {
-    switch (act_) {
-      case kTfLiteActNone:
-        return a;
-      case kTfLiteActRelu:
-        return a < 0.f ? 0.f : a;
-      case kTfLiteActRelu6:
-        return std::max(0.f, std::min(a, 6.f));
-      case kTfLiteActTanh:
-        return std::tanh(a);
-      case kTfLiteActSigmoid:
-        return 1.0f / (1.0f + std::exp(-a));
-      default:
-        // TODO(aselle): More informative fatal error!
-        exit(1);
-    }
-  }
-
- private:
-  TfLiteFusedActivation act_;
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_ACTIVATION_FUNCTOR_H_
@@ -577,7 +577,6 @@ cc_library(
         ":compatibility",
         ":round",
         "//tensorflow/lite/c:common",
-        "//tensorflow/lite/kernels:activation_functor",
        "//tensorflow/lite/kernels:cpu_backend_context",
         "@gemmlowp",
     ],
@@ -196,15 +196,6 @@ void VectorBatchVectorAdd(const float* vector, int v_size, int n_batch,
   PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector);
 }
 
-void ApplySigmoidToVector(const float* vector, int v_size, float* result) {
-  PortableApplySigmoidToVector(vector, v_size, result);
-}
-
-void ApplyActivationToVector(const float* vector, int v_size,
-                             TfLiteFusedActivation activation, float* result) {
-  PortableApplyActivationToVector(vector, v_size, activation, result);
-}
-
 void Sub1Vector(const float* vector, int v_size, float* result) {
   NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
 }
@@ -206,15 +206,6 @@ void VectorBatchVectorAdd(const float* vector, int v_size, int n_batch,
   PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector);
 }
 
-void ApplySigmoidToVector(const float* vector, int v_size, float* result) {
-  PortableApplySigmoidToVector(vector, v_size, result);
-}
-
-void ApplyActivationToVector(const float* vector, int v_size,
-                             TfLiteFusedActivation activation, float* result) {
-  PortableApplyActivationToVector(vector, v_size, activation, result);
-}
-
 void Sub1Vector(const float* vector, int v_size, float* result) {
   NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
 }
@@ -21,7 +21,6 @@ limitations under the License.
 
 #include "fixedpoint/fixedpoint.h"
 #include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/kernels/activation_functor.h"
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/internal/common.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"
@@ -591,23 +590,6 @@ void PortableVectorBatchVectorAdd(const float* vector, int v_size, int n_batch,
   }
 }
 
-void PortableApplySigmoidToVector(const float* vector, int v_size,
-                                  float* result) {
-  auto sigmoid_func = ActivationFunctor(kTfLiteActSigmoid);
-  for (int v = 0; v < v_size; v++) {
-    *result++ = (sigmoid_func)(*vector++);
-  }
-}
-
-void PortableApplyActivationToVector(const float* vector, int v_size,
-                                     TfLiteFusedActivation activation,
-                                     float* result) {
-  auto activation_func = ActivationFunctor(activation);
-  for (int v = 0; v < v_size; v++) {
-    *result++ = (activation_func)(*vector++);
-  }
-}
-
 void PortableSub1Vector(const float* vector, int v_size, float* result) {
   for (int v = 0; v < v_size; v++) {
     *result++ = 1.0f - *vector++;
@@ -229,15 +229,6 @@ void VectorBatchVectorAdd(const float* vector, int v_size, int n_batch,
   PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector);
 }
 
-void ApplySigmoidToVector(const float* vector, int v_size, float* result) {
-  PortableApplySigmoidToVector(vector, v_size, result);
-}
-
-void ApplyActivationToVector(const float* vector, int v_size,
-                             TfLiteFusedActivation activation, float* result) {
-  PortableApplyActivationToVector(vector, v_size, activation, result);
-}
-
 void Sub1Vector(const float* vector, int v_size, float* result) {
   PortableSub1Vector(vector, v_size, result);
 }
@@ -171,15 +171,6 @@ void PortableVectorBatchVectorAssign(const float* vector, int v_size,
 void PortableVectorBatchVectorAdd(const float* vector, int v_size, int n_batch,
                                   float* batch_vector);
 
-// Apply sigmoid to elements of a vector.
-void PortableApplySigmoidToVector(const float* vector, int v_size,
-                                  float* result);
-
-// Apply activation function to elements of a vector.
-void PortableApplyActivationToVector(const float* vector, int v_size,
-                                     TfLiteFusedActivation activation,
-                                     float* result);
-
 // Compute "1.0f - elements of vector" (used in CIFG).
 void PortableSub1Vector(const float* vector, int v_size, float* result);
 
@@ -16,6 +16,7 @@ limitations under the License.
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_
 
 #include <algorithm>
+#include <cmath>
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
@@ -401,12 +402,76 @@ void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
   }
 }
 
-// Apply sigmoid to elements of a vector.
-void ApplySigmoidToVector(const float* vector, int v_size, float* result);
+// Apply Rectified Linear to elements of a vector.
+inline void ApplyReluToVector(const float* __restrict__ vector, int v_size,
+                              float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::max(0.0f, vector[v]);
+  }
+}
 
-// Apply activation function to elements of a vector.
-void ApplyActivationToVector(const float* vector, int v_size,
-                             TfLiteFusedActivation activation, float* result);
+// Apply Rectified Linear 1 (cap to [-1;1]) to elements of a vector
+inline void ApplyRelu1ToVector(const float* __restrict__ vector, int v_size,
+                               float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::max(-1.0f, std::min(vector[v], 1.0f));
+  }
+}
+
+// Apply Rectified Linear 6 (cap to [0;6]) to elements of a vector
+inline void ApplyRelu6ToVector(const float* __restrict__ vector, int v_size,
+                               float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::max(0.0f, std::min(vector[v], 6.0f));
+  }
+}
+
+// Apply tanh to elements of a vector
+inline void ApplyTanhToVector(const float* __restrict__ vector, int v_size,
+                              float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::tanh(vector[v]);
+  }
+}
+
+// Apply signbit to elements of a vector
+inline void ApplySignbitToVector(const float* __restrict__ vector, int v_size,
+                                 float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::signbit(vector[v]);
+  }
+}
+
+// Apply sigmoid to elements of a vector.
+inline void ApplySigmoidToVector(const float* __restrict__ vector, int v_size,
+                                 float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = 1.0f / (1.0f + std::exp(-vector[v]));
+  }
+}
+
+// Apply appropriate activation function to elements of a vector.
+inline void ApplyActivationToVector(const float* __restrict__ vector,
+                                    int v_size,
+                                    TfLiteFusedActivation activation,
+                                    float* __restrict__ result) {
+  switch (activation) {
+    case kTfLiteActNone:
+      return;
+    case kTfLiteActRelu:
+      return ApplyReluToVector(vector, v_size, result);
+    case kTfLiteActRelu1:
+      return ApplyRelu1ToVector(vector, v_size, result);
+    case kTfLiteActRelu6:
+      return ApplyRelu6ToVector(vector, v_size, result);
+    case kTfLiteActTanh:
+      return ApplyTanhToVector(vector, v_size, result);
+    case kTfLiteActSignBit:
+      return ApplySignbitToVector(vector, v_size, result);
+    case kTfLiteActSigmoid:
+      return ApplySigmoidToVector(vector, v_size, result);
+  }
+}
 
 // Compute "1.0f - elements of vector" (used in CIFG).
 void Sub1Vector(const float* vector, int v_size, float* result);
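One behavioral nuance in the new header worth noting: `ApplyActivationToVector` now returns immediately for `kTfLiteActNone` without touching `result`, whereas the removed portable path wrote every element through the functor (`case kTfLiteActNone: return a;`). Since both pointers are also declared `__restrict__`, callers with distinct input and output buffers that relied on the implicit copy must handle the no-op case themselves. A hypothetical caller-side guard, not part of this change (it assumes the header path from the include guard and the usual `tflite::tensor_utils` namespace for these entry points):

```cpp
#include <cstring>

#include "tensorflow/lite/kernels/internal/tensor_utils.h"

// Illustrative helper: preserves the old "None copies input to output"
// behavior on top of the new inline dispatch. The wrapper name and the
// namespace qualification are assumptions for this sketch.
void ApplyActivationOrCopy(const float* input, int size,
                           TfLiteFusedActivation activation, float* output) {
  if (activation == kTfLiteActNone) {
    std::memcpy(output, input, size * sizeof(float));  // old copy semantics
  } else {
    tflite::tensor_utils::ApplyActivationToVector(input, size, activation,
                                                  output);
  }
}
```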