Restore functionality of --define=tflite_with_ruy=true
PiperOrigin-RevId: 317677225
Change-Id: If6533fdfeb21f676dd4b77a536b1aca894a03003
parent 3f97342876
commit 60be0c3c0e
@@ -14,8 +14,8 @@ package(
 # This will cause TFLite to build with ruy only, providing a smaller binary.
 # WARNING: This build flag is experimental and subject to change.
 config_setting(
-    name = "tflite_with_ruy_only_explicit_true",
-    define_values = {"TFLITE_WITH_RUY_ONLY": "true"},
+    name = "tflite_with_ruy_explicit_true",
+    define_values = {"tflite_with_ruy": "true"},
 )

 # Disables usage of ruy as the exclusive GEMM backend in TFLite kernels.
@@ -23,14 +23,14 @@ config_setting(
 # the default GEMM option at runtime.
 # WARNING: This build flag is experimental and subject to change.
 config_setting(
-    name = "tflite_with_ruy_only_explicit_false",
-    define_values = {"TFLITE_WITH_RUY_ONLY": "false"},
+    name = "tflite_with_ruy_explicit_false",
+    define_values = {"tflite_with_ruy": "false"},
 )

 ###### Beginning of config_setting's to match aarch64 ######
 #
 # We need to identify the aarch64 instruction set to decide whether to enable
-# TFLITE_WITH_RUY_ONLY by default. This is surprisingly hard to do because select()
+# TFLITE_WITH_RUY by default. This is surprisingly hard to do because select()
 # can only consume config_setting's, these config_settings are not centralized,
 # and the "cpu" value which they define are free-form strings and there is no
 # standardization of the strings that we need to match for the aarch64 architecture.
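
For context on the comment above: a config_setting can match either a --define value or a --cpu string, and a select() keyed on such settings picks dependencies at analysis time. The following is a minimal illustrative sketch, not part of this commit; the example_* names and the "aarch64" cpu string are assumptions.

    # Hypothetical sketch: match one aarch64 cpu string and the explicit
    # --define=tflite_with_ruy=true setting defined above.
    config_setting(
        name = "example_cpu_aarch64",  # assumed name, for illustration only
        values = {"cpu": "aarch64"},
    )

    cc_library(
        name = "example_backend",  # assumed name, for illustration only
        deps = select({
            ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
            ":example_cpu_aarch64": [":tflite_with_ruy_enabled"],
            "//conditions:default": [],
        }),
    )
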
@@ -239,45 +239,45 @@ cc_test(
 )

 cc_library(
-    name = "tflite_with_ruy_only_enabled",
+    name = "tflite_with_ruy_enabled",
     build_for_embedded = True,
-    defines = ["TFLITE_WITH_RUY_ONLY"],
+    defines = ["TFLITE_WITH_RUY"],
     visibility = ["//visibility:private"],
 )

 cc_library(
-    name = "tflite_with_ruy_only_and_caching_enabled",
+    name = "tflite_with_ruy_and_caching_enabled",
     defines = [
-        "TFLITE_WITH_RUY_ONLY",
+        "TFLITE_WITH_RUY",
         "TFLITE_WITH_RUY_GEMV",
     ],
     visibility = ["//visibility:private"],
 )

 cc_library(
-    name = "tflite_with_ruy_only_default",
+    name = "tflite_with_ruy_default",
     build_for_embedded = True,
     select_deps = {
-        ":chromiumos_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_aarch64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64e": [":tflite_with_ruy_only_enabled"],
-        ":cpu_ios_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_ios_arm64e": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64_v8a": [":tflite_with_ruy_only_enabled"],
-        "//tensorflow:android_arm": ["tflite_with_ruy_only_enabled"],
+        ":chromiumos_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_aarch64": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64e": [":tflite_with_ruy_enabled"],
+        ":cpu_ios_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_ios_arm64e": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64_v8a": [":tflite_with_ruy_enabled"],
+        "//tensorflow:android_arm": ["tflite_with_ruy_enabled"],
         "//conditions:default": [],
     },
     visibility = ["//visibility:private"],
 )

 cc_library(
-    name = "tflite_with_ruy_only",
+    name = "tflite_with_ruy",
     build_for_embedded = True,
     select_deps = {
-        ":tflite_with_ruy_only_explicit_true": [":tflite_with_ruy_only_enabled"],
-        ":tflite_with_ruy_only_explicit_false": [],
-        "//conditions:default": [":tflite_with_ruy_only_default"],
+        ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
+        ":tflite_with_ruy_explicit_false": [],
+        "//conditions:default": [":tflite_with_ruy_default"],
     },
 )

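
With this change, the define named in the commit title works again: building with --define=tflite_with_ruy=true selects :tflite_with_ruy_explicit_true, =false selects :tflite_with_ruy_explicit_false, and builds with no define fall through to :tflite_with_ruy_default, which enables ruy for the arm64 configurations listed above. As a rough sketch only (the rule above uses the select_deps attribute rather than a bare select(); the target name below is hypothetical), the resolution is equivalent to:

    # Illustrative sketch, assuming a plain cc_library with select();
    # not part of this commit.
    cc_library(
        name = "tflite_with_ruy_sketch",
        deps = select({
            ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
            ":tflite_with_ruy_explicit_false": [],
            "//conditions:default": [":tflite_with_ruy_default"],
        }),
    )
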
@@ -291,7 +291,7 @@ cc_library(
     ],
     copts = tflite_copts(),
     deps = [
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         ":op_macros",
         # For now this unconditionally depends on both ruy and gemmlowp.
         # See the comment inside class CpuBackendContext on the
@@ -311,11 +311,11 @@ cc_library(
     copts = tflite_copts(),
     deps = [
         ":cpu_backend_context",
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:types",
         # For now this unconditionally depends on both ruy and gemmlowp.
-        # We only need to depend on gemmlowp when tflite_with_ruy_only
+        # We only need to depend on gemmlowp when tflite_with_ruy
         # is false, but putting these dependencies in a select() seems to
         # defeat copybara's rewriting rules.
         "@ruy//ruy:context",
@@ -349,20 +349,20 @@ cc_library(
     ],
     copts = tflite_copts(),
     deps = [
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         "//tensorflow/lite/kernels/internal:common",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:cpu_check",
         "//tensorflow/lite/kernels/internal:types",
         ":cpu_backend_context",
         ":cpu_backend_threadpool",
-        # Depend on ruy regardless of `tflite_with_ruy_only`. See the comment in
+        # Depend on ruy regardless of `tflite_with_ruy`. See the comment in
         # cpu_backend_gemm.h about why ruy is the generic path.
         "@ruy//ruy",
         "@ruy//ruy:matrix",
         "@ruy//ruy:path",
         "@ruy//ruy/profiler:instrumentation",
-        # We only need to depend on gemmlowp and Eigen when tflite_with_ruy_only
+        # We only need to depend on gemmlowp and Eigen when tflite_with_ruy
         # is false, but putting these dependencies in a select() seems to
         # defeat copybara's rewriting rules.
         "@gemmlowp",
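
The comments in this hunk spell out the design choice: gemmlowp and Eigen stay as unconditional deps even though they are only needed when tflite_with_ruy is false, because putting them behind a select() appears to defeat copybara's rewriting rules. For illustration only, the avoided shape would look roughly like the sketch below; the target name is hypothetical and not part of this commit.

    # Not used by this change: conditional GEMM backend deps of this shape
    # are avoided per the comments above.
    cc_library(
        name = "gemm_deps_sketch",
        deps = select({
            ":tflite_with_ruy_explicit_true": [],
            "//conditions:default": ["@gemmlowp"],
        }),
    )
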
@@ -605,7 +605,7 @@ cc_library(
         "//tensorflow/lite/kernels/internal:cppmath",
         "//tensorflow/lite:string",
         "@farmhash_archive//:farmhash",
-    ] + [":tflite_with_ruy_only_and_caching_enabled"],
+    ] + [":tflite_with_ruy_and_caching_enabled"],
 )

 cc_library(

@@ -26,7 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include "tensorflow/lite/kernels/internal/types.h"
 // b/131835803 forces us to include multithreaded_conv.h before optimized_ops.h
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 #include "tensorflow/lite/kernels/internal/optimized/multithreaded_conv.h"
 #endif
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
@@ -765,8 +765,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       break;
     }
     case kMultithreadOptimized: {
-#ifdef TFLITE_WITH_RUY_ONLY
-      // See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
+      // See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY
       // was enabled. We #if out this code in order to get the corresponding
       // binary size benefits.
       TFLITE_DCHECK(false);
@@ -1051,8 +1051,8 @@ TfLiteRegistration* Register_CONVOLUTION_CBLAS_OPT() {
 TfLiteRegistration* Register_CONV_2D() {
 #if defined TFLITE_USE_APPLE_ACCELERATE_FOR_CONV
   return Register_CONVOLUTION_CBLAS_OPT();
-#elif defined TFLITE_WITH_RUY_ONLY
-  // TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
+#elif defined TFLITE_WITH_RUY
+  // TFLITE_WITH_RUY optimizes the generic kernel type.
   return Register_CONVOLUTION_GENERIC_OPT();
 #else
   return Register_CONVOLUTION_MULTITHREADED_OPT();
@@ -1063,8 +1063,8 @@ TfLiteRegistration* Register_CONV_2D() {
 // models only need the UINT8 type. TFLite's op registration mechanism doesn't
 // yet allow for more nuanced registration mechanisms.
 TfLiteRegistration* Register_CONV_2D_UINT8() {
-#if defined TFLITE_WITH_RUY_ONLY
-  // TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
+#if defined TFLITE_WITH_RUY
+  // TFLITE_WITH_RUY optimizes the generic kernel type.
   return Register_CONVOLUTION_GENERIC_OPT_UINT8();
 #else
   return Register_CONV_2D();

@@ -148,7 +148,7 @@ class ConvolutionOpModel : public BaseConvolutionOpModel<float> {
 const auto kKernelMap = new std::map<string, TfLiteRegistration*>({
     {"Reference", ops::builtin::Register_CONVOLUTION_REF()},
     {"GenericOptimized", ops::builtin::Register_CONVOLUTION_GENERIC_OPT()},
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
     {"MultithreadedOptimized",
      ops::builtin::Register_CONVOLUTION_MULTITHREADED_OPT()},
 #endif

@@ -56,7 +56,7 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
   // (see :cpu_backend_gemm), for now a CpuBackendContext always
   // stores both a gemmlowp context and a ruy context.
   // TODO(b/131416458): Once call sites all go through abstractions,
-  // elide what can be elided based on TFLITE_WITH_RUY_ONLY.
+  // elide what can be elided based on TFLITE_WITH_RUY.
   const std::unique_ptr<ruy::Context> ruy_context_;
   const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;

@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_ruy.h"

-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 #include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_gemmlowp.h"
 #endif
@@ -42,7 +42,7 @@ template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
 struct GemmImpl : detail::GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar,
                                            DstScalar, quantization_flavor> {};

-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY

 /* Specializations using gemmlowp */

@@ -82,7 +82,7 @@ template <>
 struct GemmImpl<float, float, float, float, QuantizationFlavor::kFloatingPoint>
     : detail::GemmImplUsingEigen {};

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

 /* Public entry point */

@@ -591,10 +591,10 @@ struct CustomGemvImpl<LhsScalar, RhsScalar, std::int32_t, DstScalar,
 // The float specialization below is unconditionally faster than ruy
 // because ruy does not currently have any Gemv path.
 // But it is not unconditionally faster than Eigen, which is what is used
-// unless TFLITE_WITH_RUY_ONLY is defined. Indeed, Eigen has decently efficient
+// unless TFLITE_WITH_RUY is defined. Indeed, Eigen has decently efficient
 // Gemv paths, and they may use AVX instructions, while the present
 // NEON intrinsics code maps at best to SSE4 on x86.
-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY

 // We want to use fused multiply-add when it's available (that is, on A64
 // unconditionally and on A32 with VFPv4) because it's often faster, and
@@ -778,7 +778,7 @@ struct CustomGemvImpl<float, float, float, float,
   }
 };

-#endif  // TFLITE_WITH_RUY_ONLY
+#endif  // TFLITE_WITH_RUY

 #endif  // USE_NEON

|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TFLITE_WITH_RUY_ONLY
|
||||
#ifndef TFLITE_WITH_RUY
|
||||
|
||||
#include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
|
||||
|
||||
|
@@ -78,4 +78,4 @@ void GemmImplUsingEigen::Run(
 }  // namespace cpu_backend_gemm
 }  // namespace tflite

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_
 #define TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_

-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY

 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
@@ -37,6 +37,6 @@ struct GemmImplUsingEigen {
 }  // namespace cpu_backend_gemm
 }  // namespace tflite

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

 #endif  // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_

@@ -19,7 +19,7 @@ limitations under the License.
 #include <tuple>

 #include "tensorflow/lite/kernels/internal/compatibility.h"
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY

 #include <cstdint>
 #include <type_traits>
@@ -190,6 +190,6 @@ struct GemmImplUsingGemmlowp<LhsScalar, RhsScalar, AccumScalar, DstScalar,
 }  // namespace cpu_backend_gemm
 }  // namespace tflite

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

 #endif  // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_GEMMLOWP_H_

@@ -19,7 +19,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"

-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
 #include "ruy/context.h"  // from @ruy
 #include "ruy/thread_pool.h"  // from @ruy
 #else
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tflite {
 namespace cpu_backend_threadpool {

-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY

 using Task = ruy::Task;

@@ -41,7 +41,7 @@ void Execute(int tasks_count, TaskType* tasks,
       tasks_count, tasks);
 }

-#else  // not TFLITE_WITH_RUY_ONLY
+#else  // not TFLITE_WITH_RUY

 using Task = gemmlowp::Task;

@@ -132,7 +132,7 @@ inline void DepthwiseConv(const DepthwiseParams& params,
   int thread_count = HowManyConvThreads(output_shape, filter_shape);
   const int max_threads = cpu_backend_context->max_num_threads();
   thread_count = std::max(1, std::min(thread_count, max_threads));
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
   // Cap the number of threads to 2 for float path to avoid regression in
   // performance (b/132294857).
   if (std::is_floating_point<T>::value) {

@@ -187,7 +187,7 @@ ifeq ($(TARGET_ARCH),aarch64)
   BUILD_WITH_RUY=true
 endif
 ifeq ($(BUILD_WITH_RUY),true)
-  CXXFLAGS += -DTFLITE_WITH_RUY_ONLY
+  CXXFLAGS += -DTFLITE_WITH_RUY
 endif

 BUILD_WITH_RUY_PROFILER ?= false