diff --git a/tensorflow/lite/experimental/ruy/BUILD b/tensorflow/lite/experimental/ruy/BUILD index 904d8ebcbef..64450e6f99a 100644 --- a/tensorflow/lite/experimental/ruy/BUILD +++ b/tensorflow/lite/experimental/ruy/BUILD @@ -12,6 +12,11 @@ package( licenses = ["notice"], # Apache 2.0 ) +cc_library( + name = "platform", + hdrs = ["platform.h"], +) + cc_library( name = "check_macros", hdrs = ["check_macros.h"], @@ -60,6 +65,7 @@ cc_library( ], deps = [ ":opt_set", + ":platform", ":time", ], ) @@ -164,7 +170,10 @@ cc_library( name = "path", hdrs = ["path.h"], visibility = ruy_visibility(), - deps = [":size_util"], + deps = [ + ":platform", + ":size_util", + ], ) cc_library( @@ -238,6 +247,7 @@ cc_library( ":matrix", ":opt_set", ":path", + ":platform", ], ) @@ -255,6 +265,7 @@ cc_library( ":internal_matrix", ":opt_set", ":path", + ":platform", ":size_util", ":spec", ":tune", @@ -276,6 +287,7 @@ cc_library( ":internal_matrix", ":opt_set", ":path", + ":platform", ":spec", ":tune", "@gemmlowp//:profiler", @@ -371,6 +383,7 @@ cc_library( ":ruy", ":time", "@com_google_googletest//:gtest", + ":platform", ] + ruy_test_ext_deps(), ) diff --git a/tensorflow/lite/experimental/ruy/common.h b/tensorflow/lite/experimental/ruy/common.h index 1027b9ef1be..9a596815fa2 100644 --- a/tensorflow/lite/experimental/ruy/common.h +++ b/tensorflow/lite/experimental/ruy/common.h @@ -26,8 +26,9 @@ limitations under the License. #include "tensorflow/lite/experimental/ruy/matrix.h" #include "tensorflow/lite/experimental/ruy/opt_set.h" #include "tensorflow/lite/experimental/ruy/path.h" +#include "tensorflow/lite/experimental/ruy/platform.h" -#if ((defined RUY_NEON_64) || (defined RUY_NEON_32)) +#if (RUY_PLATFORM(NEON_64) || RUY_PLATFORM(NEON_32)) #include #endif diff --git a/tensorflow/lite/experimental/ruy/kernel.h b/tensorflow/lite/experimental/ruy/kernel.h index cd4255af830..0c7a2e356f5 100644 --- a/tensorflow/lite/experimental/ruy/kernel.h +++ b/tensorflow/lite/experimental/ruy/kernel.h @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/experimental/ruy/internal_matrix.h" #include "tensorflow/lite/experimental/ruy/opt_set.h" #include "tensorflow/lite/experimental/ruy/path.h" +#include "tensorflow/lite/experimental/ruy/platform.h" #include "tensorflow/lite/experimental/ruy/size_util.h" #include "tensorflow/lite/experimental/ruy/spec.h" #include "tensorflow/lite/experimental/ruy/tune.h" @@ -217,7 +218,7 @@ RUY_INHERIT_KERNEL(Path::kStandardCpp, Path::kNeon) RUY_INHERIT_KERNEL(Path::kNeon, Path::kNeonDotprod) // KernelParams are shared across 32-bit and 64-bit NEON code. -#if ((defined RUY_NEON_64) || (defined RUY_NEON_32)) && \ +#if (RUY_PLATFORM(NEON_64) || RUY_PLATFORM(NEON_32)) && \ (RUY_OPT_ENABLED(RUY_OPT_ASM)) #define RUY_ASM_FLAG_HAS_BIAS 0x1 @@ -369,7 +370,7 @@ void Kernel8bitNeonInOrder(const KernelParams8bit<4, 4>& params); void Kernel8bitNeonDotprodOutOfOrder(const KernelParams8bit<8, 8>& params); void Kernel8bitNeonDotprodInOrder(const KernelParams8bit<8, 8>& params); -#ifdef RUY_NEON_64 +#if RUY_PLATFORM(NEON_64) template struct Kernel> { @@ -489,7 +490,7 @@ void KernelFloatNeonInOrder(const KernelParamsFloat<8, 8>& params); void KernelFloat32NeonOutOfOrder(const KernelParamsFloat<8, 4>& params); void KernelFloatNeonDotprodInOrder(const KernelParamsFloat<8, 8>& params); -#ifdef RUY_NEON_64 +#if RUY_PLATFORM(NEON_64) // A Float kernel for ARM64 Neon. template <> struct Kernel> { @@ -512,7 +513,7 @@ struct Kernel> { }; #endif -#ifdef RUY_NEON_32 +#if RUY_PLATFORM(NEON_32) // A Float kernel for ARM32 Neon. template <> struct Kernel> { @@ -559,7 +560,7 @@ struct Kernel& params) { #undef RUY_OFFSET_RHS_BASE_PTR #undef RUY_OFFSET_DST_BASE_PTR -#endif // (defined RUY_NEON_32) && (RUY_OPT_ENABLED(RUY_OPT_ASM) +#endif // RUY_PLATFORM(NEON_32) && (RUY_OPT_ENABLED(RUY_OPT_ASM) } // namespace ruy diff --git a/tensorflow/lite/experimental/ruy/kernel_arm64.cc b/tensorflow/lite/experimental/ruy/kernel_arm64.cc index 4e207775576..d2bcc1083eb 100644 --- a/tensorflow/lite/experimental/ruy/kernel_arm64.cc +++ b/tensorflow/lite/experimental/ruy/kernel_arm64.cc @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/lite/experimental/ruy/kernel.h" - #include "profiling/instrumentation.h" +#include "tensorflow/lite/experimental/ruy/kernel.h" +#include "tensorflow/lite/experimental/ruy/platform.h" namespace ruy { -#if (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) +#if RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) #define RUY_ASM_LABEL_STORE_UINT8 91 #define RUY_ASM_LABEL_STORE_INT8 92 @@ -6302,6 +6302,6 @@ void KernelFloatNeonDotprodInOrder(const KernelParamsFloat<8, 8>& params) { #undef RUY_OFFSET_RHS_BASE_PTR #undef RUY_OFFSET_DST_BASE_PTR -#endif // (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) +#endif // RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) } // namespace ruy diff --git a/tensorflow/lite/experimental/ruy/pack.cc b/tensorflow/lite/experimental/ruy/pack.cc index fe3096f208f..e195e4931dc 100644 --- a/tensorflow/lite/experimental/ruy/pack.cc +++ b/tensorflow/lite/experimental/ruy/pack.cc @@ -15,9 +15,11 @@ limitations under the License. #include "tensorflow/lite/experimental/ruy/pack.h" +#include "tensorflow/lite/experimental/ruy/platform.h" + namespace ruy { -#if (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) +#if RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) void Pack8bitNeonOutOfOrder(const void* src_ptr0, const void* src_ptr1, const void* src_ptr2, const void* src_ptr3, @@ -1329,6 +1331,6 @@ void PackFloatNeonInOrder(const float* src_ptr0, const float* src_ptr1, "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); } -#endif // (defined RUY_NEON64) && RUY_OPT_ENABLED(RUY_OPT_ASM) +#endif // RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) } // namespace ruy diff --git a/tensorflow/lite/experimental/ruy/pack.h b/tensorflow/lite/experimental/ruy/pack.h index af9418469b3..d0f062f3f15 100644 --- a/tensorflow/lite/experimental/ruy/pack.h +++ b/tensorflow/lite/experimental/ruy/pack.h @@ -89,6 +89,7 @@ limitations under the License. #include "tensorflow/lite/experimental/ruy/common.h" #include "tensorflow/lite/experimental/ruy/internal_matrix.h" #include "tensorflow/lite/experimental/ruy/opt_set.h" +#include "tensorflow/lite/experimental/ruy/platform.h" #include "tensorflow/lite/experimental/ruy/tune.h" namespace ruy { @@ -158,7 +159,7 @@ struct PackImpl, float, } }; -#endif // (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) +#endif // RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM) // Main entry point for packing. template +#include "tensorflow/lite/experimental/ruy/platform.h" #include "tensorflow/lite/experimental/ruy/size_util.h" -// Detect ARM, 32-bit or 64-bit -#ifdef __aarch64__ -#define RUY_ARM_64 -#elif defined(__arm__) -#define RUY_ARM_32 -#endif - -// Detect NEON. -#if (defined __ARM_NEON) || (defined __ARM_NEON__) -#define RUY_NEON -#endif - -// Define 32bit ARM NEON and 64 bit ARM NEON -#if defined(RUY_NEON) && defined(RUY_ARM_32) -#define RUY_NEON_32 -#endif - -#if defined(RUY_NEON) && defined(RUY_ARM_64) -#define RUY_NEON_64 -#endif - namespace ruy { // A Path is a choice of implementation path, e.g. between reference code @@ -119,21 +99,21 @@ inline Path GetMostSignificantPath(Path path_mask) { // ruy::kAllPaths represents all Path's that make sense to on a given // base architecture. #ifdef __linux__ -#ifdef RUY_NEON_64 +#if RUY_PLATFORM(NEON_64) constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon | Path::kNeonDotprod; -#elif defined RUY_NEON_32 +#elif RUY_PLATFORM(NEON_32) constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon; #else constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp; -#endif // RUY_NEON_64 +#endif #else // __linux__ // We don't know how to do runtime dotprod detection outside of linux for now. -#if defined(RUY_NEON_64) || defined(RUY_NEON_32) +#if RUY_PLATFORM(NEON) constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon; #else constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp; -#endif // defined(RUY_NEON_64) || defined(RUY_NEON_32) +#endif #endif // __linux__ } // namespace ruy diff --git a/tensorflow/lite/experimental/ruy/platform.h b/tensorflow/lite/experimental/ruy/platform.h new file mode 100644 index 00000000000..13eccf8acf6 --- /dev/null +++ b/tensorflow/lite/experimental/ruy/platform.h @@ -0,0 +1,52 @@ +/* Copyright 2019 Google LLC. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_ + +#define RUY_PLATFORM(X) ((RUY_DONOTUSEDIRECTLY_##X) != 0) + +// Detect ARM 32-bit +#ifdef __arm__ +#define RUY_DONOTUSEDIRECTLY_ARM_32 1 +#else +#define RUY_DONOTUSEDIRECTLY_ARM_32 0 +#endif + +// Detect ARM 64-bit +#ifdef __aarch64__ +#define RUY_DONOTUSEDIRECTLY_ARM_64 1 +#else +#define RUY_DONOTUSEDIRECTLY_ARM_64 0 +#endif + +// Detect NEON +#if (defined __ARM_NEON) || (defined __ARM_NEON__) +#define RUY_DONOTUSEDIRECTLY_NEON 1 +#else +#define RUY_DONOTUSEDIRECTLY_NEON 0 +#endif + +// Define ARM 32-bit NEON +#define RUY_DONOTUSEDIRECTLY_NEON_32 \ + (RUY_DONOTUSEDIRECTLY_NEON && RUY_DONOTUSEDIRECTLY_ARM_32) + +// Define ARM 64-bit NEON +// Note: NEON is implied by ARM64, so this define is redundant. +// It still allows some conveyance of intent. +#define RUY_DONOTUSEDIRECTLY_NEON_64 \ + (RUY_DONOTUSEDIRECTLY_NEON && RUY_DONOTUSEDIRECTLY_ARM_64) + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_ diff --git a/tensorflow/lite/experimental/ruy/test.h b/tensorflow/lite/experimental/ruy/test.h index a81e65420c5..d604bc7c1df 100644 --- a/tensorflow/lite/experimental/ruy/test.h +++ b/tensorflow/lite/experimental/ruy/test.h @@ -29,6 +29,7 @@ limitations under the License. #include #include +#include "tensorflow/lite/experimental/ruy/platform.h" #include "tensorflow/lite/experimental/ruy/pmu.h" #include "tensorflow/lite/experimental/ruy/ruy.h" #include "tensorflow/lite/experimental/ruy/ruy_advanced.h" @@ -1651,7 +1652,7 @@ void TestSet::MakeResultPaths() { } // We link against a generic BLAS target that only maps to OpenBLAS on specific // architectures. -#if defined RUY_ARM_64 || defined RUY_ARM_32 +#if RUY_PLATFORM(ARM_32) || RUY_PLATFORM(ARM_64) // OpenBLAS multi-threading is disabled, so avoid mixing single-threaded // and multi-threaded benchmark results. if (max_num_threads == 1) { diff --git a/tensorflow/lite/experimental/ruy/tune.cc b/tensorflow/lite/experimental/ruy/tune.cc index 2d84cae7c42..d2ca263e706 100644 --- a/tensorflow/lite/experimental/ruy/tune.cc +++ b/tensorflow/lite/experimental/ruy/tune.cc @@ -19,11 +19,12 @@ limitations under the License. #include #include "tensorflow/lite/experimental/ruy/opt_set.h" +#include "tensorflow/lite/experimental/ruy/platform.h" #include "tensorflow/lite/experimental/ruy/time.h" namespace ruy { -#ifdef RUY_NEON_64 +#if RUY_PLATFORM(NEON_64) namespace { @@ -130,7 +131,7 @@ Tuning TuningResolver::ResolveNow() { return is_probably_inorder ? Tuning::kInOrder : Tuning::kOutOfOrder; } -#else // not defined RUY_NEON_64 +#else // not RUY_PLATFORM(NEON_64) float TuningResolver::EvalRatio() { return 0; } float TuningResolver::ThresholdRatio() { return 0; }