Fix performance regression (b/137615815) introduced by new platform
#defines - they were tested directly by #ifdef, and were being defined by path.h. As tune.cc did not #include path.h, it did not enable its platform-specific tuning code, resulting in a performance regression in cases relying on tuning for maximal performance --- in-order ARM. To prevent that from happening again, this moves the platform defines to a new platform.h and forces users to use a RUY_PLATFORM(X) function macro, so that if they fail to #include platform.h, they get a compilation error. PiperOrigin-RevId: 258372624
This commit is contained in:
parent
f2df2c2865
commit
8c1064d91a
@ -12,6 +12,11 @@ package(
|
|||||||
licenses = ["notice"], # Apache 2.0
|
licenses = ["notice"], # Apache 2.0
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "platform",
|
||||||
|
hdrs = ["platform.h"],
|
||||||
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "check_macros",
|
name = "check_macros",
|
||||||
hdrs = ["check_macros.h"],
|
hdrs = ["check_macros.h"],
|
||||||
@ -60,6 +65,7 @@ cc_library(
|
|||||||
],
|
],
|
||||||
deps = [
|
deps = [
|
||||||
":opt_set",
|
":opt_set",
|
||||||
|
":platform",
|
||||||
":time",
|
":time",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -164,7 +170,10 @@ cc_library(
|
|||||||
name = "path",
|
name = "path",
|
||||||
hdrs = ["path.h"],
|
hdrs = ["path.h"],
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
deps = [":size_util"],
|
deps = [
|
||||||
|
":platform",
|
||||||
|
":size_util",
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
@ -238,6 +247,7 @@ cc_library(
|
|||||||
":matrix",
|
":matrix",
|
||||||
":opt_set",
|
":opt_set",
|
||||||
":path",
|
":path",
|
||||||
|
":platform",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -255,6 +265,7 @@ cc_library(
|
|||||||
":internal_matrix",
|
":internal_matrix",
|
||||||
":opt_set",
|
":opt_set",
|
||||||
":path",
|
":path",
|
||||||
|
":platform",
|
||||||
":size_util",
|
":size_util",
|
||||||
":spec",
|
":spec",
|
||||||
":tune",
|
":tune",
|
||||||
@ -276,6 +287,7 @@ cc_library(
|
|||||||
":internal_matrix",
|
":internal_matrix",
|
||||||
":opt_set",
|
":opt_set",
|
||||||
":path",
|
":path",
|
||||||
|
":platform",
|
||||||
":spec",
|
":spec",
|
||||||
":tune",
|
":tune",
|
||||||
"@gemmlowp//:profiler",
|
"@gemmlowp//:profiler",
|
||||||
@ -371,6 +383,7 @@ cc_library(
|
|||||||
":ruy",
|
":ruy",
|
||||||
":time",
|
":time",
|
||||||
"@com_google_googletest//:gtest",
|
"@com_google_googletest//:gtest",
|
||||||
|
":platform",
|
||||||
] + ruy_test_ext_deps(),
|
] + ruy_test_ext_deps(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -26,8 +26,9 @@ limitations under the License.
|
|||||||
#include "tensorflow/lite/experimental/ruy/matrix.h"
|
#include "tensorflow/lite/experimental/ruy/matrix.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/path.h"
|
#include "tensorflow/lite/experimental/ruy/path.h"
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
|
|
||||||
#if ((defined RUY_NEON_64) || (defined RUY_NEON_32))
|
#if (RUY_PLATFORM(NEON_64) || RUY_PLATFORM(NEON_32))
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -25,6 +25,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/lite/experimental/ruy/internal_matrix.h"
|
#include "tensorflow/lite/experimental/ruy/internal_matrix.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/path.h"
|
#include "tensorflow/lite/experimental/ruy/path.h"
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/size_util.h"
|
#include "tensorflow/lite/experimental/ruy/size_util.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/spec.h"
|
#include "tensorflow/lite/experimental/ruy/spec.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/tune.h"
|
#include "tensorflow/lite/experimental/ruy/tune.h"
|
||||||
@ -217,7 +218,7 @@ RUY_INHERIT_KERNEL(Path::kStandardCpp, Path::kNeon)
|
|||||||
RUY_INHERIT_KERNEL(Path::kNeon, Path::kNeonDotprod)
|
RUY_INHERIT_KERNEL(Path::kNeon, Path::kNeonDotprod)
|
||||||
|
|
||||||
// KernelParams are shared across 32-bit and 64-bit NEON code.
|
// KernelParams are shared across 32-bit and 64-bit NEON code.
|
||||||
#if ((defined RUY_NEON_64) || (defined RUY_NEON_32)) && \
|
#if (RUY_PLATFORM(NEON_64) || RUY_PLATFORM(NEON_32)) && \
|
||||||
(RUY_OPT_ENABLED(RUY_OPT_ASM))
|
(RUY_OPT_ENABLED(RUY_OPT_ASM))
|
||||||
|
|
||||||
#define RUY_ASM_FLAG_HAS_BIAS 0x1
|
#define RUY_ASM_FLAG_HAS_BIAS 0x1
|
||||||
@ -369,7 +370,7 @@ void Kernel8bitNeonInOrder(const KernelParams8bit<4, 4>& params);
|
|||||||
void Kernel8bitNeonDotprodOutOfOrder(const KernelParams8bit<8, 8>& params);
|
void Kernel8bitNeonDotprodOutOfOrder(const KernelParams8bit<8, 8>& params);
|
||||||
void Kernel8bitNeonDotprodInOrder(const KernelParams8bit<8, 8>& params);
|
void Kernel8bitNeonDotprodInOrder(const KernelParams8bit<8, 8>& params);
|
||||||
|
|
||||||
#ifdef RUY_NEON_64
|
#if RUY_PLATFORM(NEON_64)
|
||||||
template <typename DstScalar>
|
template <typename DstScalar>
|
||||||
struct Kernel<Path::kNeon, std::int8_t, std::int8_t, DstScalar,
|
struct Kernel<Path::kNeon, std::int8_t, std::int8_t, DstScalar,
|
||||||
BasicSpec<std::int32_t, DstScalar>> {
|
BasicSpec<std::int32_t, DstScalar>> {
|
||||||
@ -489,7 +490,7 @@ void KernelFloatNeonInOrder(const KernelParamsFloat<8, 8>& params);
|
|||||||
void KernelFloat32NeonOutOfOrder(const KernelParamsFloat<8, 4>& params);
|
void KernelFloat32NeonOutOfOrder(const KernelParamsFloat<8, 4>& params);
|
||||||
void KernelFloatNeonDotprodInOrder(const KernelParamsFloat<8, 8>& params);
|
void KernelFloatNeonDotprodInOrder(const KernelParamsFloat<8, 8>& params);
|
||||||
|
|
||||||
#ifdef RUY_NEON_64
|
#if RUY_PLATFORM(NEON_64)
|
||||||
// A Float kernel for ARM64 Neon.
|
// A Float kernel for ARM64 Neon.
|
||||||
template <>
|
template <>
|
||||||
struct Kernel<Path::kNeon, float, float, float, BasicSpec<float, float>> {
|
struct Kernel<Path::kNeon, float, float, float, BasicSpec<float, float>> {
|
||||||
@ -512,7 +513,7 @@ struct Kernel<Path::kNeon, float, float, float, BasicSpec<float, float>> {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef RUY_NEON_32
|
#if RUY_PLATFORM(NEON_32)
|
||||||
// A Float kernel for ARM32 Neon.
|
// A Float kernel for ARM32 Neon.
|
||||||
template <>
|
template <>
|
||||||
struct Kernel<Path::kNeon, float, float, float, BasicSpec<float, float>> {
|
struct Kernel<Path::kNeon, float, float, float, BasicSpec<float, float>> {
|
||||||
@ -559,7 +560,7 @@ struct Kernel<Path::kNeonDotprod, float, float, float,
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // ((defined RUY_NEON_64) || (defined RUY_NEON_32)) &&
|
#endif // (RUY_PLATFORM(NEON_64) || RUY_PLATFORM(NEON_32)) &&
|
||||||
// (RUY_OPT_ENABLED(RUY_OPT_ASM)
|
// (RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
} // namespace ruy
|
} // namespace ruy
|
||||||
|
|
||||||
|
@ -15,10 +15,11 @@ limitations under the License.
|
|||||||
|
|
||||||
#include "profiling/instrumentation.h"
|
#include "profiling/instrumentation.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/kernel.h"
|
#include "tensorflow/lite/experimental/ruy/kernel.h"
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
|
|
||||||
namespace ruy {
|
namespace ruy {
|
||||||
|
|
||||||
#if (defined RUY_NEON_32) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#if RUY_PLATFORM(NEON_32) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
|
|
||||||
#define RUY_ASM_LABEL_STORE_UINT8 91
|
#define RUY_ASM_LABEL_STORE_UINT8 91
|
||||||
#define RUY_ASM_LABEL_STORE_INT8 92
|
#define RUY_ASM_LABEL_STORE_INT8 92
|
||||||
@ -539,5 +540,5 @@ void KernelFloat32NeonOutOfOrder(const KernelParamsFloat<8, 4>& params) {
|
|||||||
#undef RUY_OFFSET_RHS_BASE_PTR
|
#undef RUY_OFFSET_RHS_BASE_PTR
|
||||||
#undef RUY_OFFSET_DST_BASE_PTR
|
#undef RUY_OFFSET_DST_BASE_PTR
|
||||||
|
|
||||||
#endif // (defined RUY_NEON_32) && (RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#endif // RUY_PLATFORM(NEON_32) && (RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
} // namespace ruy
|
} // namespace ruy
|
||||||
|
@ -13,13 +13,13 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/lite/experimental/ruy/kernel.h"
|
|
||||||
|
|
||||||
#include "profiling/instrumentation.h"
|
#include "profiling/instrumentation.h"
|
||||||
|
#include "tensorflow/lite/experimental/ruy/kernel.h"
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
|
|
||||||
namespace ruy {
|
namespace ruy {
|
||||||
|
|
||||||
#if (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#if RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
|
|
||||||
#define RUY_ASM_LABEL_STORE_UINT8 91
|
#define RUY_ASM_LABEL_STORE_UINT8 91
|
||||||
#define RUY_ASM_LABEL_STORE_INT8 92
|
#define RUY_ASM_LABEL_STORE_INT8 92
|
||||||
@ -6302,6 +6302,6 @@ void KernelFloatNeonDotprodInOrder(const KernelParamsFloat<8, 8>& params) {
|
|||||||
#undef RUY_OFFSET_RHS_BASE_PTR
|
#undef RUY_OFFSET_RHS_BASE_PTR
|
||||||
#undef RUY_OFFSET_DST_BASE_PTR
|
#undef RUY_OFFSET_DST_BASE_PTR
|
||||||
|
|
||||||
#endif // (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#endif // RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
|
|
||||||
} // namespace ruy
|
} // namespace ruy
|
||||||
|
@ -15,9 +15,11 @@ limitations under the License.
|
|||||||
|
|
||||||
#include "tensorflow/lite/experimental/ruy/pack.h"
|
#include "tensorflow/lite/experimental/ruy/pack.h"
|
||||||
|
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
|
|
||||||
namespace ruy {
|
namespace ruy {
|
||||||
|
|
||||||
#if (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#if RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
|
|
||||||
void Pack8bitNeonOutOfOrder(const void* src_ptr0, const void* src_ptr1,
|
void Pack8bitNeonOutOfOrder(const void* src_ptr0, const void* src_ptr1,
|
||||||
const void* src_ptr2, const void* src_ptr3,
|
const void* src_ptr2, const void* src_ptr3,
|
||||||
@ -1329,6 +1331,6 @@ void PackFloatNeonInOrder(const float* src_ptr0, const float* src_ptr1,
|
|||||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
|
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21",
|
||||||
"v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
|
"v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
|
||||||
}
|
}
|
||||||
#endif // (defined RUY_NEON64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#endif // RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
|
|
||||||
} // namespace ruy
|
} // namespace ruy
|
||||||
|
@ -89,6 +89,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/lite/experimental/ruy/common.h"
|
#include "tensorflow/lite/experimental/ruy/common.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/internal_matrix.h"
|
#include "tensorflow/lite/experimental/ruy/internal_matrix.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/tune.h"
|
#include "tensorflow/lite/experimental/ruy/tune.h"
|
||||||
|
|
||||||
namespace ruy {
|
namespace ruy {
|
||||||
@ -158,7 +159,7 @@ struct PackImpl<Path::kStandardCpp, FixedKernelLayout, Scalar, PackedScalar,
|
|||||||
RUY_INHERIT_PACK(Path::kStandardCpp, Path::kNeon)
|
RUY_INHERIT_PACK(Path::kStandardCpp, Path::kNeon)
|
||||||
RUY_INHERIT_PACK(Path::kNeon, Path::kNeonDotprod)
|
RUY_INHERIT_PACK(Path::kNeon, Path::kNeonDotprod)
|
||||||
|
|
||||||
#if (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#if RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
|
|
||||||
void Pack8bitNeonOutOfOrder(const void* src_ptr0, const void* src_ptr1,
|
void Pack8bitNeonOutOfOrder(const void* src_ptr0, const void* src_ptr1,
|
||||||
const void* src_ptr2, const void* src_ptr3,
|
const void* src_ptr2, const void* src_ptr3,
|
||||||
@ -384,7 +385,7 @@ struct PackImpl<Path::kNeon, FixedKernelLayout<Order::kRowMajor, 1, 8>, float,
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // (defined RUY_NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
#endif // RUY_PLATFORM(NEON_64) && RUY_OPT_ENABLED(RUY_OPT_ASM)
|
||||||
|
|
||||||
// Main entry point for packing.
|
// Main entry point for packing.
|
||||||
template <Path ThePath, typename FixedKernelLayout, typename Scalar,
|
template <Path ThePath, typename FixedKernelLayout, typename Scalar,
|
||||||
|
@ -18,29 +18,9 @@ limitations under the License.
|
|||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/size_util.h"
|
#include "tensorflow/lite/experimental/ruy/size_util.h"
|
||||||
|
|
||||||
// Detect ARM, 32-bit or 64-bit
|
|
||||||
#ifdef __aarch64__
|
|
||||||
#define RUY_ARM_64
|
|
||||||
#elif defined(__arm__)
|
|
||||||
#define RUY_ARM_32
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Detect NEON.
|
|
||||||
#if (defined __ARM_NEON) || (defined __ARM_NEON__)
|
|
||||||
#define RUY_NEON
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Define 32bit ARM NEON and 64 bit ARM NEON
|
|
||||||
#if defined(RUY_NEON) && defined(RUY_ARM_32)
|
|
||||||
#define RUY_NEON_32
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(RUY_NEON) && defined(RUY_ARM_64)
|
|
||||||
#define RUY_NEON_64
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace ruy {
|
namespace ruy {
|
||||||
|
|
||||||
// A Path is a choice of implementation path, e.g. between reference code
|
// A Path is a choice of implementation path, e.g. between reference code
|
||||||
@ -119,21 +99,21 @@ inline Path GetMostSignificantPath(Path path_mask) {
|
|||||||
// ruy::kAllPaths represents all Path's that make sense to on a given
|
// ruy::kAllPaths represents all Path's that make sense to on a given
|
||||||
// base architecture.
|
// base architecture.
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#ifdef RUY_NEON_64
|
#if RUY_PLATFORM(NEON_64)
|
||||||
constexpr Path kAllPaths =
|
constexpr Path kAllPaths =
|
||||||
Path::kReference | Path::kStandardCpp | Path::kNeon | Path::kNeonDotprod;
|
Path::kReference | Path::kStandardCpp | Path::kNeon | Path::kNeonDotprod;
|
||||||
#elif defined RUY_NEON_32
|
#elif RUY_PLATFORM(NEON_32)
|
||||||
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon;
|
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon;
|
||||||
#else
|
#else
|
||||||
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp;
|
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp;
|
||||||
#endif // RUY_NEON_64
|
#endif
|
||||||
#else // __linux__
|
#else // __linux__
|
||||||
// We don't know how to do runtime dotprod detection outside of linux for now.
|
// We don't know how to do runtime dotprod detection outside of linux for now.
|
||||||
#if defined(RUY_NEON_64) || defined(RUY_NEON_32)
|
#if RUY_PLATFORM(NEON)
|
||||||
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon;
|
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon;
|
||||||
#else
|
#else
|
||||||
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp;
|
constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp;
|
||||||
#endif // defined(RUY_NEON_64) || defined(RUY_NEON_32)
|
#endif
|
||||||
#endif // __linux__
|
#endif // __linux__
|
||||||
|
|
||||||
} // namespace ruy
|
} // namespace ruy
|
||||||
|
52
tensorflow/lite/experimental/ruy/platform.h
Normal file
52
tensorflow/lite/experimental/ruy/platform.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/* Copyright 2019 Google LLC. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_
|
||||||
|
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_
|
||||||
|
|
||||||
|
#define RUY_PLATFORM(X) ((RUY_DONOTUSEDIRECTLY_##X) != 0)
|
||||||
|
|
||||||
|
// Detect ARM 32-bit
|
||||||
|
#ifdef __arm__
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_ARM_32 1
|
||||||
|
#else
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_ARM_32 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Detect ARM 64-bit
|
||||||
|
#ifdef __aarch64__
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_ARM_64 1
|
||||||
|
#else
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_ARM_64 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Detect NEON
|
||||||
|
#if (defined __ARM_NEON) || (defined __ARM_NEON__)
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_NEON 1
|
||||||
|
#else
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_NEON 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Define ARM 32-bit NEON
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_NEON_32 \
|
||||||
|
(RUY_DONOTUSEDIRECTLY_NEON && RUY_DONOTUSEDIRECTLY_ARM_32)
|
||||||
|
|
||||||
|
// Define ARM 64-bit NEON
|
||||||
|
// Note: NEON is implied by ARM64, so this define is redundant.
|
||||||
|
// It still allows some conveyance of intent.
|
||||||
|
#define RUY_DONOTUSEDIRECTLY_NEON_64 \
|
||||||
|
(RUY_DONOTUSEDIRECTLY_NEON && RUY_DONOTUSEDIRECTLY_ARM_64)
|
||||||
|
|
||||||
|
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_
|
@ -29,6 +29,7 @@ limitations under the License.
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/pmu.h"
|
#include "tensorflow/lite/experimental/ruy/pmu.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/ruy.h"
|
#include "tensorflow/lite/experimental/ruy/ruy.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/ruy_advanced.h"
|
#include "tensorflow/lite/experimental/ruy/ruy_advanced.h"
|
||||||
@ -1651,7 +1652,7 @@ void TestSet<LhsScalar, RhsScalar, SpecType>::MakeResultPaths() {
|
|||||||
}
|
}
|
||||||
// We link against a generic BLAS target that only maps to OpenBLAS on specific
|
// We link against a generic BLAS target that only maps to OpenBLAS on specific
|
||||||
// architectures.
|
// architectures.
|
||||||
#if defined RUY_ARM_64 || defined RUY_ARM_32
|
#if RUY_PLATFORM(ARM_32) || RUY_PLATFORM(ARM_64)
|
||||||
// OpenBLAS multi-threading is disabled, so avoid mixing single-threaded
|
// OpenBLAS multi-threading is disabled, so avoid mixing single-threaded
|
||||||
// and multi-threaded benchmark results.
|
// and multi-threaded benchmark results.
|
||||||
if (max_num_threads == 1) {
|
if (max_num_threads == 1) {
|
||||||
|
@ -19,11 +19,12 @@ limitations under the License.
|
|||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
||||||
|
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||||
#include "tensorflow/lite/experimental/ruy/time.h"
|
#include "tensorflow/lite/experimental/ruy/time.h"
|
||||||
|
|
||||||
namespace ruy {
|
namespace ruy {
|
||||||
|
|
||||||
#ifdef RUY_NEON_64
|
#if RUY_PLATFORM(NEON_64)
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -130,7 +131,7 @@ Tuning TuningResolver::ResolveNow() {
|
|||||||
return is_probably_inorder ? Tuning::kInOrder : Tuning::kOutOfOrder;
|
return is_probably_inorder ? Tuning::kInOrder : Tuning::kOutOfOrder;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else // not defined RUY_NEON_64
|
#else // not RUY_PLATFORM(NEON_64)
|
||||||
|
|
||||||
float TuningResolver::EvalRatio() { return 0; }
|
float TuningResolver::EvalRatio() { return 0; }
|
||||||
float TuningResolver::ThresholdRatio() { return 0; }
|
float TuningResolver::ThresholdRatio() { return 0; }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user