Restore functionality of --define=tflite_with_ruy=true
PiperOrigin-RevId: 317677225
Change-Id: If6533fdfeb21f676dd4b77a536b1aca894a03003
parent 3f97342876
commit 60be0c3c0e
@@ -14,8 +14,8 @@ package(
 # This will cause TFLite to build with ruy only, providing a smaller binary.
 # WARNING: This build flag is experimental and subject to change.
 config_setting(
-    name = "tflite_with_ruy_only_explicit_true",
-    define_values = {"TFLITE_WITH_RUY_ONLY": "true"},
+    name = "tflite_with_ruy_explicit_true",
+    define_values = {"tflite_with_ruy": "true"},
 )

 # Disables usage of ruy as the exclusive GEMM backend in TFLite kernels.
@@ -23,14 +23,14 @@ config_setting(
 # the default GEMM option at runtime.
 # WARNING: This build flag is experimental and subject to change.
 config_setting(
-    name = "tflite_with_ruy_only_explicit_false",
-    define_values = {"TFLITE_WITH_RUY_ONLY": "false"},
+    name = "tflite_with_ruy_explicit_false",
+    define_values = {"tflite_with_ruy": "false"},
 )

 ###### Beginning of config_setting's to match aarch64 ######
 #
 # We need to identify the aarch64 instruction set to decide whether to enable
-# TFLITE_WITH_RUY_ONLY by default. This is surprisingly hard to do because select()
+# TFLITE_WITH_RUY by default. This is surprisingly hard to do because select()
 # can only consume config_setting's, these config_settings are not centralized,
 # and the "cpu" value which they define are free-form strings and there is no
 # standardization of the strings that we need to match for the aarch64 architecture.
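
For context on the comment above: a config_setting can match either a --define value or a --cpu string, and a select() keyed on such settings picks dependencies at analysis time. The following is a minimal illustrative sketch, not part of this commit; the example_* names and the "aarch64" cpu string are assumptions.

    # Hypothetical sketch: match one aarch64 cpu string and the explicit
    # --define=tflite_with_ruy=true setting defined above.
    config_setting(
        name = "example_cpu_aarch64",  # assumed name, for illustration only
        values = {"cpu": "aarch64"},
    )

    cc_library(
        name = "example_backend",  # assumed name, for illustration only
        deps = select({
            ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
            ":example_cpu_aarch64": [":tflite_with_ruy_enabled"],
            "//conditions:default": [],
        }),
    )
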
@@ -239,45 +239,45 @@ cc_test(
 )

 cc_library(
-    name = "tflite_with_ruy_only_enabled",
+    name = "tflite_with_ruy_enabled",
     build_for_embedded = True,
-    defines = ["TFLITE_WITH_RUY_ONLY"],
+    defines = ["TFLITE_WITH_RUY"],
     visibility = ["//visibility:private"],
 )

 cc_library(
-    name = "tflite_with_ruy_only_and_caching_enabled",
+    name = "tflite_with_ruy_and_caching_enabled",
     defines = [
-        "TFLITE_WITH_RUY_ONLY",
+        "TFLITE_WITH_RUY",
         "TFLITE_WITH_RUY_GEMV",
     ],
     visibility = ["//visibility:private"],
 )

 cc_library(
-    name = "tflite_with_ruy_only_default",
+    name = "tflite_with_ruy_default",
     build_for_embedded = True,
     select_deps = {
-        ":chromiumos_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_aarch64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64e": [":tflite_with_ruy_only_enabled"],
-        ":cpu_ios_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_ios_arm64e": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64_v8a": [":tflite_with_ruy_only_enabled"],
-        "//tensorflow:android_arm": ["tflite_with_ruy_only_enabled"],
+        ":chromiumos_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_aarch64": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64e": [":tflite_with_ruy_enabled"],
+        ":cpu_ios_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_ios_arm64e": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64_v8a": [":tflite_with_ruy_enabled"],
+        "//tensorflow:android_arm": ["tflite_with_ruy_enabled"],
         "//conditions:default": [],
     },
     visibility = ["//visibility:private"],
 )

 cc_library(
-    name = "tflite_with_ruy_only",
+    name = "tflite_with_ruy",
     build_for_embedded = True,
     select_deps = {
-        ":tflite_with_ruy_only_explicit_true": [":tflite_with_ruy_only_enabled"],
-        ":tflite_with_ruy_only_explicit_false": [],
-        "//conditions:default": [":tflite_with_ruy_only_default"],
+        ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
+        ":tflite_with_ruy_explicit_false": [],
+        "//conditions:default": [":tflite_with_ruy_default"],
     },
 )

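
With this change, the define named in the commit title works again: building with --define=tflite_with_ruy=true selects :tflite_with_ruy_explicit_true, =false selects :tflite_with_ruy_explicit_false, and builds with no define fall through to :tflite_with_ruy_default, which enables ruy for the arm64 configurations listed above. As a rough sketch only (the rule above uses the select_deps attribute rather than a bare select(); the target name below is hypothetical), the resolution is equivalent to:

    # Illustrative sketch, assuming a plain cc_library with select();
    # not part of this commit.
    cc_library(
        name = "tflite_with_ruy_sketch",
        deps = select({
            ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
            ":tflite_with_ruy_explicit_false": [],
            "//conditions:default": [":tflite_with_ruy_default"],
        }),
    )
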
@@ -291,7 +291,7 @@ cc_library(
     ],
     copts = tflite_copts(),
     deps = [
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         ":op_macros",
         # For now this unconditionally depends on both ruy and gemmlowp.
         # See the comment inside class CpuBackendContext on the
@@ -311,11 +311,11 @@ cc_library(
     copts = tflite_copts(),
     deps = [
         ":cpu_backend_context",
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:types",
         # For now this unconditionally depends on both ruy and gemmlowp.
-        # We only need to depend on gemmlowp when tflite_with_ruy_only
+        # We only need to depend on gemmlowp when tflite_with_ruy
         # is false, but putting these dependencies in a select() seems to
         # defeat copybara's rewriting rules.
         "@ruy//ruy:context",
@@ -349,20 +349,20 @@ cc_library(
     ],
     copts = tflite_copts(),
     deps = [
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         "//tensorflow/lite/kernels/internal:common",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:cpu_check",
         "//tensorflow/lite/kernels/internal:types",
         ":cpu_backend_context",
         ":cpu_backend_threadpool",
-        # Depend on ruy regardless of `tflite_with_ruy_only`. See the comment in
+        # Depend on ruy regardless of `tflite_with_ruy`. See the comment in
         # cpu_backend_gemm.h about why ruy is the generic path.
         "@ruy//ruy",
         "@ruy//ruy:matrix",
         "@ruy//ruy:path",
         "@ruy//ruy/profiler:instrumentation",
-        # We only need to depend on gemmlowp and Eigen when tflite_with_ruy_only
+        # We only need to depend on gemmlowp and Eigen when tflite_with_ruy
         # is false, but putting these dependencies in a select() seems to
         # defeat copybara's rewriting rules.
         "@gemmlowp",
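
The comments in this hunk spell out the design choice: gemmlowp and Eigen stay as unconditional deps even though they are only needed when tflite_with_ruy is false, because putting them behind a select() appears to defeat copybara's rewriting rules. For illustration only, the avoided shape would look roughly like the sketch below; the target name is hypothetical and not part of this commit.

    # Not used by this change: conditional GEMM backend deps of this shape
    # are avoided per the comments above.
    cc_library(
        name = "gemm_deps_sketch",
        deps = select({
            ":tflite_with_ruy_explicit_true": [],
            "//conditions:default": ["@gemmlowp"],
        }),
    )
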
@@ -605,7 +605,7 @@ cc_library(
         "//tensorflow/lite/kernels/internal:cppmath",
         "//tensorflow/lite:string",
         "@farmhash_archive//:farmhash",
-    ] + [":tflite_with_ruy_only_and_caching_enabled"],
+    ] + [":tflite_with_ruy_and_caching_enabled"],
 )

 cc_library(

@@ -26,7 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include "tensorflow/lite/kernels/internal/types.h"
 // b/131835803 forces us to include multithreaded_conv.h before optimized_ops.h
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 #include "tensorflow/lite/kernels/internal/optimized/multithreaded_conv.h"
 #endif
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
@@ -765,8 +765,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       break;
     }
     case kMultithreadOptimized: {
-#ifdef TFLITE_WITH_RUY_ONLY
-      // See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
+      // See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY
       // was enabled. We #if out this code in order to get the corresponding
       // binary size benefits.
       TFLITE_DCHECK(false);
@@ -1051,8 +1051,8 @@ TfLiteRegistration* Register_CONVOLUTION_CBLAS_OPT() {
 TfLiteRegistration* Register_CONV_2D() {
 #if defined TFLITE_USE_APPLE_ACCELERATE_FOR_CONV
   return Register_CONVOLUTION_CBLAS_OPT();
-#elif defined TFLITE_WITH_RUY_ONLY
-  // TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
+#elif defined TFLITE_WITH_RUY
+  // TFLITE_WITH_RUY optimizes the generic kernel type.
   return Register_CONVOLUTION_GENERIC_OPT();
 #else
   return Register_CONVOLUTION_MULTITHREADED_OPT();
@@ -1063,8 +1063,8 @@ TfLiteRegistration* Register_CONV_2D() {
 // models only need the UINT8 type. TFLite's op registration mechanism doesn't
 // yet allow for more nuanced registration mechanisms.
 TfLiteRegistration* Register_CONV_2D_UINT8() {
-#if defined TFLITE_WITH_RUY_ONLY
-  // TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
+#if defined TFLITE_WITH_RUY
+  // TFLITE_WITH_RUY optimizes the generic kernel type.
   return Register_CONVOLUTION_GENERIC_OPT_UINT8();
 #else
   return Register_CONV_2D();

@@ -148,7 +148,7 @@ class ConvolutionOpModel : public BaseConvolutionOpModel<float> {
 const auto kKernelMap = new std::map<string, TfLiteRegistration*>({
     {"Reference", ops::builtin::Register_CONVOLUTION_REF()},
     {"GenericOptimized", ops::builtin::Register_CONVOLUTION_GENERIC_OPT()},
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
     {"MultithreadedOptimized",
      ops::builtin::Register_CONVOLUTION_MULTITHREADED_OPT()},
 #endif

@@ -56,7 +56,7 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
   // (see :cpu_backend_gemm), for now a CpuBackendContext always
   // stores both a gemmlowp context and a ruy context.
   // TODO(b/131416458): Once call sites all go through abstractions,
-  // elide what can be elided based on TFLITE_WITH_RUY_ONLY.
+  // elide what can be elided based on TFLITE_WITH_RUY.
   const std::unique_ptr<ruy::Context> ruy_context_;
   const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;

@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_ruy.h"

-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 #include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_gemmlowp.h"
 #endif
@@ -42,7 +42,7 @@ template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
 struct GemmImpl : detail::GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar,
                                            DstScalar, quantization_flavor> {};

-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY

 /* Specializations using gemmlowp */

@@ -82,7 +82,7 @@ template <>
 struct GemmImpl<float, float, float, float, QuantizationFlavor::kFloatingPoint>
     : detail::GemmImplUsingEigen {};

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

 /* Public entry point */

@@ -591,10 +591,10 @@ struct CustomGemvImpl<LhsScalar, RhsScalar, std::int32_t, DstScalar,
 // The float specialization below is unconditionally faster than ruy
 // because ruy does not currently have any Gemv path.
 // But it is not unconditionally faster than Eigen, which is what is used
-// unless TFLITE_WITH_RUY_ONLY is defined. Indeed, Eigen has decently efficient
+// unless TFLITE_WITH_RUY is defined. Indeed, Eigen has decently efficient
 // Gemv paths, and they may use AVX instructions, while the present
 // NEON intrinsics code maps at best to SSE4 on x86.
-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY

 // We want to use fused multiply-add when it's available (that is, on A64
 // unconditionally and on A32 with VFPv4) because it's often faster, and
@@ -778,7 +778,7 @@ struct CustomGemvImpl<float, float, float, float,
   }
 };

-#endif  // TFLITE_WITH_RUY_ONLY
+#endif  // TFLITE_WITH_RUY

 #endif  // USE_NEON

|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
|
|||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TFLITE_WITH_RUY_ONLY
|
||||
#ifndef TFLITE_WITH_RUY
|
||||
|
||||
#include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
|
||||
|
||||
|
@@ -78,4 +78,4 @@ void GemmImplUsingEigen::Run(
 }  // namespace cpu_backend_gemm
 }  // namespace tflite

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_
 #define TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_

-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY

 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
@@ -37,6 +37,6 @@ struct GemmImplUsingEigen {
 }  // namespace cpu_backend_gemm
 }  // namespace tflite

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

 #endif  // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_

@@ -19,7 +19,7 @@ limitations under the License.
 #include <tuple>

 #include "tensorflow/lite/kernels/internal/compatibility.h"
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY

 #include <cstdint>
 #include <type_traits>
@@ -190,6 +190,6 @@ struct GemmImplUsingGemmlowp<LhsScalar, RhsScalar, AccumScalar, DstScalar,
 }  // namespace cpu_backend_gemm
 }  // namespace tflite

-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

 #endif  // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_GEMMLOWP_H_

@@ -19,7 +19,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"

-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
 #include "ruy/context.h"  // from @ruy
 #include "ruy/thread_pool.h"  // from @ruy
 #else
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tflite {
 namespace cpu_backend_threadpool {

-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY

 using Task = ruy::Task;

@@ -41,7 +41,7 @@ void Execute(int tasks_count, TaskType* tasks,
       tasks_count, tasks);
 }

-#else  // not TFLITE_WITH_RUY_ONLY
+#else  // not TFLITE_WITH_RUY

 using Task = gemmlowp::Task;

@@ -132,7 +132,7 @@ inline void DepthwiseConv(const DepthwiseParams& params,
   int thread_count = HowManyConvThreads(output_shape, filter_shape);
   const int max_threads = cpu_backend_context->max_num_threads();
   thread_count = std::max(1, std::min(thread_count, max_threads));
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
   // Cap the number of threads to 2 for float path to avoid regression in
   // performance (b/132294857).
   if (std::is_floating_point<T>::value) {

@@ -187,7 +187,7 @@ ifeq ($(TARGET_ARCH),aarch64)
   BUILD_WITH_RUY=true
 endif
 ifeq ($(BUILD_WITH_RUY),true)
-  CXXFLAGS += -DTFLITE_WITH_RUY_ONLY
+  CXXFLAGS += -DTFLITE_WITH_RUY
 endif

 BUILD_WITH_RUY_PROFILER ?= false