Restore functionality of --define=tflite_with_ruy=true

PiperOrigin-RevId: 317677225
Change-Id: If6533fdfeb21f676dd4b77a536b1aca894a03003
This commit is contained in:
T.J. Alumbaugh 2020-06-22 10:06:04 -07:00 committed by TensorFlower Gardener
parent 3f97342876
commit 60be0c3c0e
12 changed files with 55 additions and 55 deletions

View File

@ -14,8 +14,8 @@ package(
# This will cause TFLite to build with ruy only, providing a smaller binary.
# WARNING: This build flag is experimental and subject to change.
config_setting(
name = "tflite_with_ruy_only_explicit_true",
define_values = {"TFLITE_WITH_RUY_ONLY": "true"},
name = "tflite_with_ruy_explicit_true",
define_values = {"tflite_with_ruy": "true"},
)
# Disables usage of ruy as the exclusive GEMM backend in TFLite kernels.
@ -23,14 +23,14 @@ config_setting(
# the default GEMM option at runtime.
# WARNING: This build flag is experimental and subject to change.
config_setting(
name = "tflite_with_ruy_only_explicit_false",
define_values = {"TFLITE_WITH_RUY_ONLY": "false"},
name = "tflite_with_ruy_explicit_false",
define_values = {"tflite_with_ruy": "false"},
)
###### Beginning of config_setting's to match aarch64 ######
#
# We need to identify the aarch64 instruction set to decide whether to enable
# TFLITE_WITH_RUY_ONLY by default. This is surprisingly hard to do because select()
# TFLITE_WITH_RUY by default. This is surprisingly hard to do because select()
# can only consume config_setting's; these config_settings are not centralized,
# the "cpu" values which they define are free-form strings, and there is no
# standardization of the strings that we need to match for the aarch64 architecture.
@ -239,45 +239,45 @@ cc_test(
)
cc_library(
name = "tflite_with_ruy_only_enabled",
name = "tflite_with_ruy_enabled",
build_for_embedded = True,
defines = ["TFLITE_WITH_RUY_ONLY"],
defines = ["TFLITE_WITH_RUY"],
visibility = ["//visibility:private"],
)
cc_library(
name = "tflite_with_ruy_only_and_caching_enabled",
name = "tflite_with_ruy_and_caching_enabled",
defines = [
"TFLITE_WITH_RUY_ONLY",
"TFLITE_WITH_RUY",
"TFLITE_WITH_RUY_GEMV",
],
visibility = ["//visibility:private"],
)
cc_library(
name = "tflite_with_ruy_only_default",
name = "tflite_with_ruy_default",
build_for_embedded = True,
select_deps = {
":chromiumos_arm64": [":tflite_with_ruy_only_enabled"],
":cpu_aarch64": [":tflite_with_ruy_only_enabled"],
":cpu_arm64": [":tflite_with_ruy_only_enabled"],
":cpu_arm64e": [":tflite_with_ruy_only_enabled"],
":cpu_ios_arm64": [":tflite_with_ruy_only_enabled"],
":cpu_ios_arm64e": [":tflite_with_ruy_only_enabled"],
":cpu_arm64_v8a": [":tflite_with_ruy_only_enabled"],
"//tensorflow:android_arm": ["tflite_with_ruy_only_enabled"],
":chromiumos_arm64": [":tflite_with_ruy_enabled"],
":cpu_aarch64": [":tflite_with_ruy_enabled"],
":cpu_arm64": [":tflite_with_ruy_enabled"],
":cpu_arm64e": [":tflite_with_ruy_enabled"],
":cpu_ios_arm64": [":tflite_with_ruy_enabled"],
":cpu_ios_arm64e": [":tflite_with_ruy_enabled"],
":cpu_arm64_v8a": [":tflite_with_ruy_enabled"],
"//tensorflow:android_arm": ["tflite_with_ruy_enabled"],
"//conditions:default": [],
},
visibility = ["//visibility:private"],
)
cc_library(
name = "tflite_with_ruy_only",
name = "tflite_with_ruy",
build_for_embedded = True,
select_deps = {
":tflite_with_ruy_only_explicit_true": [":tflite_with_ruy_only_enabled"],
":tflite_with_ruy_only_explicit_false": [],
"//conditions:default": [":tflite_with_ruy_only_default"],
":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
":tflite_with_ruy_explicit_false": [],
"//conditions:default": [":tflite_with_ruy_default"],
},
)
@ -291,7 +291,7 @@ cc_library(
],
copts = tflite_copts(),
deps = [
":tflite_with_ruy_only",
":tflite_with_ruy",
":op_macros",
# For now this unconditionally depends on both ruy and gemmlowp.
# See the comment inside class CpuBackendContext on the
@ -311,11 +311,11 @@ cc_library(
copts = tflite_copts(),
deps = [
":cpu_backend_context",
":tflite_with_ruy_only",
":tflite_with_ruy",
"//tensorflow/lite/kernels/internal:compatibility",
"//tensorflow/lite/kernels/internal:types",
# For now this unconditionally depends on both ruy and gemmlowp.
# We only need to depend on gemmlowp when tflite_with_ruy_only
# We only need to depend on gemmlowp when tflite_with_ruy
# is false, but putting these dependencies in a select() seems to
# defeat copybara's rewriting rules.
"@ruy//ruy:context",
@ -349,20 +349,20 @@ cc_library(
],
copts = tflite_copts(),
deps = [
":tflite_with_ruy_only",
":tflite_with_ruy",
"//tensorflow/lite/kernels/internal:common",
"//tensorflow/lite/kernels/internal:compatibility",
"//tensorflow/lite/kernels/internal:cpu_check",
"//tensorflow/lite/kernels/internal:types",
":cpu_backend_context",
":cpu_backend_threadpool",
# Depend on ruy regardless of `tflite_with_ruy_only`. See the comment in
# Depend on ruy regardless of `tflite_with_ruy`. See the comment in
# cpu_backend_gemm.h about why ruy is the generic path.
"@ruy//ruy",
"@ruy//ruy:matrix",
"@ruy//ruy:path",
"@ruy//ruy/profiler:instrumentation",
# We only need to depend on gemmlowp and Eigen when tflite_with_ruy_only
# We only need to depend on gemmlowp and Eigen when tflite_with_ruy
# is false, but putting these dependencies in a select() seems to
# defeat copybara's rewriting rules.
"@gemmlowp",
@ -605,7 +605,7 @@ cc_library(
"//tensorflow/lite/kernels/internal:cppmath",
"//tensorflow/lite:string",
"@farmhash_archive//:farmhash",
] + [":tflite_with_ruy_only_and_caching_enabled"],
] + [":tflite_with_ruy_and_caching_enabled"],
)
cc_library(

View File

@ -26,7 +26,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
// b/131835803 forces us to include multithreaded_conv.h before optimized_ops.h
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
#include "tensorflow/lite/kernels/internal/optimized/multithreaded_conv.h"
#endif
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
@ -765,8 +765,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
break;
}
case kMultithreadOptimized: {
#ifdef TFLITE_WITH_RUY_ONLY
// See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY_ONLY
#ifdef TFLITE_WITH_RUY
// See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY
// was enabled. We #if out this code in order to get the corresponding
// binary size benefits.
TFLITE_DCHECK(false);
@ -1051,8 +1051,8 @@ TfLiteRegistration* Register_CONVOLUTION_CBLAS_OPT() {
TfLiteRegistration* Register_CONV_2D() {
#if defined TFLITE_USE_APPLE_ACCELERATE_FOR_CONV
return Register_CONVOLUTION_CBLAS_OPT();
#elif defined TFLITE_WITH_RUY_ONLY
// TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
#elif defined TFLITE_WITH_RUY
// TFLITE_WITH_RUY optimizes the generic kernel type.
return Register_CONVOLUTION_GENERIC_OPT();
#else
return Register_CONVOLUTION_MULTITHREADED_OPT();
@ -1063,8 +1063,8 @@ TfLiteRegistration* Register_CONV_2D() {
// models only need the UINT8 type. TFLite's op registration mechanism doesn't
// yet allow for more nuanced registration mechanisms.
TfLiteRegistration* Register_CONV_2D_UINT8() {
#if defined TFLITE_WITH_RUY_ONLY
// TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
#if defined TFLITE_WITH_RUY
// TFLITE_WITH_RUY optimizes the generic kernel type.
return Register_CONVOLUTION_GENERIC_OPT_UINT8();
#else
return Register_CONV_2D();

View File

@ -148,7 +148,7 @@ class ConvolutionOpModel : public BaseConvolutionOpModel<float> {
const auto kKernelMap = new std::map<string, TfLiteRegistration*>({
{"Reference", ops::builtin::Register_CONVOLUTION_REF()},
{"GenericOptimized", ops::builtin::Register_CONVOLUTION_GENERIC_OPT()},
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
{"MultithreadedOptimized",
ops::builtin::Register_CONVOLUTION_MULTITHREADED_OPT()},
#endif

View File

@ -56,7 +56,7 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
// (see :cpu_backend_gemm), for now a CpuBackendContext always
// stores both a gemmlowp context and a ruy context.
// TODO(b/131416458): Once call sites all go through abstractions,
// elide what can be elided based on TFLITE_WITH_RUY_ONLY.
// elide what can be elided based on TFLITE_WITH_RUY.
const std::unique_ptr<ruy::Context> ruy_context_;
const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;

View File

@ -24,7 +24,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
#include "tensorflow/lite/kernels/cpu_backend_gemm_ruy.h"
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
#include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
#include "tensorflow/lite/kernels/cpu_backend_gemm_gemmlowp.h"
#endif
@ -42,7 +42,7 @@ template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
struct GemmImpl : detail::GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar,
DstScalar, quantization_flavor> {};
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
/* Specializations using gemmlowp */
@ -82,7 +82,7 @@ template <>
struct GemmImpl<float, float, float, float, QuantizationFlavor::kFloatingPoint>
: detail::GemmImplUsingEigen {};
#endif // not TFLITE_WITH_RUY_ONLY
#endif // not TFLITE_WITH_RUY
/* Public entry point */

View File

@ -591,10 +591,10 @@ struct CustomGemvImpl<LhsScalar, RhsScalar, std::int32_t, DstScalar,
// The float specialization below is unconditionally faster than ruy
// because ruy does not currently have any Gemv path.
// But it is not unconditionally faster than Eigen, which is what is used
// unless TFLITE_WITH_RUY_ONLY is defined. Indeed, Eigen has decently efficient
// unless TFLITE_WITH_RUY is defined. Indeed, Eigen has decently efficient
// Gemv paths, and they may use AVX instructions, while the present
// NEON intrinsics code maps at best to SSE4 on x86.
#ifdef TFLITE_WITH_RUY_ONLY
#ifdef TFLITE_WITH_RUY
// We want to use fused multiply-add when it's available (that is, on A64
// unconditionally and on A32 with VFPv4) because it's often faster, and
@ -778,7 +778,7 @@ struct CustomGemvImpl<float, float, float, float,
}
};
#endif // TFLITE_WITH_RUY_ONLY
#endif // TFLITE_WITH_RUY
#endif // USE_NEON

View File

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
#include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
@ -78,4 +78,4 @@ void GemmImplUsingEigen::Run(
} // namespace cpu_backend_gemm
} // namespace tflite
#endif // not TFLITE_WITH_RUY_ONLY
#endif // not TFLITE_WITH_RUY

View File

@ -16,7 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_
#define TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
@ -37,6 +37,6 @@ struct GemmImplUsingEigen {
} // namespace cpu_backend_gemm
} // namespace tflite
#endif // not TFLITE_WITH_RUY_ONLY
#endif // not TFLITE_WITH_RUY
#endif // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_

View File

@ -19,7 +19,7 @@ limitations under the License.
#include <tuple>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
#include <cstdint>
#include <type_traits>
@ -190,6 +190,6 @@ struct GemmImplUsingGemmlowp<LhsScalar, RhsScalar, AccumScalar, DstScalar,
} // namespace cpu_backend_gemm
} // namespace tflite
#endif // not TFLITE_WITH_RUY_ONLY
#endif // not TFLITE_WITH_RUY
#endif // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_GEMMLOWP_H_

View File

@ -19,7 +19,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#ifdef TFLITE_WITH_RUY_ONLY
#ifdef TFLITE_WITH_RUY
#include "ruy/context.h" // from @ruy
#include "ruy/thread_pool.h" // from @ruy
#else
@ -29,7 +29,7 @@ limitations under the License.
namespace tflite {
namespace cpu_backend_threadpool {
#ifdef TFLITE_WITH_RUY_ONLY
#ifdef TFLITE_WITH_RUY
using Task = ruy::Task;
@ -41,7 +41,7 @@ void Execute(int tasks_count, TaskType* tasks,
tasks_count, tasks);
}
#else // not TFLITE_WITH_RUY_ONLY
#else // not TFLITE_WITH_RUY
using Task = gemmlowp::Task;

View File

@ -132,7 +132,7 @@ inline void DepthwiseConv(const DepthwiseParams& params,
int thread_count = HowManyConvThreads(output_shape, filter_shape);
const int max_threads = cpu_backend_context->max_num_threads();
thread_count = std::max(1, std::min(thread_count, max_threads));
#ifndef TFLITE_WITH_RUY_ONLY
#ifndef TFLITE_WITH_RUY
// Cap the number of threads to 2 for float path to avoid regression in
// performance (b/132294857).
if (std::is_floating_point<T>::value) {

View File

@ -187,7 +187,7 @@ ifeq ($(TARGET_ARCH),aarch64)
BUILD_WITH_RUY=true
endif
ifeq ($(BUILD_WITH_RUY),true)
CXXFLAGS += -DTFLITE_WITH_RUY_ONLY
CXXFLAGS += -DTFLITE_WITH_RUY
endif
BUILD_WITH_RUY_PROFILER ?= false