Restore functionality of --define=tflite_with_ruy=true

PiperOrigin-RevId: 317677225
Change-Id: If6533fdfeb21f676dd4b77a536b1aca894a03003
T.J. Alumbaugh, 2020-06-22 10:06:04 -07:00; committed by TensorFlower Gardener
commit 60be0c3c0e (parent 3f97342876)
12 changed files with 55 additions and 55 deletions
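With the rename, passing --define=tflite_with_ruy=true (or =false) to Bazel once again matches the config_settings in the BUILD diff below, which in turn select :tflite_with_ruy_enabled and add TFLITE_WITH_RUY to the compiler defines. A minimal, hypothetical sketch of the compile-time switch that macro drives in kernel code (standalone illustration, not code from this commit):

// ruy_flag_sketch.cc -- hypothetical standalone illustration of how TFLite
// kernel code keys off TFLITE_WITH_RUY once the Bazel define has been
// propagated as a -D compiler flag. Build with and without -DTFLITE_WITH_RUY
// to compare the two paths.
#include <cstdio>

const char* GemmBackends() {
#ifdef TFLITE_WITH_RUY
  return "ruy only (smaller binary)";
#else
  return "ruy plus gemmlowp/Eigen fallbacks";
#endif
}

int main() {
  std::printf("Compiled GEMM backends: %s\n", GemmBackends());
  return 0;
}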

View File

@@ -14,8 +14,8 @@ package(
 # This will cause TFLite to build with ruy only, providing a smaller binary.
 # WARNING: This build flag is experimental and subject to change.
 config_setting(
-    name = "tflite_with_ruy_only_explicit_true",
-    define_values = {"TFLITE_WITH_RUY_ONLY": "true"},
+    name = "tflite_with_ruy_explicit_true",
+    define_values = {"tflite_with_ruy": "true"},
 )
 
 # Disables usage of ruy as the exclusive GEMM backend in TFLite kernels.
@@ -23,14 +23,14 @@ config_setting(
 # the default GEMM option at runtime.
 # WARNING: This build flag is experimental and subject to change.
 config_setting(
-    name = "tflite_with_ruy_only_explicit_false",
-    define_values = {"TFLITE_WITH_RUY_ONLY": "false"},
+    name = "tflite_with_ruy_explicit_false",
+    define_values = {"tflite_with_ruy": "false"},
 )
 
 ###### Beginning of config_setting's to match aarch64 ######
 #
 # We need to identify the aarch64 instruction set to decide whether to enable
-# TFLITE_WITH_RUY_ONLY by default. This is surprisingly hard to do because select()
+# TFLITE_WITH_RUY by default. This is surprisingly hard to do because select()
 # can only consume config_setting's, these config_settings are not centralized,
 # and the "cpu" value which they define are free-form strings and there is no
 # standardization of the strings that we need to match for the aarch64 architecture.
@@ -239,45 +239,45 @@ cc_test(
 )
 
 cc_library(
-    name = "tflite_with_ruy_only_enabled",
+    name = "tflite_with_ruy_enabled",
     build_for_embedded = True,
-    defines = ["TFLITE_WITH_RUY_ONLY"],
+    defines = ["TFLITE_WITH_RUY"],
     visibility = ["//visibility:private"],
 )
 
 cc_library(
-    name = "tflite_with_ruy_only_and_caching_enabled",
+    name = "tflite_with_ruy_and_caching_enabled",
     defines = [
-        "TFLITE_WITH_RUY_ONLY",
+        "TFLITE_WITH_RUY",
         "TFLITE_WITH_RUY_GEMV",
     ],
     visibility = ["//visibility:private"],
 )
 
 cc_library(
-    name = "tflite_with_ruy_only_default",
+    name = "tflite_with_ruy_default",
     build_for_embedded = True,
     select_deps = {
-        ":chromiumos_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_aarch64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64e": [":tflite_with_ruy_only_enabled"],
-        ":cpu_ios_arm64": [":tflite_with_ruy_only_enabled"],
-        ":cpu_ios_arm64e": [":tflite_with_ruy_only_enabled"],
-        ":cpu_arm64_v8a": [":tflite_with_ruy_only_enabled"],
-        "//tensorflow:android_arm": ["tflite_with_ruy_only_enabled"],
+        ":chromiumos_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_aarch64": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64e": [":tflite_with_ruy_enabled"],
+        ":cpu_ios_arm64": [":tflite_with_ruy_enabled"],
+        ":cpu_ios_arm64e": [":tflite_with_ruy_enabled"],
+        ":cpu_arm64_v8a": [":tflite_with_ruy_enabled"],
+        "//tensorflow:android_arm": ["tflite_with_ruy_enabled"],
         "//conditions:default": [],
     },
     visibility = ["//visibility:private"],
 )
 
 cc_library(
-    name = "tflite_with_ruy_only",
+    name = "tflite_with_ruy",
     build_for_embedded = True,
     select_deps = {
-        ":tflite_with_ruy_only_explicit_true": [":tflite_with_ruy_only_enabled"],
-        ":tflite_with_ruy_only_explicit_false": [],
-        "//conditions:default": [":tflite_with_ruy_only_default"],
+        ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"],
+        ":tflite_with_ruy_explicit_false": [],
+        "//conditions:default": [":tflite_with_ruy_default"],
     },
 )
@@ -291,7 +291,7 @@ cc_library(
     ],
     copts = tflite_copts(),
     deps = [
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         ":op_macros",
         # For now this unconditionally depends on both ruy and gemmlowp.
         # See the comment inside class CpuBackendContext on the
@@ -311,11 +311,11 @@ cc_library(
     copts = tflite_copts(),
     deps = [
         ":cpu_backend_context",
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:types",
         # For now this unconditionally depends on both ruy and gemmlowp.
-        # We only need to depend on gemmlowp when tflite_with_ruy_only
+        # We only need to depend on gemmlowp when tflite_with_ruy
         # is false, but putting these dependencies in a select() seems to
         # defeat copybara's rewriting rules.
         "@ruy//ruy:context",
@@ -349,20 +349,20 @@ cc_library(
     ],
     copts = tflite_copts(),
     deps = [
-        ":tflite_with_ruy_only",
+        ":tflite_with_ruy",
         "//tensorflow/lite/kernels/internal:common",
         "//tensorflow/lite/kernels/internal:compatibility",
         "//tensorflow/lite/kernels/internal:cpu_check",
         "//tensorflow/lite/kernels/internal:types",
         ":cpu_backend_context",
         ":cpu_backend_threadpool",
-        # Depend on ruy regardless of `tflite_with_ruy_only`. See the comment in
+        # Depend on ruy regardless of `tflite_with_ruy`. See the comment in
         # cpu_backend_gemm.h about why ruy is the generic path.
         "@ruy//ruy",
         "@ruy//ruy:matrix",
         "@ruy//ruy:path",
         "@ruy//ruy/profiler:instrumentation",
-        # We only need to depend on gemmlowp and Eigen when tflite_with_ruy_only
+        # We only need to depend on gemmlowp and Eigen when tflite_with_ruy
        # is false, but putting these dependencies in a select() seems to
         # defeat copybara's rewriting rules.
         "@gemmlowp",
@@ -605,7 +605,7 @@ cc_library(
         "//tensorflow/lite/kernels/internal:cppmath",
         "//tensorflow/lite:string",
         "@farmhash_archive//:farmhash",
-    ] + [":tflite_with_ruy_only_and_caching_enabled"],
+    ] + [":tflite_with_ruy_and_caching_enabled"],
 )
 
 cc_library(

View File

@@ -26,7 +26,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include "tensorflow/lite/kernels/internal/types.h"
 // b/131835803 forces us to include multithreaded_conv.h before optimized_ops.h
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 #include "tensorflow/lite/kernels/internal/optimized/multithreaded_conv.h"
 #endif
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
@@ -765,8 +765,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       break;
     }
     case kMultithreadOptimized: {
-#ifdef TFLITE_WITH_RUY_ONLY
-      // See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
+      // See Register_CONV_2D: we should never be here when TFLITE_WITH_RUY
       // was enabled. We #if out this code in order to get the corresponding
       // binary size benefits.
       TFLITE_DCHECK(false);
@@ -1051,8 +1051,8 @@ TfLiteRegistration* Register_CONVOLUTION_CBLAS_OPT() {
 TfLiteRegistration* Register_CONV_2D() {
 #if defined TFLITE_USE_APPLE_ACCELERATE_FOR_CONV
   return Register_CONVOLUTION_CBLAS_OPT();
-#elif defined TFLITE_WITH_RUY_ONLY
-  // TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
+#elif defined TFLITE_WITH_RUY
+  // TFLITE_WITH_RUY optimizes the generic kernel type.
   return Register_CONVOLUTION_GENERIC_OPT();
 #else
   return Register_CONVOLUTION_MULTITHREADED_OPT();
@@ -1063,8 +1063,8 @@ TfLiteRegistration* Register_CONV_2D() {
 // models only need the UINT8 type. TFLite's op registration mechanism doesn't
 // yet allow for more nuanced registration mechanisms.
 TfLiteRegistration* Register_CONV_2D_UINT8() {
-#if defined TFLITE_WITH_RUY_ONLY
-  // TFLITE_WITH_RUY_ONLY optimizes the generic kernel type.
+#if defined TFLITE_WITH_RUY
+  // TFLITE_WITH_RUY optimizes the generic kernel type.
   return Register_CONVOLUTION_GENERIC_OPT_UINT8();
 #else
   return Register_CONV_2D();

View File

@@ -148,7 +148,7 @@ class ConvolutionOpModel : public BaseConvolutionOpModel<float> {
 const auto kKernelMap = new std::map<string, TfLiteRegistration*>({
     {"Reference", ops::builtin::Register_CONVOLUTION_REF()},
     {"GenericOptimized", ops::builtin::Register_CONVOLUTION_GENERIC_OPT()},
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
     {"MultithreadedOptimized",
      ops::builtin::Register_CONVOLUTION_MULTITHREADED_OPT()},
 #endif

View File

@@ -56,7 +56,7 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
   // (see :cpu_backend_gemm), for now a CpuBackendContext always
   // stores both a gemmlowp context and a ruy context.
   // TODO(b/131416458): Once call sites all go through abstractions,
-  // elide what can be elided based on TFLITE_WITH_RUY_ONLY.
+  // elide what can be elided based on TFLITE_WITH_RUY.
   const std::unique_ptr<ruy::Context> ruy_context_;
   const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;

View File

@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_ruy.h"
 
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 #include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_gemmlowp.h"
 #endif
@@ -42,7 +42,7 @@ template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
 struct GemmImpl : detail::GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar,
                                            DstScalar, quantization_flavor> {};
 
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 
 /* Specializations using gemmlowp */
@@ -82,7 +82,7 @@ template <>
 struct GemmImpl<float, float, float, float, QuantizationFlavor::kFloatingPoint>
     : detail::GemmImplUsingEigen {};
 
-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY
 
 /* Public entry point */
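
The cpu_backend_gemm.h hunks above carry the core dispatch logic: the primary GemmImpl template always routes to the ruy implementation, and the gemmlowp/Eigen specializations are compiled only when TFLITE_WITH_RUY is not defined. A self-contained, hypothetical sketch of that pattern (illustrative names, not the real TFLite types):

// gemm_dispatch_sketch.cc -- hypothetical illustration of the "primary
// template = ruy, #ifndef-guarded specialization = fallback" pattern used in
// cpu_backend_gemm.h. Compile with -DTFLITE_WITH_RUY to drop the fallback.
#include <cstdio>

template <typename Scalar>
struct GemmImplSketch {
  static const char* Backend() { return "ruy"; }  // generic/default path
};

#ifndef TFLITE_WITH_RUY
// Only compiled when ruy is not the exclusive backend.
template <>
struct GemmImplSketch<float> {
  static const char* Backend() { return "eigen"; }
};
#endif

int main() {
  std::printf("float GEMM goes through: %s\n", GemmImplSketch<float>::Backend());
  return 0;
}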

View File

@@ -591,10 +591,10 @@ struct CustomGemvImpl<LhsScalar, RhsScalar, std::int32_t, DstScalar,
 // The float specialization below is unconditionally faster than ruy
 // because ruy does not currently have any Gemv path.
 // But it is not unconditionally faster than Eigen, which is what is used
-// unless TFLITE_WITH_RUY_ONLY is defined. Indeed, Eigen has decently efficient
+// unless TFLITE_WITH_RUY is defined. Indeed, Eigen has decently efficient
 // Gemv paths, and they may use AVX instructions, while the present
 // NEON intrinsics code maps at best to SSE4 on x86.
-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
 
 // We want to use fused multiply-add when it's available (that is, on A64
 // unconditionally and on A32 with VFPv4) because it's often faster, and
@@ -778,7 +778,7 @@ struct CustomGemvImpl<float, float, float, float,
   }
 };
 
-#endif  // TFLITE_WITH_RUY_ONLY
+#endif  // TFLITE_WITH_RUY
 
 #endif  // USE_NEON

View File

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 
 #include "tensorflow/lite/kernels/cpu_backend_gemm_eigen.h"
@@ -78,4 +78,4 @@ void GemmImplUsingEigen::Run(
 }  // namespace cpu_backend_gemm
 }  // namespace tflite
 
-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY

View File

@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_
 #define TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_
 
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
@@ -37,6 +37,6 @@ struct GemmImplUsingEigen {
 }  // namespace cpu_backend_gemm
 }  // namespace tflite
 
-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY
 
 #endif  // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_EIGEN_H_

View File

@@ -19,7 +19,7 @@ limitations under the License.
 #include <tuple>
 
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
 
 #include <cstdint>
 #include <type_traits>
@@ -190,6 +190,6 @@ struct GemmImplUsingGemmlowp<LhsScalar, RhsScalar, AccumScalar, DstScalar,
 }  // namespace cpu_backend_gemm
 }  // namespace tflite
 
-#endif  // not TFLITE_WITH_RUY_ONLY
+#endif  // not TFLITE_WITH_RUY
 
 #endif  // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_GEMMLOWP_H_

View File

@@ -19,7 +19,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 
-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
 #include "ruy/context.h"  // from @ruy
 #include "ruy/thread_pool.h"  // from @ruy
 #else
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tflite {
 namespace cpu_backend_threadpool {
 
-#ifdef TFLITE_WITH_RUY_ONLY
+#ifdef TFLITE_WITH_RUY
 
 using Task = ruy::Task;
@@ -41,7 +41,7 @@ void Execute(int tasks_count, TaskType* tasks,
       tasks_count, tasks);
 }
 
-#else  // not TFLITE_WITH_RUY_ONLY
+#else  // not TFLITE_WITH_RUY
 
 using Task = gemmlowp::Task;
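
cpu_backend_threadpool.h shows the same idea applied to threading: Task and Execute() resolve at compile time to either ruy's or gemmlowp's thread pool, so calling code never names a backend. A hypothetical, self-contained sketch of that aliasing pattern (simplified stand-ins; the real Execute() also takes additional arguments such as the backend context):

// threadpool_alias_sketch.cc -- hypothetical illustration of selecting a
// thread-pool backend via a compile-time alias, as cpu_backend_threadpool.h
// does with ruy::Task vs. gemmlowp::Task. Runs tasks serially for simplicity.
#include <cstdio>

namespace backend_ruy_like {
struct Task { virtual ~Task() = default; virtual void Run() = 0; };
}  // namespace backend_ruy_like
namespace backend_gemmlowp_like {
struct Task { virtual ~Task() = default; virtual void Run() = 0; };
}  // namespace backend_gemmlowp_like

#ifdef TFLITE_WITH_RUY
using Task = backend_ruy_like::Task;
#else
using Task = backend_gemmlowp_like::Task;
#endif

// Templated on the concrete task type so arrays of derived tasks can be
// passed directly; a real implementation would hand them to a thread pool.
template <typename TaskType>
void Execute(int tasks_count, TaskType* tasks) {
  for (int i = 0; i < tasks_count; ++i) tasks[i].Run();  // serial stand-in
}

struct PrintTask : Task {
  void Run() override { std::printf("task ran\n"); }
};

int main() {
  PrintTask tasks[2];
  Execute(2, tasks);
  return 0;
}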

View File

@@ -132,7 +132,7 @@ inline void DepthwiseConv(const DepthwiseParams& params,
   int thread_count = HowManyConvThreads(output_shape, filter_shape);
   const int max_threads = cpu_backend_context->max_num_threads();
   thread_count = std::max(1, std::min(thread_count, max_threads));
-#ifndef TFLITE_WITH_RUY_ONLY
+#ifndef TFLITE_WITH_RUY
   // Cap the number of threads to 2 for float path to avoid regression in
   // performance (b/132294857).
   if (std::is_floating_point<T>::value) {

View File

@@ -187,7 +187,7 @@ ifeq ($(TARGET_ARCH),aarch64)
   BUILD_WITH_RUY=true
 endif
 ifeq ($(BUILD_WITH_RUY),true)
-  CXXFLAGS += -DTFLITE_WITH_RUY_ONLY
+  CXXFLAGS += -DTFLITE_WITH_RUY
 endif
 
 BUILD_WITH_RUY_PROFILER ?= false