diff --git a/tensorflow/lite/experimental/micro/tools/make/Makefile b/tensorflow/lite/experimental/micro/tools/make/Makefile index 6e100987b7d..8f5c8842021 100644 --- a/tensorflow/lite/experimental/micro/tools/make/Makefile +++ b/tensorflow/lite/experimental/micro/tools/make/Makefile @@ -105,7 +105,7 @@ tensorflow/lite/kernels/kernel_util.h \ tensorflow/lite/kernels/op_macros.h \ tensorflow/lite/kernels/padding.h \ tensorflow/lite/kernels/internal/common.h \ -tensorflow/lite/kernels/internal/compatibility.h \ +tensorflow/lite/kernels/internal/optimized/neon_check.h \ tensorflow/lite/kernels/internal/reference/conv.h \ tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h \ tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h \ diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 16d7f926793..44f2d023b73 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -807,6 +807,7 @@ cc_library( name = "cpu_check", hdrs = [ "optimized/cpu_check.h", + "optimized/neon_check.h", ], deps = [ "//tensorflow/lite/kernels:cpu_backend_context", diff --git a/tensorflow/lite/kernels/internal/common.h b/tensorflow/lite/kernels/internal/common.h index 8ba2d92a315..0c4fbc1e84e 100644 --- a/tensorflow/lite/kernels/internal/common.h +++ b/tensorflow/lite/kernels/internal/common.h @@ -22,7 +22,7 @@ limitations under the License. 
#endif #include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h" +#include "tensorflow/lite/kernels/internal/optimized/neon_check.h" #include "tensorflow/lite/kernels/internal/types.h" namespace tflite { diff --git a/tensorflow/lite/kernels/internal/optimized/cpu_check.h b/tensorflow/lite/kernels/internal/optimized/cpu_check.h index 3847d3a0634..2c6a682f3b2 100644 --- a/tensorflow/lite/kernels/internal/optimized/cpu_check.h +++ b/tensorflow/lite/kernels/internal/optimized/cpu_check.h @@ -16,22 +16,7 @@ limitations under the License. #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_H_ #include "tensorflow/lite/kernels/cpu_backend_context.h" - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) -#define USE_NEON -#include <arm_neon.h> -#endif // __ARM_NEON - -#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON -#define USE_NEON -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#pragma GCC diagnostic ignored "-Wattributes" -#pragma GCC diagnostic ignored "-Wnarrowing" -#pragma GCC diagnostic ignored "-Wsequence-point" -#include "NEON_2_SSE.h" -#pragma GCC diagnostic pop -#endif // __SSE4_1__ +#include "tensorflow/lite/kernels/internal/optimized/neon_check.h" namespace tflite { @@ -49,16 +34,4 @@ inline void GetCpuFlags(CpuBackendContext* cpu_backend_context, } // namespace tflite -// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is -// defined, PortableSomeFunc(args) otherwise. -#ifdef USE_NEON -// Always use Neon code -#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__) - -#else -// No NEON available: Use Portable code -#define NEON_OR_PORTABLE(funcname, ...) 
Portable##funcname(__VA_ARGS__) - -#endif // defined(USE_NEON) - #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_H_ diff --git a/tensorflow/lite/kernels/internal/optimized/neon_check.h b/tensorflow/lite/kernels/internal/optimized/neon_check.h new file mode 100644 index 00000000000..a72af90f52b --- /dev/null +++ b/tensorflow/lite/kernels/internal/optimized/neon_check.h @@ -0,0 +1,46 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#include <arm_neon.h> +#endif + +#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON +#define USE_NEON +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#pragma GCC diagnostic ignored "-Wattributes" +#pragma GCC diagnostic ignored "-Wnarrowing" +#pragma GCC diagnostic ignored "-Wsequence-point" +#include "NEON_2_SSE.h" +#pragma GCC diagnostic pop +#endif + +// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is +// defined, PortableSomeFunc(args) otherwise. +#ifdef USE_NEON +// Always use Neon code +#define NEON_OR_PORTABLE(funcname, ...) 
Neon##funcname(__VA_ARGS__) + +#else +// No NEON available: Use Portable code +#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__) + +#endif // defined(USE_NEON) + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_