From 851b3f5a467bdd1f41c32a6b346940980530898d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 26 Jun 2019 07:11:29 -0700 Subject: [PATCH] Disable dot-product depthwise conv when not compiling for Android with Clang. PiperOrigin-RevId: 255181633 --- .../kernels/internal/depthwiseconv_quantized_test.cc | 9 ++++++--- .../kernels/internal/optimized/depthwiseconv_uint8.h | 5 ++++- .../internal/optimized/depthwiseconv_uint8_3x3_filter.h | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc index 8baf2c7253c..fd5b89eaf73 100644 --- a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc +++ b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc @@ -170,7 +170,8 @@ inline void DispatchDepthwiseConv( // This is compiled-in even if dot-product instructions are unavailable. // However, tests should skip dot-product testing in that case and not // call this code. -#if defined(__aarch64__) && !defined(GOOGLE_L4T) +#if defined(__aarch64__) && !defined(GOOGLE_L4T) && defined(__ANDROID__) && \ + defined(__clang__) DotProduct3x3KernelType kernel_type = optimized_ops::depthwise_conv::CategorizeDotProductKernel( input_shape, filter_shape, params); @@ -683,7 +684,8 @@ void TestOneDepthwiseConv3x3Filter( } void TestOneNeonDot3x3(const TestParam& test_param) { -#if defined(__aarch64__) && !defined(GOOGLE_L4T) +#if defined(__aarch64__) && !defined(GOOGLE_L4T) && defined(__ANDROID__) && \ + defined(__clang__) CpuBackendContext backend_context; ruy::Context* ruy_context = backend_context.ruy_context(); const auto ruy_paths = ruy_context != nullptr @@ -854,7 +856,8 @@ INSTANTIATE_TEST_SUITE_P( TestParam::TestNameSuffix); #endif -#if defined(__aarch64__) && !defined(GOOGLE_L4T) +#if defined(__aarch64__) && !defined(GOOGLE_L4T) && defined(__ANDROID__) && \ + defined(__clang__) INSTANTIATE_TEST_SUITE_P( NeonAsm, DepthwiseConvTest, testing::Combine( diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 23940e3c332..8b57c3ed65e 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -2006,7 +2006,8 @@ inline void DepthwiseConvWithRounding( // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. -#if defined(__aarch64__) && !defined(GOOGLE_L4T) +#if defined(__aarch64__) && !defined(GOOGLE_L4T) && defined(__ANDROID__) && \ + defined(__clang__) // Dispatch to dot-product 3x3 kernels when supported. if (cpu_flags.neon_dotprod) { using optimized_ops::depthwise_conv::DotProduct3x3KernelType; @@ -2025,6 +2026,8 @@ inline void DepthwiseConvWithRounding( } } +#elif defined(__aarch64__) && !defined(GOOGLE_L4T) + // Dispatch to non-dot-product 3x3 kernels when supported. const int stride_width = params.stride_width; diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index cf2bcb22798..9f827e988a4 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -5786,7 +5786,8 @@ struct WorkspacePrefetchWrite< #endif // __aarch64__ -#if defined(__aarch64__) && !defined(GOOGLE_L4T) +#if defined(__aarch64__) && !defined(GOOGLE_L4T) && defined(__ANDROID__) && \ + defined(__clang__) // Dot product ops hard-coded template <>