Merge pull request #14893 from powderluv/fix_xla_osx

[XLA] Fix XLA/tfcompile on OSX: #if-guard the AVX, SSE and NEON instructions
This commit is contained in:
Yifei Feng 2017-12-19 18:17:23 -08:00 committed by GitHub
commit 8103945505
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 40 additions and 11 deletions

View File

@ -38,14 +38,16 @@ typedef float V8F32AVX __attribute__((__vector_size__(32)));
extern "C" {
#ifdef __AVX__
// The following functions are vectorized versions of a selection of libm
// library functions.
// References to these functions are created by the LLVM vectorizer.
//
// The declarations exist only when this translation unit is compiled with
// __AVX__ defined, so any code that names these symbols must sit behind the
// same guard.
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX(
    xla::cpu::runtime::V8F32AVX x);
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX(
    xla::cpu::runtime::V8F32AVX x);
#endif  // __AVX__
}
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_

View File

@ -49,14 +49,16 @@ struct V4F32NEON;
extern "C" {
#ifdef __ARM_NEON__
// The following functions are vectorized versions of a selection of libm
// library functions.
// References to these functions are created by the LLVM vectorizer.
//
// The declarations exist only when this translation unit is compiled with
// __ARM_NEON__ defined, so any code that names these symbols must sit behind
// the same guard.
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON(
    xla::cpu::runtime::V4F32NEON x);
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON(
    xla::cpu::runtime::V4F32NEON x);
#endif  // __ARM_NEON__
}
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_

View File

@ -39,14 +39,17 @@ typedef float V4F32SSE __attribute__((__vector_size__(16)));
extern "C" {
#ifdef __SSE4_1__
// The following functions are vectorized versions of a selection of libm
// library functions.
// References to these functions are created by the LLVM vectorizer.
//
// The declarations exist only when this translation unit is compiled with
// __SSE4_1__ defined, so any code that names these symbols must sit behind
// the same guard.
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE(
    xla::cpu::runtime::V4F32SSE x);
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE(
    xla::cpu::runtime::V4F32SSE x);
#endif  // __SSE4_1__
}
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_

View File

@ -102,9 +102,21 @@ llvm::StringRef GetHostCpuName() {
// Reports which families of vectorized runtime intrinsics are available.
//
// Each flag is fixed at compile time from the matching target macro
// (__SSE4_1__, __AVX__, __ARM_NEON__). The runtime symbols themselves are not
// referenced here: they are declared only under those same macros, so naming
// them unconditionally would not compile on targets where a macro is absent.
CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() {
  CompilerFunctor::VectorIntrinsics intrinsics;
#ifdef __SSE4_1__
  intrinsics.sse_intrinsics = true;
#else
  intrinsics.sse_intrinsics = false;
#endif
#ifdef __AVX__
  intrinsics.avx_intrinsics = true;
#else
  intrinsics.avx_intrinsics = false;
#endif
#ifdef __ARM_NEON__
  intrinsics.neon_intrinsics = true;
#else
  intrinsics.neon_intrinsics = false;
#endif
  return intrinsics;
}
@ -201,12 +213,18 @@ bool RegisterKnownJITSymbols() {
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
#ifdef __ARM_NEON__
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON);
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON);
#endif
#ifdef __SSE4_1__
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE);
#endif
#ifdef __AVX__
REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX);
#endif
REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
@ -275,7 +293,11 @@ bool RegisterKnownJITSymbols() {
REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long));
REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
REGISTER_LIBM_SYMBOL(sin, double (*)(double));
#ifdef __APPLE__
REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
#else
REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
#endif
REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
REGISTER_LIBM_SYMBOL(tan, double (*)(double));