Merge pull request #14893 from powderluv/fix_xla_osx
[XLA] FIX XLA/tfcompile on OSX. #if Guard AVX, SSE and NEON instructions
This commit is contained in:
commit
8103945505
@ -38,14 +38,16 @@ typedef float V8F32AVX __attribute__((__vector_size__(32)));
|
|||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
||||||
|
#ifdef __AVX__
|
||||||
// The following functions are vectorized versions of a selection of libm
|
// The following functions are vectorized versions of a selection of libm
|
||||||
// library functions.
|
// library functions.
|
||||||
// References to these functions are created by the LLVM vectorizer.
|
// References to these functions are created by the LLVM vectorizer.
|
||||||
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX(
|
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX(
|
||||||
xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
|
xla::cpu::runtime::V8F32AVX x);
|
||||||
|
|
||||||
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX(
|
xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX(
|
||||||
xla::cpu::runtime::V8F32AVX x) TF_ATTRIBUTE_WEAK;
|
xla::cpu::runtime::V8F32AVX x);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_
|
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_
|
||||||
|
@ -49,14 +49,16 @@ struct V4F32NEON;
|
|||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
// The following functions are vectorized versions of a selection of libm
|
// The following functions are vectorized versions of a selection of libm
|
||||||
// library functions.
|
// library functions.
|
||||||
// References to these functions are created by the LLVM vectorizer.
|
// References to these functions are created by the LLVM vectorizer.
|
||||||
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON(
|
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON(
|
||||||
xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
|
xla::cpu::runtime::V4F32NEON x);
|
||||||
|
|
||||||
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON(
|
xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON(
|
||||||
xla::cpu::runtime::V4F32NEON x) TF_ATTRIBUTE_WEAK;
|
xla::cpu::runtime::V4F32NEON x);
|
||||||
|
#endif // __ARM_NEON__
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_
|
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_
|
||||||
|
@ -39,14 +39,17 @@ typedef float V4F32SSE __attribute__((__vector_size__(16)));
|
|||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
||||||
|
#ifdef __SSE4_1__
|
||||||
// The following functions are vectorized versions of a selection of libm
|
// The following functions are vectorized versions of a selection of libm
|
||||||
// library functions.
|
// library functions.
|
||||||
// References to these functions are created by the LLVM vectorizer.
|
// References to these functions are created by the LLVM vectorizer.
|
||||||
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE(
|
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_ExpV4F32SSE(
|
||||||
xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
|
xla::cpu::runtime::V4F32SSE x);
|
||||||
|
|
||||||
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE(
|
xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE(
|
||||||
xla::cpu::runtime::V4F32SSE x) TF_ATTRIBUTE_WEAK;
|
xla::cpu::runtime::V4F32SSE x);
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_
|
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_
|
||||||
|
@ -102,9 +102,21 @@ llvm::StringRef GetHostCpuName() {
|
|||||||
|
|
||||||
CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() {
|
CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() {
|
||||||
CompilerFunctor::VectorIntrinsics intrinsics;
|
CompilerFunctor::VectorIntrinsics intrinsics;
|
||||||
intrinsics.sse_intrinsics = (&__xla_cpu_runtime_ExpV4F32SSE != nullptr);
|
#ifdef __SSE4_1__
|
||||||
intrinsics.avx_intrinsics = (&__xla_cpu_runtime_ExpV8F32AVX != nullptr);
|
intrinsics.sse_intrinsics = true;
|
||||||
intrinsics.neon_intrinsics = (&__xla_cpu_runtime_ExpV4F32NEON != nullptr);
|
#else
|
||||||
|
intrinsics.sse_intrinsics = false;
|
||||||
|
#endif
|
||||||
|
#ifdef __AVX__
|
||||||
|
intrinsics.avx_intrinsics = true;
|
||||||
|
#else
|
||||||
|
intrinsics.avx_intrinsics = false;
|
||||||
|
#endif
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
|
intrinsics.neon_intrinsics = true;
|
||||||
|
#else
|
||||||
|
intrinsics.neon_intrinsics = false;
|
||||||
|
#endif
|
||||||
return intrinsics;
|
return intrinsics;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -201,12 +213,18 @@ bool RegisterKnownJITSymbols() {
|
|||||||
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
|
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
|
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
|
REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON);
|
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON);
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
|
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
|
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON);
|
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON);
|
||||||
|
#endif
|
||||||
|
#ifdef __SSE4_1__
|
||||||
|
REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE);
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE);
|
REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE);
|
||||||
|
#endif
|
||||||
|
#ifdef __AVX__
|
||||||
|
REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX);
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX);
|
REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX);
|
||||||
|
#endif
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
|
REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
|
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
|
||||||
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
|
REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
|
||||||
@ -275,7 +293,11 @@ bool RegisterKnownJITSymbols() {
|
|||||||
REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long));
|
REGISTER_LIBM_SYMBOL(scalbln, double (*)(double, long));
|
||||||
REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
|
REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
|
||||||
REGISTER_LIBM_SYMBOL(sin, double (*)(double));
|
REGISTER_LIBM_SYMBOL(sin, double (*)(double));
|
||||||
|
#ifdef __APPLE__
|
||||||
|
REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
|
||||||
|
#else
|
||||||
REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
|
REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
|
||||||
|
#endif
|
||||||
REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
|
REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
|
||||||
REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
|
REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
|
||||||
REGISTER_LIBM_SYMBOL(tan, double (*)(double));
|
REGISTER_LIBM_SYMBOL(tan, double (*)(double));
|
||||||
|
Loading…
Reference in New Issue
Block a user