From 2d1163582aa689cf0780547a0fb60dacca236b62 Mon Sep 17 00:00:00 2001 From: Vijay Vasudevan <vrv@google.com> Date: Tue, 8 Dec 2015 15:41:27 -0800 Subject: [PATCH] TensorFlow: upstream changes from Eigen to make build work with nvcc in debug mode. --- third_party/eigen3/Eigen/Core | 1 + third_party/eigen3/Eigen/src/Core/Functors.h | 77 +++++++++- .../eigen3/Eigen/src/Core/GenericPacketMath.h | 17 ++- .../eigen3/Eigen/src/Core/GlobalFunctions.h | 3 + .../eigen3/Eigen/src/Core/SpecialFunctions.h | 142 ++++++++++++++++++ .../Eigen/src/Core/arch/AVX/PacketMath.h | 8 +- .../Eigen/src/Core/arch/CUDA/MathFunctions.h | 37 +++++ .../Eigen/src/Core/arch/CUDA/PacketMath.h | 6 + .../Eigen/src/Core/functors/BinaryFunctors.h | 58 +++++++ .../Eigen/src/Core/functors/UnaryFunctors.h | 85 +++++++++++ .../eigen3/Eigen/src/Core/util/Constants.h | 16 ++ .../Eigen/src/Core/util/ForwardDeclarations.h | 7 + .../eigen3/Eigen/src/Core/util/StaticAssert.h | 3 +- .../Eigen/src/plugins/ArrayCwiseUnaryOps.h | 40 ++++- .../Eigen/CXX11/src/Tensor/TensorBase.h | 56 ++++--- .../Eigen/CXX11/src/Tensor/TensorDeviceType.h | 7 + 16 files changed, 536 insertions(+), 27 deletions(-) create mode 100644 third_party/eigen3/Eigen/src/Core/SpecialFunctions.h diff --git a/third_party/eigen3/Eigen/Core b/third_party/eigen3/Eigen/Core index 68f29bc6939..c18105ad0ca 100644 --- a/third_party/eigen3/Eigen/Core +++ b/third_party/eigen3/Eigen/Core @@ -333,6 +333,7 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/SpecialFunctions.h" #include "src/Core/GenericPacketMath.h" #if defined EIGEN_VECTORIZE_AVX diff --git a/third_party/eigen3/Eigen/src/Core/Functors.h b/third_party/eigen3/Eigen/src/Core/Functors.h index 0a45fa31a9a..39088995bbd 100644 --- a/third_party/eigen3/Eigen/src/Core/Functors.h +++ b/third_party/eigen3/Eigen/src/Core/Functors.h @@ -769,7 +769,6 @@ struct functor_traits<scalar_sin_op<Scalar> > }; }; - /** \internal * \brief Template functor to compute the tan of a scalar * \sa class CwiseUnaryOp, ArrayBase::tan() @@ -827,6 +826,82 @@ struct functor_traits<scalar_asin_op<Scalar> > }; }; +/** \internal + * \brief Template functor to compute the lgamma of a scalar + * \sa class CwiseUnaryOp, ArrayBase::lgamma() + */ +template<typename Scalar> struct scalar_lgamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::lgamma; return lgamma(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { + return internal::plgamma(a); + } +}; + +template<typename Scalar> +struct functor_traits<scalar_lgamma_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasLGamma + }; +}; + +/** \internal + * \brief Template functor to compute the erf of a scalar + * \sa class CwiseUnaryOp, ArrayBase::erf() + */ +template<typename Scalar> struct scalar_erf_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::erf; return erf(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { + return internal::perf(a); + } +}; + +template<typename Scalar> +struct functor_traits<scalar_erf_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasErf + }; +}; + +/** \internal + * \brief Template functor to compute the erfc of a scalar + * \sa class CwiseUnaryOp, ArrayBase::erfc() + */ +template<typename Scalar> struct scalar_erfc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { + using numext::erfc; return erfc(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { + return internal::perfc(a); + } +}; + +template<typename Scalar> +struct functor_traits<scalar_erfc_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasErfc + }; +}; + + /** \internal * \brief Template functor to raise a scalar to a power * \sa class CwiseUnaryOp, Cwise::pow diff --git a/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h b/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h index bf9d6f9c338..8417a5458aa 100644 --- a/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h +++ b/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h @@ -69,7 +69,10 @@ struct default_packet_traits HasASin = 0, HasACos = 0, HasATan = 0, - HasTanH = 0 + HasTanH = 0, + HasLGamma = 0, + HasErf = 0, + HasErfc = 0 }; }; @@ -453,6 +456,18 @@ Packet ptanh(const Packet& x) return pselect(med_approx, small_approx, ple(pabs(x), thresh)); } +/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plgamma(const Packet& a) { return numext::lgamma(a); } + +/** \internal \returns the erf(\a a) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { return numext::erf(a); } + +/** \internal \returns the erfc(\a a) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perfc(const Packet& a) { return numext::erfc(a); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h b/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h index 0b1ce46ba29..d78978dec22 100644 --- a/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h +++ b/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h @@ -47,6 +47,9 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op) diff --git a/third_party/eigen3/Eigen/src/Core/SpecialFunctions.h b/third_party/eigen3/Eigen/src/Core/SpecialFunctions.h new file mode 100644 index 00000000000..5fdcedb0322 --- /dev/null +++ b/third_party/eigen3/Eigen/src/Core/SpecialFunctions.h @@ -0,0 +1,142 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPECIALFUNCTIONS_H +#define EIGEN_SPECIALFUNCTIONS_H + +namespace Eigen { + +namespace internal { + +template <typename Scalar> +EIGEN_STRONG_INLINE Scalar __lgamma(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __lgamma<float>(float x) { return lgammaf(x); } +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __lgamma<double>(double x) { return lgamma(x); } + +template <typename Scalar> +EIGEN_STRONG_INLINE Scalar __erf(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erf<float>(float x) { return erff(x); } +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erf<double>(double x) { return erf(x); } + +template <typename Scalar> +EIGEN_STRONG_INLINE Scalar __erfc(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erfc<float>(float x) { return erfcf(x); } +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erfc<double>(double x) { return erfc(x); } + +} // end namespace internal + +/**************************************************************************** +* Implementations * +****************************************************************************/ + +namespace internal { + +/**************************************************************************** +* Implementation of lgamma * +****************************************************************************/ + +template<typename Scalar> +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + return __lgamma<Scalar>(x); + } +}; + +template<typename Scalar> +struct lgamma_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of erf * +****************************************************************************/ + +template<typename Scalar> +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + return __erf<Scalar>(x); + } +}; + +template<typename Scalar> +struct erf_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of erfc * +****************************************************************************/ + +template<typename Scalar> +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + return __erfc<Scalar>(x); + } +}; + +template<typename Scalar> +struct erfc_retval +{ + typedef Scalar type; +}; + +} // end namespace internal + +namespace numext { + +template<typename Scalar> +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); +} + +template<typename Scalar> +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); +} + +template<typename Scalar> +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); +} + +} // end namespace numext + +} // end namespace Eigen + +#endif // EIGEN_SPECIALFUNCTIONS_H diff --git a/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h index 6369a836ab9..03a7d5127cb 100644 --- a/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h @@ -69,7 +69,7 @@ template<> struct packet_traits<float> : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasSelect = 1, - HasEq = 1, + HasEq = 1 }; }; template<> struct packet_traits<double> : default_packet_traits @@ -155,11 +155,11 @@ template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const #ifdef __FMA__ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG +#if EIGEN_GNUC_AT_MOST(4, 8) || EIGEN_COMP_CLANG // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, // and gcc stupidly generates a vfmadd132ps instruction, // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate - // the result of the product. + // the result of the product. the issue has been fixed in gcc 4.9 Packet8f res = c; asm("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); return res; @@ -168,7 +168,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& #endif } template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG +#if EIGEN_GNUC_AT_MOST(4, 8) || EIGEN_COMP_CLANG // see above Packet4d res = c; asm("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); diff --git a/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h index 675daae8f02..7e2fb7e699b 100644 --- a/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -66,6 +66,43 @@ double2 prsqrt<double2>(const double2& a) return make_double2(rsqrt(a.x), rsqrt(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plgamma<float4>(const float4& a) +{ + return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plgamma<double2>(const double2& a) +{ + return make_double2(lgamma(a.x), lgamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perf<float4>(const float4& a) +{ + return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perf<double2>(const double2& a) +{ + return make_double2(erf(a.x), erf(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perfc<float4>(const float4& a) +{ + return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perfc<double2>(const double2& a) +{ + return make_double2(erfc(a.x), erfc(a.y)); +} + + #endif } // end namespace internal diff --git a/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h index d11f5ba4116..02aac06ed3f 100644 --- a/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -38,6 +38,9 @@ template<> struct packet_traits<float> : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, HasSelect = 1, @@ -60,6 +63,9 @@ template<> struct packet_traits<double> : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, HasSelect = 1, diff --git a/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h index d8ea0584313..bffc72151a1 100644 --- a/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h +++ b/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h @@ -156,6 +156,64 @@ struct functor_traits<scalar_max_op<Scalar> > { }; }; + +/** \internal + * \brief Template functors for comparison of two scalars + * \todo Implement packet-comparisons + */ +template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op; + +template<typename Scalar, ComparisonName cmp> +struct functor_traits<scalar_cmp_op<Scalar, cmp> > { + enum { + Cost = NumTraits<Scalar>::AddCost, + PacketAccess = false + }; +}; + +template<ComparisonName Cmp, typename Scalar> +struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> { + typedef bool type; +}; + + +template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} +}; +template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;} +}; +template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} +}; +template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GT> { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} +}; +template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GE> { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} +}; +template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} +}; +template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> { + typedef bool result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} +}; + + /** \internal * \brief Template functor to compute the hypot of two scalars * diff --git a/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h index 6feb2293391..8e181b60ff1 100644 --- a/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h +++ b/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h @@ -28,6 +28,12 @@ using std::asin; using std::atan; #endif +#if defined(__CUDA_ARCH__) +using std::lgamma; // Supported by all cuda compilers +using std::erf; // Supported by all cuda compilers +using std::erfc; // Supported by all cuda compilers +#endif + /** \internal * \brief Template functor to compute the opposite of a scalar * @@ -409,6 +415,85 @@ struct functor_traits<scalar_tanh_op<Scalar> > }; }; +/** \internal + * \brief Template functor to compute the natural log of the absolute value of Gamma of a scalar + * \sa class CwiseUnaryOp, Cwise::lgamma() + */ +template<typename Scalar> struct scalar_lgamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { +#if defined(__CUDA_ARCH__) + return lgamma(a); +#else + using numext::lgamma; return lgamma(a); +#endif + } + typedef typename packet_traits<Scalar>::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_lgamma_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasLGamma + }; +}; + +/** \internal + * \brief Template functor to compute the Gauss error function of a scalar + * \sa class CwiseUnaryOp, Cwise::erf() + */ +template<typename Scalar> struct scalar_erf_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { +#if defined(__CUDA_ARCH__) + return erf(a); +#else + using numext::erf; return erf(a); +#endif + } + typedef typename packet_traits<Scalar>::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perf(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_erf_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasErf + }; +}; + +/** \internal + * \brief Template functor to compute the Complementary Error Function of a scalar + * \sa class CwiseUnaryOp, Cwise::erfc() + */ +template<typename Scalar> struct scalar_erfc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { +#if defined(__CUDA_ARCH__) + return erfc(a); +#else + using numext::erfc; return erfc(a); +#endif + } + typedef typename packet_traits<Scalar>::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_erfc_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasErfc + }; +}; + + /** \internal * \brief Template functor to compute the sigmoid of a scalar * \sa class CwiseUnaryOp, ArrayBase::sigmoid() diff --git a/third_party/eigen3/Eigen/src/Core/util/Constants.h b/third_party/eigen3/Eigen/src/Core/util/Constants.h index be14df01688..75b91cdcebb 100644 --- a/third_party/eigen3/Eigen/src/Core/util/Constants.h +++ b/third_party/eigen3/Eigen/src/Core/util/Constants.h @@ -448,6 +448,22 @@ struct MatrixXpr {}; /** The type used to identify an array expression */ struct ArrayXpr {}; +namespace internal { + +/** \internal + * Constants for comparison functors + */ +enum ComparisonName { + cmp_EQ = 0, + cmp_LT = 1, + cmp_LE = 2, + cmp_UNORD = 3, + cmp_NEQ = 4, + cmp_GT = 5, + cmp_GE = 6 +}; +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_CONSTANTS_H diff --git a/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h b/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h index be39d731ad3..f8cd6e47eed 100644 --- a/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h +++ b/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h @@ -296,6 +296,13 @@ template<typename MatrixType, unsigned int Mode> struct eigen2_part_return_type; } #endif +// SpecialFunctions forward declarations +namespace internal { +template <typename Scalar> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __lgamma(Scalar x); +template <typename Scalar> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __erf(Scalar x); +template <typename Scalar> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __erfc(Scalar x); +} + } // end namespace Eigen #endif // EIGEN_FORWARDDECLARATIONS_H diff --git a/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h b/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h index 396e27b9000..461c52fba91 100644 --- a/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h +++ b/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h @@ -90,7 +90,8 @@ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, + THIS_TYPE_IS_NOT_SUPPORTED }; }; diff --git a/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h index ea6778c3f54..fbf0d2031b9 100644 --- a/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -166,6 +166,45 @@ atan() const return derived(); } +/** \returns an expression of the coefficient-wise ln(|gamma(*this)|). + * + * Example: \include Cwise_lgamma.cpp + * Output: \verbinclude Cwise_lgamma.out + * + * \sa cos(), sin(), tan() + */ +inline const CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, Derived> +lgamma() const +{ + return derived(); +} + +/** \returns an expression of the coefficient-wise Gauss error function of *this. + * + * Example: \include Cwise_erf.cpp + * Output: \verbinclude Cwise_erf.out + * + * \sa cos(), sin(), tan() + */ +inline const CwiseUnaryOp<internal::scalar_erf_op<Scalar>, Derived> +erf() const +{ + return derived(); +} + +/** \returns an expression of the coefficient-wise Complementary error function of *this. + * + * Example: \include Cwise_erfc.cpp + * Output: \verbinclude Cwise_erfc.out + * + * \sa cos(), sin(), tan() + */ +inline const CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, Derived> +erfc() const +{ + return derived(); +} + /** \returns an expression of the coefficient-wise power of *this to the given exponent. * * Example: \include Cwise_pow.cpp @@ -181,7 +220,6 @@ pow(const Scalar& exponent) const (derived(), internal::scalar_pow_op<Scalar>(exponent)); } - /** \returns an expression of the coefficient-wise inverse of *this. * * Example: \include Cwise_inverse.cpp diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 720c3b6a822..723f17c2640 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -122,6 +122,24 @@ class TensorBase<Derived, ReadOnlyAccessors> return unaryExpr(internal::scalar_tanh_op<Scalar>()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> + lgamma() const { + return unaryExpr(internal::scalar_lgamma_op<Scalar>()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> + erf() const { + return unaryExpr(internal::scalar_erf_op<Scalar>()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> + erfc() const { + return unaryExpr(internal::scalar_erfc_op<Scalar>()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> sigmoid() const { @@ -286,67 +304,67 @@ class TensorBase<Derived, ReadOnlyAccessors> return binaryExpr(other.derived(), internal::scalar_boolean_xor_op()); } - // Comparisons and tests. + // Comparisons and tests. template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<std::less<Scalar>, const Derived, const OtherDerived> + const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::less<Scalar>()); + return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LT>()); } template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<std::less_equal<Scalar>, const Derived, const OtherDerived> + const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::less_equal<Scalar>()); + return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LE>()); } template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<std::greater<Scalar>, const Derived, const OtherDerived> + const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const OtherDerived> operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::greater<Scalar>()); + return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GT>()); } template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<std::greater_equal<Scalar>, const Derived, const OtherDerived> + const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const OtherDerived> operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::greater_equal<Scalar>()); + return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GE>()); } template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived> + const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::equal_to<Scalar>()); + return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_EQ>()); } template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived> + const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::not_equal_to<Scalar>()); + return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>()); } // comparisons and tests for Scalars EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::less<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > operator<(Scalar threshold) const { return operator<(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::less_equal<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > operator<=(Scalar threshold) const { return operator<=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::greater<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > operator>(Scalar threshold) const { return operator>(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::greater_equal<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > operator>=(Scalar threshold) const { return operator>=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::equal_to<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > operator==(Scalar threshold) const { return operator==(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > operator!=(Scalar threshold) const { return operator!=(constant(threshold)); } diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h index 48859fe5fa9..ac2b2633ff8 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h @@ -451,6 +451,8 @@ class StreamInterface { static cudaDeviceProp* m_deviceProperties; static bool m_devicePropInitialized = false; + +#ifndef __CUDA_ARCH__ static tensorflow::mutex m_devicePropInitMutex(tensorflow::LINKER_INITIALIZED); static void initializeDeviceProp() { @@ -469,6 +471,11 @@ static void initializeDeviceProp() { } } } +#else +static void initializeDeviceProp() { + assert(false && "This function should never be called from within a CUDA kernel"); +} +#endif // __CUDA_ARCH__ static const cudaStream_t default_stream = cudaStreamDefault;