From 2d1163582aa689cf0780547a0fb60dacca236b62 Mon Sep 17 00:00:00 2001
From: Vijay Vasudevan <vrv@google.com>
Date: Tue, 8 Dec 2015 15:41:27 -0800
Subject: [PATCH] TensorFlow: upstream changes from Eigen to make build work
 with nvcc in debug mode.

---
 third_party/eigen3/Eigen/Core                 |   1 +
 third_party/eigen3/Eigen/src/Core/Functors.h  |  77 +++++++++-
 .../eigen3/Eigen/src/Core/GenericPacketMath.h |  17 ++-
 .../eigen3/Eigen/src/Core/GlobalFunctions.h   |   3 +
 .../eigen3/Eigen/src/Core/SpecialFunctions.h  | 142 ++++++++++++++++++
 .../Eigen/src/Core/arch/AVX/PacketMath.h      |   8 +-
 .../Eigen/src/Core/arch/CUDA/MathFunctions.h  |  37 +++++
 .../Eigen/src/Core/arch/CUDA/PacketMath.h     |   6 +
 .../Eigen/src/Core/functors/BinaryFunctors.h  |  58 +++++++
 .../Eigen/src/Core/functors/UnaryFunctors.h   |  85 +++++++++++
 .../eigen3/Eigen/src/Core/util/Constants.h    |  16 ++
 .../Eigen/src/Core/util/ForwardDeclarations.h |   7 +
 .../eigen3/Eigen/src/Core/util/StaticAssert.h |   3 +-
 .../Eigen/src/plugins/ArrayCwiseUnaryOps.h    |  40 ++++-
 .../Eigen/CXX11/src/Tensor/TensorBase.h       |  56 ++++---
 .../Eigen/CXX11/src/Tensor/TensorDeviceType.h |   7 +
 16 files changed, 536 insertions(+), 27 deletions(-)
 create mode 100644 third_party/eigen3/Eigen/src/Core/SpecialFunctions.h

diff --git a/third_party/eigen3/Eigen/Core b/third_party/eigen3/Eigen/Core
index 68f29bc6939..c18105ad0ca 100644
--- a/third_party/eigen3/Eigen/Core
+++ b/third_party/eigen3/Eigen/Core
@@ -333,6 +333,7 @@ using std::ptrdiff_t;
 
 #include "src/Core/NumTraits.h"
 #include "src/Core/MathFunctions.h"
+#include "src/Core/SpecialFunctions.h"
 #include "src/Core/GenericPacketMath.h"
 
 #if defined EIGEN_VECTORIZE_AVX
diff --git a/third_party/eigen3/Eigen/src/Core/Functors.h b/third_party/eigen3/Eigen/src/Core/Functors.h
index 0a45fa31a9a..39088995bbd 100644
--- a/third_party/eigen3/Eigen/src/Core/Functors.h
+++ b/third_party/eigen3/Eigen/src/Core/Functors.h
@@ -769,7 +769,6 @@ struct functor_traits<scalar_sin_op<Scalar> >
   };
 };
 
-
 /** \internal
   * \brief Template functor to compute the tan of a scalar
   * \sa class CwiseUnaryOp, ArrayBase::tan()
@@ -827,6 +826,82 @@ struct functor_traits<scalar_asin_op<Scalar> >
   };
 };
 
+/** \internal
+  * \brief Template functor to compute ln(|Gamma(x)|), the log-gamma function, of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::lgamma()
+  */
+template<typename Scalar> struct scalar_lgamma_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
+    using numext::lgamma; return lgamma(a);  // unqualified call: numext::lgamma is a candidate alongside overloads found by ADL
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const {
+    return internal::plgamma(a);  // packet counterpart of operator()
+  }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_lgamma_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasLGamma
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the Gauss error function erf(x) of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::erf()
+  */
+template<typename Scalar> struct scalar_erf_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
+    using numext::erf; return erf(a);  // unqualified call: numext::erf is a candidate alongside overloads found by ADL
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const {
+    return internal::perf(a);  // packet counterpart of operator()
+  }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_erf_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasErf
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the complementary error function erfc(x) of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::erfc()
+  */
+template<typename Scalar> struct scalar_erfc_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
+    using numext::erfc; return erfc(a);  // unqualified call: numext::erfc is a candidate alongside overloads found by ADL
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const {
+    return internal::perfc(a);  // packet counterpart of operator()
+  }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_erfc_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasErfc
+  };
+};
+
+
 /** \internal
   * \brief Template functor to raise a scalar to a power
   * \sa class CwiseUnaryOp, Cwise::pow
diff --git a/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h b/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h
index bf9d6f9c338..8417a5458aa 100644
--- a/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h
+++ b/third_party/eigen3/Eigen/src/Core/GenericPacketMath.h
@@ -69,7 +69,10 @@ struct default_packet_traits
     HasASin   = 0,
     HasACos   = 0,
     HasATan   = 0,
-    HasTanH   = 0
+    HasTanH   = 0,
+    HasLGamma = 0,
+    HasErf = 0,
+    HasErfc = 0
   };
 };
 
@@ -453,6 +456,18 @@ Packet ptanh(const Packet& x)
   return pselect(med_approx, small_approx, ple(pabs(x), thresh));
 }
 
+/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise); this generic version forwards \a a to numext::lgamma */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plgamma(const Packet& a) { return numext::lgamma(a); }
+
+/** \internal \returns the erf(\a a) (coeff-wise); this generic version forwards \a a to numext::erf */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perf(const Packet& a) { return numext::erf(a); }
+
+/** \internal \returns the erfc(\a a) (coeff-wise); this generic version forwards \a a to numext::erfc */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perfc(const Packet& a) { return numext::erfc(a); }
+
 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
diff --git a/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h b/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h
index 0b1ce46ba29..d78978dec22 100644
--- a/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h
+++ b/third_party/eigen3/Eigen/src/Core/GlobalFunctions.h
@@ -47,6 +47,9 @@ namespace Eigen
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
diff --git a/third_party/eigen3/Eigen/src/Core/SpecialFunctions.h b/third_party/eigen3/Eigen/src/Core/SpecialFunctions.h
new file mode 100644
index 00000000000..5fdcedb0322
--- /dev/null
+++ b/third_party/eigen3/Eigen/src/Core/SpecialFunctions.h
@@ -0,0 +1,142 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIALFUNCTIONS_H
+#define EIGEN_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar __lgamma(Scalar x) {  // primary template: any Scalar without a specialization trips the static assert
+  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                      THIS_TYPE_IS_NOT_SUPPORTED);
+}
+
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __lgamma<float>(float x) { return lgammaf(x); }  // single precision
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __lgamma<double>(double x) { return lgamma(x); }  // double precision
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar __erf(Scalar x) {  // primary template: any Scalar without a specialization trips the static assert
+  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                      THIS_TYPE_IS_NOT_SUPPORTED);
+}
+
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erf<float>(float x) { return erff(x); }  // single precision
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erf<double>(double x) { return erf(x); }  // double precision
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar __erfc(Scalar x) {  // primary template: any Scalar without a specialization trips the static assert
+  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                      THIS_TYPE_IS_NOT_SUPPORTED);
+}
+
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erfc<float>(float x) { return erfcf(x); }  // single precision
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erfc<double>(double x) { return erfc(x); }  // double precision
+
+}  // end namespace internal
+
+/****************************************************************************
+* Implementations                                                           *
+****************************************************************************/
+
+namespace internal {
+
+/****************************************************************************
+* Implementation of lgamma                                                  *
+****************************************************************************/
+
+template<typename Scalar>
+struct lgamma_impl
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar& x)
+  {
+    return __lgamma<Scalar>(x);
+  }
+};
+
+template<typename Scalar>
+struct lgamma_retval
+{
+  typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of erf                                                     *
+****************************************************************************/
+
+template<typename Scalar>
+struct erf_impl
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar& x)
+  {
+    return __erf<Scalar>(x);
+  }
+};
+
+template<typename Scalar>
+struct erf_retval
+{
+  typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of erfc                                                    *
+****************************************************************************/
+
+template<typename Scalar>
+struct erfc_impl
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar& x)
+  {
+    return __erfc<Scalar>(x);
+  }
+};
+
+template<typename Scalar>
+struct erfc_retval
+{
+  typedef Scalar type;
+};
+
+}  // end namespace internal
+
+namespace numext {
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
+}
+
+}  // end namespace numext
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_SPECIALFUNCTIONS_H
diff --git a/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
index 6369a836ab9..03a7d5127cb 100644
--- a/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/third_party/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -69,7 +69,7 @@ template<> struct packet_traits<float>  : default_packet_traits
     HasSqrt = 1,
     HasRsqrt = 1,
     HasSelect = 1,
-    HasEq = 1,
+    HasEq = 1
   };
  };
 template<> struct packet_traits<double> : default_packet_traits
@@ -155,11 +155,11 @@ template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const
 
 #ifdef __FMA__
 template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+#if EIGEN_GNUC_AT_MOST(4, 8) || EIGEN_COMP_CLANG
   // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
   // and gcc stupidly generates a vfmadd132ps instruction,
   // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
-  // the result of the product.
+  // the result of the product. The issue has been fixed in gcc 4.9.
   Packet8f res = c;
   asm("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
   return res;
@@ -168,7 +168,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
 #endif
 }
 template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+#if EIGEN_GNUC_AT_MOST(4, 8) || EIGEN_COMP_CLANG
   // see above
   Packet4d res = c;
   asm("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
diff --git a/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h b/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h
index 675daae8f02..7e2fb7e699b 100644
--- a/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/third_party/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -66,6 +66,43 @@ double2 prsqrt<double2>(const double2& a)
   return make_double2(rsqrt(a.x), rsqrt(a.y));
 }
 
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plgamma<float4>(const float4& a)
+{
+  return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plgamma<double2>(const double2& a)
+{
+  return make_double2(lgamma(a.x), lgamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perf<float4>(const float4& a)
+{
+  return make_float4(erff(a.x), erff(a.y), erff(a.z), erff(a.w));  // explicit float variant, matching lgammaf in plgamma<float4>
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perf<double2>(const double2& a)
+{
+  return make_double2(erf(a.x), erf(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perfc<float4>(const float4& a)
+{
+  return make_float4(erfcf(a.x), erfcf(a.y), erfcf(a.z), erfcf(a.w));  // explicit float variant, matching lgammaf in plgamma<float4>
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perfc<double2>(const double2& a)
+{
+  return make_double2(erfc(a.x), erfc(a.y));
+}
+
+
 #endif
 
 } // end namespace internal
diff --git a/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h b/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h
index d11f5ba4116..02aac06ed3f 100644
--- a/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/third_party/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -38,6 +38,9 @@ template<> struct packet_traits<float> : default_packet_traits
     HasExp  = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
+    HasLGamma = 1,
+    HasErf = 1,
+    HasErfc = 1,
 
     HasBlend = 0,
     HasSelect = 1,
@@ -60,6 +63,9 @@ template<> struct packet_traits<double> : default_packet_traits
     HasExp  = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
+    HasLGamma = 1,
+    HasErf = 1,
+    HasErfc = 1,
 
     HasBlend = 0,
     HasSelect = 1,
diff --git a/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h
index d8ea0584313..bffc72151a1 100644
--- a/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/third_party/eigen3/Eigen/src/Core/functors/BinaryFunctors.h
@@ -156,6 +156,64 @@ struct functor_traits<scalar_max_op<Scalar> > {
   };
 };
 
+
+/** \internal
+  * \brief Template functors for comparison of two scalars
+  * \todo Implement packet-comparisons
+  */
+template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op;
+
+template<typename Scalar, ComparisonName cmp>
+struct functor_traits<scalar_cmp_op<Scalar, cmp> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = false
+  };
+};
+
+template<ComparisonName Cmp, typename Scalar>
+struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> {
+  typedef bool type;
+};
+
+
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GT> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GE> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;}
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> {  // "unordered" comparison functor
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);}  // true only when neither a<=b nor b<=a holds (presumably a NaN operand for floating-point Scalar)
+};
+template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;}
+};
+
+
 /** \internal
   * \brief Template functor to compute the hypot of two scalars
   *
diff --git a/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h b/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h
index 6feb2293391..8e181b60ff1 100644
--- a/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/third_party/eigen3/Eigen/src/Core/functors/UnaryFunctors.h
@@ -28,6 +28,12 @@ using std::asin;
 using std::atan;
 #endif
 
+#if defined(__CUDA_ARCH__)
+using std::lgamma;  // Supported by all cuda compilers
+using std::erf;  // Supported by all cuda compilers
+using std::erfc;  // Supported by all cuda compilers
+#endif
+
 /** \internal
   * \brief Template functor to compute the opposite of a scalar
   *
@@ -409,6 +415,85 @@ struct functor_traits<scalar_tanh_op<Scalar> >
   };
 };
 
+/** \internal
+  * \brief Template functor to compute the natural log of the absolute value of Gamma of a scalar
+  * \sa class CwiseUnaryOp, Cwise::lgamma()
+  */
+template<typename Scalar> struct scalar_lgamma_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+#if defined(__CUDA_ARCH__)  // device compilation pass
+    return lgamma(a);  // relies on the file-scope `using std::lgamma`
+#else
+    using numext::lgamma; return lgamma(a);  // host pass: dispatch through Eigen's numext wrapper
+#endif
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }  // device-callable: the CUDA packet_traits enable HasLGamma
+};
+template<typename Scalar>
+struct functor_traits<scalar_lgamma_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasLGamma
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the Gauss error function of a scalar
+  * \sa class CwiseUnaryOp, Cwise::erf()
+  */
+template<typename Scalar> struct scalar_erf_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+#if defined(__CUDA_ARCH__)  // device compilation pass
+    return erf(a);  // relies on the file-scope `using std::erf`
+#else
+    using numext::erf; return erf(a);  // host pass: dispatch through Eigen's numext wrapper
+#endif
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); }  // device-callable: the CUDA packet_traits enable HasErf
+};
+template<typename Scalar>
+struct functor_traits<scalar_erf_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasErf
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the Complementary Error Function of a scalar
+  * \sa class CwiseUnaryOp, Cwise::erfc()
+  */
+template<typename Scalar> struct scalar_erfc_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+#if defined(__CUDA_ARCH__)  // device compilation pass
+    return erfc(a);  // relies on the file-scope `using std::erfc`
+#else
+    using numext::erfc; return erfc(a);  // host pass: dispatch through Eigen's numext wrapper
+#endif
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }  // device-callable: the CUDA packet_traits enable HasErfc
+};
+template<typename Scalar>
+struct functor_traits<scalar_erfc_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasErfc
+  };
+};
+
+
  /** \internal
   * \brief Template functor to compute the sigmoid of a scalar
   * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
diff --git a/third_party/eigen3/Eigen/src/Core/util/Constants.h b/third_party/eigen3/Eigen/src/Core/util/Constants.h
index be14df01688..75b91cdcebb 100644
--- a/third_party/eigen3/Eigen/src/Core/util/Constants.h
+++ b/third_party/eigen3/Eigen/src/Core/util/Constants.h
@@ -448,6 +448,22 @@ struct MatrixXpr {};
 /** The type used to identify an array expression */
 struct ArrayXpr {};
 
+namespace internal {
+
+/** \internal
+ * Constants for comparison functors
+ */
+enum ComparisonName {
+  cmp_EQ = 0,     // a == b
+  cmp_LT = 1,     // a <  b
+  cmp_LE = 2,     // a <= b
+  cmp_UNORD = 3,  // neither a <= b nor b <= a
+  cmp_NEQ = 4,    // a != b
+  cmp_GT = 5,     // a >  b
+  cmp_GE = 6      // a >= b
+};
+}  // end namespace internal
+
 } // end namespace Eigen
 
 #endif // EIGEN_CONSTANTS_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h b/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h
index be39d731ad3..f8cd6e47eed 100644
--- a/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/third_party/eigen3/Eigen/src/Core/util/ForwardDeclarations.h
@@ -296,6 +296,13 @@ template<typename MatrixType, unsigned int Mode> struct eigen2_part_return_type;
 }
 #endif
 
+// SpecialFunctions forward declarations
+namespace internal {
+template <typename Scalar> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __lgamma(Scalar x);
+template <typename Scalar> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __erf(Scalar x);
+template <typename Scalar> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __erfc(Scalar x);
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_FORWARDDECLARATIONS_H
diff --git a/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h b/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h
index 396e27b9000..461c52fba91 100644
--- a/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h
+++ b/third_party/eigen3/Eigen/src/Core/util/StaticAssert.h
@@ -90,7 +90,8 @@
         YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
         THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
         THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
-        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG
+        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
+        THIS_TYPE_IS_NOT_SUPPORTED
       };
     };
 
diff --git a/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index ea6778c3f54..fbf0d2031b9 100644
--- a/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/third_party/eigen3/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -166,6 +166,45 @@ atan() const
   return derived();
 }
 
+/** \returns an expression of the coefficient-wise ln(|gamma(*this)|).
+  *
+  * Example: \include Cwise_lgamma.cpp
+  * Output: \verbinclude Cwise_lgamma.out
+  *
+  * \sa erf(), erfc()
+  */
+inline const CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, Derived>
+lgamma() const
+{
+  return derived();
+}
+
+/** \returns an expression of the coefficient-wise Gauss error function of *this.
+  *
+  * Example: \include Cwise_erf.cpp
+  * Output: \verbinclude Cwise_erf.out
+  *
+  * \sa erfc(), lgamma()
+  */
+inline const CwiseUnaryOp<internal::scalar_erf_op<Scalar>, Derived>
+erf() const
+{
+  return derived();
+}
+
+/** \returns an expression of the coefficient-wise complementary error function of *this.
+  *
+  * Example: \include Cwise_erfc.cpp
+  * Output: \verbinclude Cwise_erfc.out
+  *
+  * \sa erf(), lgamma()
+  */
+inline const CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, Derived>
+erfc() const
+{
+  return derived();
+}
+
 /** \returns an expression of the coefficient-wise power of *this to the given exponent.
   *
   * Example: \include Cwise_pow.cpp
@@ -181,7 +220,6 @@ pow(const Scalar& exponent) const
           (derived(), internal::scalar_pow_op<Scalar>(exponent));
 }
 
-
 /** \returns an expression of the coefficient-wise inverse of *this.
   *
   * Example: \include Cwise_inverse.cpp
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 720c3b6a822..723f17c2640 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -122,6 +122,24 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return unaryExpr(internal::scalar_tanh_op<Scalar>());
     }
 
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived>
+    lgamma() const {
+      return unaryExpr(internal::scalar_lgamma_op<Scalar>());
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
+    erf() const {
+      return unaryExpr(internal::scalar_erf_op<Scalar>());
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived>
+    erfc() const {
+      return unaryExpr(internal::scalar_erfc_op<Scalar>());
+    }
+
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived>
     sigmoid() const {
@@ -286,67 +304,67 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return binaryExpr(other.derived(), internal::scalar_boolean_xor_op());
     }
 
-    // Comparisons and tests.
+    // Comparisons and tests.
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<std::less<Scalar>, const Derived, const OtherDerived>
+    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
     operator<(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), std::less<Scalar>());
+      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LT>());
     }
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<std::less_equal<Scalar>, const Derived, const OtherDerived>
+    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
     operator<=(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), std::less_equal<Scalar>());
+      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LE>());
     }
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<std::greater<Scalar>, const Derived, const OtherDerived>
+    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
     operator>(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), std::greater<Scalar>());
+      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GT>());
     }
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<std::greater_equal<Scalar>, const Derived, const OtherDerived>
+    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
     operator>=(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), std::greater_equal<Scalar>());
+      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GE>());
     }
 
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
+    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
     operator==(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), std::equal_to<Scalar>());
+      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_EQ>());
     }
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-    const TensorCwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
+    const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
     operator!=(const OtherDerived& other) const {
-      return binaryExpr(other.derived(), std::not_equal_to<Scalar>());
+      return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>());
     }
 
     // comparisons and tests for Scalars
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::less<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
     operator<(Scalar threshold) const {
       return operator<(constant(threshold));
     }
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::less_equal<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
     operator<=(Scalar threshold) const {
       return operator<=(constant(threshold));
     }
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::greater<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
     operator>(Scalar threshold) const {
       return operator>(constant(threshold));
     }
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::greater_equal<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
     operator>=(Scalar threshold) const {
       return operator>=(constant(threshold));
     }
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::equal_to<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
     operator==(Scalar threshold) const {
       return operator==(constant(threshold));
     }
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+    EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
     operator!=(Scalar threshold) const {
       return operator!=(constant(threshold));
     }
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
index 48859fe5fa9..ac2b2633ff8 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h
@@ -451,6 +451,8 @@ class StreamInterface {
 
 static cudaDeviceProp* m_deviceProperties;
 static bool m_devicePropInitialized = false;
+
+#ifndef __CUDA_ARCH__
 static tensorflow::mutex m_devicePropInitMutex(tensorflow::LINKER_INITIALIZED);
 
 static void initializeDeviceProp() {
@@ -469,6 +471,11 @@ static void initializeDeviceProp() {
     }
   }
 }
+#else
+static void initializeDeviceProp() {  // stub compiled only when __CUDA_ARCH__ is defined (the #else branch)
+  assert(false && "This function should never be called from within a CUDA kernel");
+}
+#endif  // __CUDA_ARCH__
 
 static const cudaStream_t default_stream = cudaStreamDefault;