Update Eigen to c906e1142b6b.
PiperOrigin-RevId: 273609544
parent 8f90210c1e
commit 18af93acca
@@ -800,6 +800,7 @@ cc_library(
         }),
         deps = [
             "//third_party/eigen3",
+            "//tensorflow/core/platform:dynamic_annotations",
         ] + select({
             "//tensorflow:android": [],
             "//tensorflow:arm": [],
@@ -814,7 +815,10 @@ cc_library(
     name = "eigen_contraction_kernel_no_mkl",
     srcs = ["eigen_contraction_kernel.cc"],
     hdrs = ["eigen_contraction_kernel.h"],
-    deps = ["//third_party/eigen3"],
+    deps = [
+        "//tensorflow/core/platform:dynamic_annotations",
+        "//third_party/eigen3",
+    ],
 )
 
 cc_library(
@@ -43,6 +43,8 @@ limitations under the License.
 #include "mkldnn.h"
 #endif
 
+#include "tensorflow/core/platform/dynamic_annotations.h"
+
 namespace Eigen {
 namespace internal {
 
@@ -139,7 +141,7 @@ struct mkldnn_gemm_kernel</*Scalar*/ float, IndexType, OutputMapper,
   void operator()(const OutputMapper& output, const float* blockA,
                   const float* blockB, const IndexType rows,
                   const IndexType depth, const IndexType cols, float alpha,
-                  int ldA = kComputeStrideFromBlockDimensions,
+                  float beta, int ldA = kComputeStrideFromBlockDimensions,
                   int ldB = kComputeStrideFromBlockDimensions,
                   char transposeA = 'N', char transposeB = 'N') {
     static const int max_index = (std::numeric_limits<int>::max)();
@@ -157,13 +159,13 @@ struct mkldnn_gemm_kernel</*Scalar*/ float, IndexType, OutputMapper,
     ldB = ldB == kComputeStrideFromBlockDimensions ? k : ldB;
     const int ldC = static_cast<int>(output.stride());
 
-    const float beta = 1.0;
-
     mkldnn_status_t st = mkldnn_sgemm(&transposeA, &transposeB, &m, &n, &k,
                                       &alpha, blockA, &ldA, blockB, &ldB, &beta,
                                       const_cast<float*>(output.data()), &ldC);
     eigen_assert(st == 0);
 
+    TF_ANNOTATE_MEMORY_IS_INITIALIZED(output.data(), sizeof(float) * m * n);
+
     // eigen_assert is a no-op in optimized mode so we add these to avoid
     // compiler's unused-variable errors.
     EIGEN_UNUSED_VARIABLE(max_index);
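The two hunks above make `beta` a caller-supplied argument of the MKL-DNN float kernel (previously hard-coded to `1.0` inside the body) and annotate the output buffer as initialized. As a rough sketch of the contract the kernel now exposes, here is a plain column-major reference GEMM; this is an illustration only, not TensorFlow code, and the name `reference_gemm` is made up here:

```cpp
// Reference semantics of the updated kernel interface. Illustration only:
// with beta supplied by the caller, the kernel computes
//   C = alpha * A * B + beta * C
// instead of always accumulating with the old hard-coded beta = 1.0.
#include <cstddef>

void reference_gemm(const float* A, const float* B, float* C,
                    std::size_t m, std::size_t k, std::size_t n,
                    float alpha, float beta,
                    std::size_t lda, std::size_t ldb, std::size_t ldc) {
  // Column-major layout: element (row, col) of C lives at C[row + col * ldc].
  for (std::size_t col = 0; col < n; ++col) {
    for (std::size_t row = 0; row < m; ++row) {
      float acc = 0.0f;
      for (std::size_t d = 0; d < k; ++d) {
        acc += A[row + d * lda] * B[d + col * ldb];
      }
      float out = alpha * acc;
      // BLAS convention: when beta == 0 the existing contents of C are not
      // read, so the output may start out uninitialized (or random, as in
      // the updated test further down).
      if (beta != 0.0f) out += beta * C[row + col * ldc];
      C[row + col * ldc] = out;
    }
  }
}
```

Following that convention, beta = 1 reproduces the old accumulate-into-output behavior, while beta = 0 lets the caller skip zero-initializing the result buffer.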
@@ -187,7 +189,7 @@ struct mkldnn_gemm_s8u8s32_kernel {
   void operator()(const OutputMapper& output, const LhsScalar* blockA,
                   const RhsScalar* blockB, const IndexType rows,
                   const IndexType depth, const IndexType cols, float alpha,
-                  int ldA = kComputeStrideFromBlockDimensions,
+                  float beta, int ldA = kComputeStrideFromBlockDimensions,
                   int ldB = kComputeStrideFromBlockDimensions,
                   char transposeA = 'N', char transposeB = 'N') {
     static const int max_index = (std::numeric_limits<int>::max)();
@@ -205,8 +207,6 @@ struct mkldnn_gemm_s8u8s32_kernel {
     ldB = ldB == kComputeStrideFromBlockDimensions ? k : ldB;
     const int ldC = static_cast<int>(output.stride());
 
-    const float beta = 1.0;
-
     // Currently we support only symmetric quantization with zero point at 0.
     const int8_t ao = 0;
     const int8_t bo = 0;
@@ -229,6 +229,8 @@ struct mkldnn_gemm_s8u8s32_kernel {
                             C, &ldC, &co);
     eigen_assert(st == 0);
 
+    TF_ANNOTATE_MEMORY_IS_INITIALIZED(C, sizeof(int32_t) * m * n);
+
     // eigen_assert is a no-op in optimized mode so we add these to avoid
     // compiler's unused-variable errors.
     EIGEN_UNUSED_VARIABLE(max_index);
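`TF_ANNOTATE_MEMORY_IS_INITIALIZED` is needed because the output is written by an external GEMM routine whose stores MemorySanitizer cannot track. The sketch below shows the general shape such an annotation tends to have; it is an assumption for illustration, not the contents of `tensorflow/core/platform/dynamic_annotations.h`, and the `MEMORY_SANITIZER` guard and `EXAMPLE_ANNOTATE_MEMORY_IS_INITIALIZED` name are hypothetical:

```cpp
// Assumed shape of a memory-initialization annotation (illustration only; the
// real macro is provided by tensorflow/core/platform/dynamic_annotations.h and
// may be defined differently). Under MemorySanitizer the GEMM output is
// written by an external library, so the buffer must be explicitly unpoisoned
// to avoid false use-of-uninitialized-value reports.
#if defined(MEMORY_SANITIZER)
#include <sanitizer/msan_interface.h>
#define EXAMPLE_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, bytes) \
  __msan_unpoison(ptr, bytes)
#else
#define EXAMPLE_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, bytes) ((void)0)
#endif
```

Outside sanitizer builds the annotation compiles away, which is why it can sit on the hot path of both kernels.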
@@ -514,6 +516,8 @@ struct GemmKernelProvider<Eigen::QInt32, Eigen::QInt8, Eigen::QUInt8,
         nm0(bm > 0 ? divup(m, bm) : 0), \
         nn0(bn > 0 ? divup(n, bn) : 0) {} \
         \
+  enum { HasBeta = true }; \
+  \
   using ResScalar = RES_SCALAR; \
   using LhsScalar = LHS_SCALAR; \
   using RhsScalar = RHS_SCALAR; \
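The new `enum { HasBeta = true };` advertises that this custom contraction kernel's `invoke()` accepts a `beta` argument, so the tensor-contraction framework passes one instead of assuming plain accumulation. A minimal sketch of that shape, with hypothetical names (`ExampleContractionKernel`) and an assumed framework contract:

```cpp
// Sketch of how a custom contraction kernel advertises beta support
// (assumed shape, not the actual TensorFlow/Eigen declarations). With
// HasBeta = true the framework is expected to call invoke(..., alpha, beta),
// and the kernel computes output = alpha * lhs * rhs + beta * output.
template <typename ResScalar, typename LhsBlock, typename RhsBlock,
          typename OutputMapper, typename StorageIndex>
struct ExampleContractionKernel {
  enum { HasBeta = true };  // invoke() accepts a beta argument.

  void invoke(const OutputMapper& output_mapper, const LhsBlock& lhs_block,
              const RhsBlock& rhs_block, StorageIndex rows, StorageIndex depth,
              StorageIndex cols, float alpha, float beta) {
    // ... run the GEMM with the given alpha/beta ...
    (void)output_mapper; (void)lhs_block; (void)rhs_block;
    (void)rows; (void)depth; (void)cols; (void)alpha; (void)beta;
  }
};
```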
@@ -639,35 +643,50 @@ struct GemmKernelProvider<Eigen::QInt32, Eigen::QInt8, Eigen::QUInt8,
   EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void invoke( \
       const OutputMapper& output_mapper, const LhsBlock& lhsBlock, \
       const RhsBlock& rhsBlock, const StorageIndex rows, \
-      const StorageIndex depth, const StorageIndex cols, \
-      const float alpha) { \
+      const StorageIndex depth, const StorageIndex cols, const float alpha, \
+      const float beta) { \
     if (UseCustomContractionKernels()) { \
       if ((DirectLhsAccess::value && lhsBlock.is_direct_access) && \
           (DirectRhsAccess::value && rhsBlock.is_direct_access)) { \
         GemmKernel()(output_mapper, lhsBlock.raw_data, rhsBlock.raw_data, \
-                     rows, depth, cols, alpha, /*ldA=*/lhsBlock.stride, \
-                     /*ldB=*/rhsBlock.stride, \
+                     rows, depth, cols, alpha, beta, \
+                     /*ldA=*/lhsBlock.stride, /*ldB=*/rhsBlock.stride, \
                      /*transposeA=*/lhsBlock.transpose, \
                      /*transposeB=*/rhsBlock.transpose); \
                      \
       } else if (DirectLhsAccess::value && lhsBlock.is_direct_access) { \
         GemmKernel()(output_mapper, lhsBlock.raw_data, rhsBlock.packed_data, \
-                     rows, depth, cols, alpha, /*ldA=*/lhsBlock.stride, \
+                     rows, depth, cols, alpha, beta, \
+                     /*ldA=*/lhsBlock.stride, \
                      /*ldB=*/GemmKernel::kComputeStrideFromBlockDimensions, \
                      /*transposeA=*/lhsBlock.transpose, /*transposeB=*/'N'); \
                      \
       } else if (DirectRhsAccess::value && rhsBlock.is_direct_access) { \
         GemmKernel()(output_mapper, lhsBlock.packed_data, rhsBlock.raw_data, \
-                     rows, depth, cols, alpha, \
+                     rows, depth, cols, alpha, beta, \
                      /*ldA=*/GemmKernel::kComputeStrideFromBlockDimensions, \
                      /*ldB=*/rhsBlock.stride, /*transposeA=*/'N', \
                      /*transposeB=*/rhsBlock.transpose); \
                      \
       } else { \
         GemmKernel()(output_mapper, lhsBlock.packed_data, \
-                     rhsBlock.packed_data, rows, depth, cols, alpha); \
+                     rhsBlock.packed_data, rows, depth, cols, alpha, beta); \
       } \
     } else { \
+      /* Gebp kernel does not support beta, so we have to clear memory in */ \
+      /* the output mapper manually. */ \
+      /* WARNING(ezhulenev): This is optimized into a memset in a loop, */ \
+      /* could be much slower for small matrices. Currently this code */ \
+      /* path used only for testing, and perormance does not matter. */ \
+      if (beta == 0.0) { \
+        for (StorageIndex col = 0; col < cols; ++col) { \
+          ResScalar* output_base = &output_mapper(0, col); \
+          typedef Array<ResScalar, Dynamic, 1> OutputRow; \
+          typedef Map<OutputRow, 0, InnerStride<1>> OutputRowMap; \
+          OutputRowMap(output_base, rows).setZero(); \
+        } \
+      } \
+      \
       GebpKernel()( \
           output_mapper, lhsBlock.packed_data, rhsBlock.packed_data, rows, \
           depth, cols, alpha, \
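The fallback branch above is easier to follow without the macro line continuations. Below is an equivalent, de-macroized sketch (a hypothetical free function, not the actual macro expansion): Eigen's gebp kernel always accumulates into the output, so when the caller asks for `beta == 0` each output column is zeroed through the output mapper before gebp runs.

```cpp
// De-macroized sketch of the beta == 0 fallback added above (hypothetical
// free function; the real code is part of the kernel macro body). Eigen's
// gebp kernel has no beta parameter and always accumulates (implicit
// beta == 1), so a requested beta of zero means the output must be cleared
// manually first.
#include <Eigen/Core>

template <typename ResScalar, typename OutputMapper, typename StorageIndex>
void ClearOutputIfBetaIsZero(const OutputMapper& output_mapper,
                             StorageIndex rows, StorageIndex cols,
                             float beta) {
  if (beta == 0.0f) {
    for (StorageIndex col = 0; col < cols; ++col) {
      // Each column of a column-major output mapper is contiguous, so it can
      // be mapped as a dense vector and zeroed; effectively a memset per
      // column, which is why the WARNING above notes it can be slow for
      // small matrices.
      ResScalar* output_base = &output_mapper(0, col);
      typedef Eigen::Array<ResScalar, Eigen::Dynamic, 1> OutputColumn;
      typedef Eigen::Map<OutputColumn, 0, Eigen::InnerStride<1>> OutputColumnMap;
      OutputColumnMap(output_base, rows).setZero();
    }
  }
  // The gebp kernel then accumulates alpha * lhs * rhs on top of the
  // (now zeroed, or intentionally preserved) output.
}
```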
@@ -713,6 +732,8 @@ struct GemmKernelProvider<Eigen::QInt32, Eigen::QInt8, Eigen::QUInt8,
         nm0(bm > 0 ? divup(m, bm) : 0), \
         nn0(bn > 0 ? divup(n, bn) : 0) {} \
         \
+  enum { HasBeta = true }; \
+  \
   using ResScalar = RES_SCALAR; \
   using LhsScalar = LHS_SCALAR; \
   using RhsScalar = RHS_SCALAR; \
@@ -813,32 +834,32 @@ struct GemmKernelProvider<Eigen::QInt32, Eigen::QInt8, Eigen::QUInt8,
   EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void invoke( \
       const OutputMapper& output_mapper, const LhsBlock& lhsBlock, \
       const RhsBlock& rhsBlock, const StorageIndex rows, \
-      const StorageIndex depth, const StorageIndex cols, \
-      const float alpha) { \
+      const StorageIndex depth, const StorageIndex cols, const float alpha, \
+      const float beta) { \
     if ((DirectLhsAccess::value && lhsBlock.is_direct_access) && \
         (DirectRhsAccess::value && rhsBlock.is_direct_access)) { \
       GemmKernel()(output_mapper, lhsBlock.raw_data, rhsBlock.raw_data, \
-                   rows, depth, cols, alpha, /*ldA=*/lhsBlock.stride, \
+                   rows, depth, cols, alpha, beta, /*ldA=*/lhsBlock.stride, \
                    /*ldB=*/rhsBlock.stride, \
                    /*transposeA=*/lhsBlock.transpose, \
                    /*transposeB=*/rhsBlock.transpose); \
                    \
     } else if (DirectLhsAccess::value && lhsBlock.is_direct_access) { \
       GemmKernel()(output_mapper, lhsBlock.raw_data, rhsBlock.packed_data, \
-                   rows, depth, cols, alpha, /*ldA=*/lhsBlock.stride, \
+                   rows, depth, cols, alpha, beta, /*ldA=*/lhsBlock.stride, \
                    /*ldB=*/GemmKernel::kComputeStrideFromBlockDimensions, \
                    /*transposeA=*/lhsBlock.transpose, /*transposeB=*/'N'); \
                    \
     } else if (DirectRhsAccess::value && rhsBlock.is_direct_access) { \
       GemmKernel()(output_mapper, lhsBlock.packed_data, rhsBlock.raw_data, \
-                   rows, depth, cols, alpha, \
+                   rows, depth, cols, alpha, beta, \
                    /*ldA=*/GemmKernel::kComputeStrideFromBlockDimensions, \
                    /*ldB=*/rhsBlock.stride, /*transposeA=*/'N', \
                    /*transposeB=*/rhsBlock.transpose); \
                    \
     } else { \
       GemmKernel()(output_mapper, lhsBlock.packed_data, \
-                   rhsBlock.packed_data, rows, depth, cols, alpha); \
+                   rhsBlock.packed_data, rows, depth, cols, alpha, beta); \
     } \
   } \
   \
@@ -116,11 +116,12 @@ TEST(EigenMkldnnTest, MkldnnGemm) {
       mkldnn_gemm_kernel<Scalar, Index, OutputMapper, ColMajor>;
 
   Tensor2d mkldnn_result(m, n);
-  mkldnn_result.setZero();
+  mkldnn_result.setRandom();
   OutputMapper output_mapper(mkldnn_result.data(), m);
 
   MkldnnGemmKernel gemm_kernel;
-  gemm_kernel(output_mapper, lhs.data(), rhs.data(), m, k, n, /*alpha=*/1.0);
+  gemm_kernel(output_mapper, lhs.data(), rhs.data(), m, k, n, /*alpha=*/1.0,
+              /*beta=*/0.0);
 
   // Compute matmul with Eigen::Matrix.
   using Matrix = Eigen::Matrix<Scalar, Dynamic, Dynamic, ColMajor>;
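The test change is the functional check for the new parameter: the result tensor is now pre-filled with `setRandom()` and the kernel is called with `beta = 0`, so a kernel that accumulates into stale memory instead of overwriting it no longer matches the reference product. A minimal sketch of that comparison (illustrative only; `matches_reference` and the `1e-4f` tolerance are assumptions, not the test's actual helper):

```cpp
// Sketch of the property the updated test exercises (not the literal test
// body). With the output pre-filled with random values and beta == 0, the
// kernel must overwrite the garbage rather than accumulate into it, so a
// comparison against a plain Eigen::Matrix product catches a kernel that
// ignores beta.
#include <Eigen/Dense>

bool matches_reference(const Eigen::MatrixXf& lhs, const Eigen::MatrixXf& rhs,
                       const Eigen::MatrixXf& kernel_result) {
  const Eigen::MatrixXf expected = lhs * rhs;  // alpha = 1.0, beta = 0.0
  return ((kernel_result - expected).array().abs() < 1e-4f).all();
}
```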
@@ -172,11 +172,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
         name = "eigen_archive",
         build_file = clean_dep("//third_party:eigen.BUILD"),
         patch_file = clean_dep("//third_party/eigen3:gpu_packet_math.patch"),
-        sha256 = "a126a1af9ec3b3f646c4896bf69a4bb71e9ebfb30c50c3182f01270a704a4093",
-        strip_prefix = "eigen-eigen-89abeb806e2e",
+        sha256 = "8091c578608ea2646e45926f863c9395d3f2873ea771fcd10eda851c91dbd447",
+        strip_prefix = "eigen-eigen-c906e1142b6b",
         urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/89abeb806e2e.tar.gz",
-            "https://bitbucket.org/eigen/eigen/get/89abeb806e2e.tar.gz",
+            "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/c906e1142b6b.tar.gz",
+            "https://bitbucket.org/eigen/eigen/get/c906e1142b6b.tar.gz",
         ],
     )
 
third_party/eigen3/gpu_packet_math.patch
@@ -22,14 +22,4 @@
       return res;
     }
   };
---- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
-+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
-@@ -590,7 +590,7 @@
- 
-   x = psqrt(pmul(neg_two, plog(b)));
-   x0 = psub(x, pdiv(plog(x), x));
--  z = one / x;
-+  z = pdiv(one, x);
-   x1 = pmul(
-       z, pselect(
-              pcmp_lt(x, eight),