STT-tensorflow/third_party/cub.pr170.patch

49 lines
1.4 KiB
Diff

From fd6e7a61a16a17fa155cbd717de0c79001af71e6 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Mon, 23 Sep 2019 11:18:56 -0700
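Subject: [PATCH] Fix CUDA version detection in CUB

This fixes the problem with CUB using deprecated shfl/vote instructions when CUB
is compiled with clang (e.g. some TensorFlow builds). The version checks in
cub/util_arch.cuh and cub/util_type.cuh now also accept CUDA_VERSION >= 9000
in addition to __CUDACC_VER_MAJOR__ >= 9.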
---
cub/util_arch.cuh | 3 ++-
cub/util_type.cuh | 4 ++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/cub/util_arch.cuh b/cub/util_arch.cuh
index 87c5ea2fb..9ad9d1cbb 100644
--- a/cub/util_arch.cuh
+++ b/cub/util_arch.cuh
@@ -44,7 +44,8 @@ namespace cub {
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
-#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
+#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \
+ (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
#define CUB_USE_COOPERATIVE_GROUPS
#endif
diff --git a/cub/util_type.cuh b/cub/util_type.cuh
index 0ba41e1ed..b2433d735 100644
--- a/cub/util_type.cuh
+++ b/cub/util_type.cuh
@@ -37,7 +37,7 @@
#include <limits>
#include <cfloat>
-#if (__CUDACC_VER_MAJOR__ >= 9)
+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
#include <cuda_fp16.h>
#endif
@@ -1063,7 +1063,7 @@ struct FpLimits<double>
};
-#if (__CUDACC_VER_MAJOR__ >= 9)
+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
template <>
struct FpLimits<__half>
{