fix for issue #5169 (#5207)

In nvcc's -gencode option we need to specify both sm_ and compute_; otherwise we cannot fall back to a lower compute capability.
2016-10-27 16:00:00 -07:00 · 2016-10-27 16:00:00 -07:00 · dde4e3caff
commit dde4e3caff
parent 32d1dcc10e
1 changed files with 2 additions and 1 deletions
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -139,7 +139,8 @@ if (tensorflow_ENABLE_GPU)

    # By default we assume compute capabilities 3.5 and 5.2. If you change this, change it in
    # CUDA_NVCC_FLAGS and cuda_config.h below.
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=sm_35;-gencode arch=compute_52,code=sm_52;--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
    set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
    include_directories(${CUDA_INCLUDE})
    add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.5,5.2)