fix for issue ()

in nvcc -gencode we need to specify both sm_ and compute_, else we can not
fallback to a lower compute capability.
This commit is contained in:
guschmue 2016-10-27 16:00:00 -07:00 committed by Derek Murray
parent 32d1dcc10e
commit dde4e3caff

View File

@ -139,7 +139,8 @@ if (tensorflow_ENABLE_GPU)
# by default we assume compute cabability 3.5 and 5.2. If you change this change it in
# CUDA_NVCC_FLAGS and cuda_config.h below
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=sm_35;-gencode arch=compute_52,code=sm_52;--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
include_directories(${CUDA_INCLUDE})
add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.5,5.2)