Fix link error in cuda_blas.cc in debug mode.
This is reproducible with gcc 7.5 using the following command:

    bazel build --per_file_copt=+tensorflow/stream_executor/cuda/cuda_blas.cc@-O0,-g -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package

The error was:

    /usr/bin/ld: bazel-out/k8-opt/bin/tensorflow/stream_executor/cuda/libcublas_plugin.pic.lo(cuda_blas.pic.o): relocation R_X86_64_PC32 against undefined symbol `_ZN15stream_executor3gpu12_GLOBAL__N_120CUDABlasLtMatmulPlan14kMaxBatchCountE' can not be used when making a shared object; recompile with -fPIC

@timshen91 helped me debug. We don't know why the error occurs, but this change (making kMaxBatchCount a plain const member instead of static constexpr) fixes it.

PiperOrigin-RevId: 340989390
Change-Id: I860e1ac8ee8aecd1543c068b66f281d0e0c56acd
This commit is contained in:
parent
fd0fbc7ca7
commit
b54a2ddf68
@@ -3338,7 +3338,9 @@ class CUDABlasLtMatmulPlan final : public blas::IBlasLtMatmulPlan {
|
||||
private:
|
||||
// In some cases cublasLt does not support large batch sizes, so we need to
|
||||
// split up such cases into multiple calls.
|
||||
- static constexpr const int kMaxBatchCount = 65535;
|
||||
+ // TODO(reedwm): Making this static or constexpr causes a link error with gcc
|
||||
+ // in debug mode for unknown reasons. Investigate why.
|
||||
+ const int kMaxBatchCount = 65535;
|
||||
blas::BlasLtMatmulPlanParams params_;
|
||||
blas::DataType scale_type_;
|
||||
UniqueOpDesc op_desc_;
|
||||
|
Loading…
Reference in New Issue
Block a user