Revert PR #43178: Make fast builds work with MSVC

Fixes #43767

PiperOrigin-RevId: 335442309
Change-Id: I4d6d6ca4925886ee783ed6aa2e9db8610eeea5fb
2020-10-05 10:12:25 -07:00 · 2020-10-05 10:12:25 -07:00 · 651cb1d8e6
commit 651cb1d8e6
parent ab1478d380
8 changed files with 3 additions and 195 deletions
--- a/.bazelrc
+++ b/.bazelrc
@ -323,6 +323,8 @@ build:windows --copt=/experimental:preprocessor
 build:windows --host_copt=/experimental:preprocessor

 # Misc build options we need for windows.
+build:windows --linkopt=/DEBUG
+build:windows --host_linkopt=/DEBUG
 build:windows --linkopt=/OPT:REF
 build:windows --host_linkopt=/OPT:REF
 build:windows --linkopt=/OPT:ICF
--- a/tensorflow/core/kernels/concat_lib_gpu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu.cc
@ -104,37 +104,6 @@ TF_CALL_GPU_ALL_TYPES(REGISTER);

 #undef REGISTER

-#if defined(_MSC_VER)
-// Required by MSVC non-release build to ensure the compiler sees all the
-// template expansions that are needed.
-#define FORCE_CONCAT(TYPE)                                             \
-  template <>                                                          \
-  void ConcatGPU<TYPE>(                                                \
-      OpKernelContext * c,                                             \
-      const std::vector<                                               \
-          std::unique_ptr<typename TTypes<TYPE, 2>::ConstMatrix>>&     \
-          inputs_flat,                                                 \
-      Tensor* output, typename TTypes<TYPE, 2>::Tensor* output_flat) { \
-    LOG(FATAL) << "Should not be called";                              \
-  }
-
-FORCE_CONCAT(tensorflow::Variant)
-FORCE_CONCAT(tensorflow::ResourceHandle)
-FORCE_CONCAT(unsigned short)
-FORCE_CONCAT(signed char)
-FORCE_CONCAT(tensorflow::tstring)
-FORCE_CONCAT(Eigen::QUInt8)
-FORCE_CONCAT(Eigen::QInt8)
-FORCE_CONCAT(Eigen::QUInt16)
-FORCE_CONCAT(Eigen::QInt16)
-FORCE_CONCAT(Eigen::QInt32)
-FORCE_CONCAT(unsigned int)
-FORCE_CONCAT(unsigned __int64)
-
-#undef FORCE_CONCAT
-
-#endif
-
 }  // namespace tensorflow

 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
--- a/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
@ -21,13 +21,6 @@ namespace tensorflow {
 namespace functor {
 DEFINE_UNARY1(conj, complex64);
 DEFINE_UNARY1(conj, complex128);
-
-#if defined(_MSC_VER)
-// Non-release build with MSVC needs these symbols.
-DEFINE_UNARY1(conj, float);
-DEFINE_UNARY1(conj, double);
-#endif
-
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc
@ -71,27 +71,6 @@ TF_CALL_int8(DEFINE_GPU_KERNELS);
 TF_CALL_uint32(DEFINE_GPU_KERNELS);
 #undef DEFINE_GPU_KERNELS

-#if defined(_MSC_VER)
-
-template <>
-struct functor::DenseUpdate<GPUDevice, tensorflow::Variant, ASSIGN> {
-  void operator()(const GPUDevice& d,
-                  typename TTypes<tensorflow::Variant>::Flat params,
-                  typename TTypes<tensorflow::Variant>::ConstFlat update) {
-    LOG(FATAL) << "Not handling type tensorflow::Variant";
-  }
-};
-
-// The function is required to force above template specialization. Without it
-// msvc compiler doesn't include the functor in the object file
-void _force_instantiation(
-    const GPUDevice& d, typename TTypes<tensorflow::Variant>::Flat params,
-    typename TTypes<tensorflow::Variant>::ConstFlat update) {
-  functor::DenseUpdate<GPUDevice, tensorflow::Variant, ASSIGN> x;
-  x(d, params, update);
-}
-#endif  // _MSC_VER
-
 }  // end namespace tensorflow

 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
--- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc
@ -22,10 +22,6 @@ limitations under the License.
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"

-#if defined(_MSC_VER)
-#include "tensorflow/core/framework/register_types.h"
-#endif
-
 namespace tensorflow {
 namespace {

@ -255,62 +251,6 @@ template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::half,
 // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
 template struct functor::DepthToSpaceOpFunctor<GPUDevice, int32, FORMAT_NCHW>;

-#if defined(_MSC_VER)
-#define FORCE_DEPTH(TYPE, NAME, NUM, DEVICE)                                   \
-  template <>                                                                  \
-  struct functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> {                   \
-    void operator()(const DEVICE& d,                                           \
-                    typename TTypes<TYPE, 4>::ConstTensor input,               \
-                    int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
-      LOG(FATAL) << "Should not be called.";                                   \
-    }                                                                          \
-    void operator()(const DEVICE& d,                                           \
-                    typename TTypes<TYPE, 5>::ConstTensor input,               \
-                    int block_size, typename TTypes<TYPE, 5>::Tensor output) { \
-      LOG(FATAL) << "Should not be called.";                                   \
-    }                                                                          \
-  };                                                                           \
-  void _force_DepthToSpaceOpFunctor##NAME(                                     \
-      const DEVICE& d, typename TTypes<TYPE, 4>::ConstTensor input,            \
-      int block_size, typename TTypes<TYPE, 4>::Tensor output) {               \
-    functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> op;                      \
-    op(d, input, block_size, output);                                          \
-  }                                                                            \
-  void _force_DepthToSpaceOpFunctor##NAME##_2(                                 \
-      const DEVICE& d, typename TTypes<TYPE, 5>::ConstTensor input,            \
-      int block_size, typename TTypes<TYPE, 5>::Tensor output) {               \
-    functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> op;                      \
-    op(d, input, block_size, output);                                          \
-  }
-
-FORCE_DEPTH(__int64, int64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(unsigned __int64, uint64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(unsigned int, uint, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(int, int, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(unsigned short, ushort, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(short, short, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(unsigned char, uchar, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(signed char, char, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(bfloat16, bfloat16, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(double, double, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(complex64, complex64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(complex128, complex128, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(bool, bool, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(tensorflow::tstring, tstring, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(tensorflow::ResourceHandle, ResourceHandle, FORMAT_NCHW,
-            Eigen::ThreadPoolDevice)
-FORCE_DEPTH(tensorflow::Variant, variant, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(Eigen::QInt8, qint8, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(Eigen::QInt8, qint8_2, FORMAT_NHWC, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(Eigen::half, half, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(float, float, FORMAT_NCHW, Eigen::ThreadPoolDevice)
-FORCE_DEPTH(Eigen::QInt8, qint8, FORMAT_NCHW, GPUDevice)
-FORCE_DEPTH(Eigen::QInt8, qint8_2, FORMAT_NHWC, GPUDevice)
-
-#undef FORCE_DEPTH
-
-#endif
-
 }  // end namespace tensorflow

 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
--- a/tensorflow/core/kernels/image/resize_bilinear_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/image/resize_bilinear_op_gpu.cu.cc
@ -530,11 +530,6 @@ TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPEC);

 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DEFINE_GRAD_GPU_SPEC);

-#if defined(_MSC_VER)
-// Required for MSVC debug build
-TF_CALL_half(DEFINE_GRAD_GPU_SPEC)
-#endif
-
 #undef DEFINE_GPU_SPEC
 #undef DEFINE_GRAD_GPU_SPEC

--- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc
@ -22,10 +22,6 @@ limitations under the License.
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"

-#if defined(_MSC_VER)
-#include "tensorflow/core/framework/register_types.h"
-#endif
-
 namespace tensorflow {

 typedef Eigen::GpuDevice GPUDevice;
@ -256,70 +252,6 @@ template struct functor::SpaceToDepthOpFunctor<GPUDevice, uint8, FORMAT_NHWC>;
 // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
 template struct functor::SpaceToDepthOpFunctor<GPUDevice, int32, FORMAT_NCHW>;

-#if defined(_MSC_VER)
-#define FORCE_DEPTH(TYPE, NAME, NUM, DEVICE)                                   \
-  template <>                                                                  \
-  struct functor::SpaceToDepthOpFunctor<DEVICE, TYPE, NUM> {                   \
-    void operator()(const DEVICE& d,                                           \
-                    typename TTypes<TYPE, 4>::ConstTensor input,               \
-                    int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
-      LOG(FATAL) << "Should not be called.";                                   \
-    }                                                                          \
-  };                                                                           \
-  void _force_SpaceToDepthOpFunctor##NAME(                                     \
-      const DEVICE& d, typename TTypes<TYPE, 4>::ConstTensor input,            \
-      int block_size, typename TTypes<TYPE, 4>::Tensor output) {               \
-    functor::SpaceToDepthOpFunctor<DEVICE, TYPE, NUM> op;                      \
-    op(d, input, block_size, output);                                          \
-  }
-
-#define FORCE_DEPTH2(TYPE, NAME, DEVICE)       \
-  FORCE_DEPTH(TYPE, NAME, FORMAT_NCHW, DEVICE) \
-  FORCE_DEPTH(TYPE, NAME##_2, FORMAT_NHWC, DEVICE)
-
-FORCE_DEPTH2(__int64, int64, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(unsigned __int64, uint64, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(unsigned int, uint, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(unsigned short, ushort, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(short, short, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(signed char, char, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(unsigned char, char, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(bfloat16, bfloat16, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(double, double, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(complex64, complex64, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(complex128, complex128, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(bool, bool, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(tensorflow::tstring, tstring, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(tensorflow::ResourceHandle, ResourceHandle,
-             Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(tensorflow::Variant, variant, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(Eigen::QInt8, qint8, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(Eigen::half, half, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(float, float, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(int, int, Eigen::ThreadPoolDevice)
-FORCE_DEPTH2(Eigen::QInt8, qint8gpu, GPUDevice)
-
-// Special case for int, FORMAT_NHWC
-template <>
-struct functor::SpaceToDepthOpFunctor<GPUDevice, int, FORMAT_NHWC> {
-  void operator()(const GPUDevice& d,
-                  typename TTypes<int, 4>::ConstTensor input, int block_size,
-                  typename TTypes<int, 4>::Tensor output) {
-    LOG(FATAL) << "Should not be called.";
-  }
-};
-void _force_SpaceToDepthOpFunctor_int(
-    const GPUDevice& d, typename TTypes<int, 4>::ConstTensor input,
-    int block_size, typename TTypes<int, 4>::Tensor output) {
-  functor::SpaceToDepthOpFunctor<GPUDevice, int, FORMAT_NHWC> op;
-  op(d, input, block_size, output);
-}
-
-#undef FORCE_DEPTH
-#undef FORCE_DEPTH2
-
-#endif
-
 }  // end namespace tensorflow

 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
--- a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
+++ b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
@ -117,9 +117,7 @@ def InvokeNvcc(argv, log=False):
  nvcc_compiler_options, argv = GetNvccOptions(argv)

  opt_option, argv = GetOptionValue(argv, '/O')
-  # Originally '-g' was provided as an initial switch. Howerver nvcc expands it for MSVC
-  # to /Zi which generates vcXXX.pdb file not known to bazel.
-  opt = []
+  opt = ['-g']
  if (len(opt_option) > 0 and opt_option[0] != 'd'):
    opt = ['-O2']