Revert PR #43178: Make fast builds work with MSVC

Fixes #43767

PiperOrigin-RevId: 335442309
Change-Id: I4d6d6ca4925886ee783ed6aa2e9db8610eeea5fb
This commit is contained in:
Mihai Maruseac 2020-10-05 10:12:25 -07:00 committed by TensorFlower Gardener
parent ab1478d380
commit 651cb1d8e6
8 changed files with 3 additions and 195 deletions

View File

@ -323,6 +323,8 @@ build:windows --copt=/experimental:preprocessor
build:windows --host_copt=/experimental:preprocessor
# Misc build options we need for windows.
build:windows --linkopt=/DEBUG
build:windows --host_linkopt=/DEBUG
build:windows --linkopt=/OPT:REF
build:windows --host_linkopt=/OPT:REF
build:windows --linkopt=/OPT:ICF

View File

@ -104,37 +104,6 @@ TF_CALL_GPU_ALL_TYPES(REGISTER);
#undef REGISTER
#if defined(_MSC_VER)
// Required by MSVC non-release build to ensure the compiler sees all the
// template expansions that are needed.
// Each FORCE_CONCAT(TYPE) expansion emits an explicit specialization of
// ConcatGPU<TYPE> whose body is LOG(FATAL): the symbol exists purely so the
// debug linker can resolve it, and it aborts if it is ever actually invoked.
// NOTE(review): presumably these types never reach the GPU concat path at
// runtime — confirm before relying on the LOG(FATAL) being unreachable.
#define FORCE_CONCAT(TYPE)                                                  \
  template <>                                                               \
  void ConcatGPU<TYPE>(                                                     \
      OpKernelContext * c,                                                  \
      const std::vector<                                                    \
          std::unique_ptr<typename TTypes<TYPE, 2>::ConstMatrix>>&          \
          inputs_flat,                                                      \
      Tensor* output, typename TTypes<TYPE, 2>::Tensor* output_flat) {      \
    LOG(FATAL) << "Should not be called";                                   \
  }
FORCE_CONCAT(tensorflow::Variant)
FORCE_CONCAT(tensorflow::ResourceHandle)
FORCE_CONCAT(unsigned short)
FORCE_CONCAT(signed char)
FORCE_CONCAT(tensorflow::tstring)
FORCE_CONCAT(Eigen::QUInt8)
FORCE_CONCAT(Eigen::QInt8)
FORCE_CONCAT(Eigen::QUInt16)
FORCE_CONCAT(Eigen::QInt16)
FORCE_CONCAT(Eigen::QInt32)
FORCE_CONCAT(unsigned int)
FORCE_CONCAT(unsigned __int64)
#undef FORCE_CONCAT
#endif
} // namespace tensorflow
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

@ -21,13 +21,6 @@ namespace tensorflow {
namespace functor {
DEFINE_UNARY1(conj, complex64);
DEFINE_UNARY1(conj, complex128);
#if defined(_MSC_VER)
// Non-release build with MSVC needs these symbols.
// NOTE(review): conj of a real scalar is the identity, so these expansions
// appear to exist only so the MSVC debug linker can resolve the symbols —
// confirm the float/double paths are never actually dispatched here.
DEFINE_UNARY1(conj, float);
DEFINE_UNARY1(conj, double);
#endif
} // namespace functor
} // namespace tensorflow

View File

@ -71,27 +71,6 @@ TF_CALL_int8(DEFINE_GPU_KERNELS);
TF_CALL_uint32(DEFINE_GPU_KERNELS);
#undef DEFINE_GPU_KERNELS
#if defined(_MSC_VER)
// MSVC non-release builds need an explicit specialization for Variant so the
// DenseUpdate<GPUDevice, Variant, ASSIGN> symbol exists at link time.  The
// body aborts: this combination must never actually execute.
template <>
struct functor::DenseUpdate<GPUDevice, tensorflow::Variant, ASSIGN> {
  void operator()(const GPUDevice& d,
                  typename TTypes<tensorflow::Variant>::Flat params,
                  typename TTypes<tensorflow::Variant>::ConstFlat update) {
    LOG(FATAL) << "Not handling type tensorflow::Variant";
  }
};
// The function is required to force above template specialization. Without it
// msvc compiler doesn't include the functor in the object file
// (the forcing function odr-uses the specialization so it is kept).
// NOTE(review): an identifier starting with an underscore at namespace scope
// is reserved to the implementation per [lex.name]; consider renaming in a
// follow-up change.
void _force_instantiation(
    const GPUDevice& d, typename TTypes<tensorflow::Variant>::Flat params,
    typename TTypes<tensorflow::Variant>::ConstFlat update) {
  functor::DenseUpdate<GPUDevice, tensorflow::Variant, ASSIGN> x;
  x(d, params, update);
}
#endif  // _MSC_VER
} // end namespace tensorflow
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

@ -22,10 +22,6 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/gpu_kernel_helper.h"
#if defined(_MSC_VER)
#include "tensorflow/core/framework/register_types.h"
#endif
namespace tensorflow {
namespace {
@ -255,62 +251,6 @@ template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::half,
// NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
template struct functor::DepthToSpaceOpFunctor<GPUDevice, int32, FORMAT_NCHW>;
#if defined(_MSC_VER)
// MSVC non-release builds fail to emit some DepthToSpaceOpFunctor template
// instantiations that other translation units link against.  Each
// FORCE_DEPTH(TYPE, NAME, NUM, DEVICE) expansion works around this by:
//   (a) explicitly specializing the functor for (DEVICE, TYPE, NUM) with a
//       LOG(FATAL) body — these combinations must never actually run; and
//   (b) defining forcing functions (named via token-pasting NAME) that
//       odr-use the specialization so it is kept in the object file.
// NAME must therefore be unique across all expansions: a repeated NAME is a
// redefinition of the forcing functions.
#define FORCE_DEPTH(TYPE, NAME, NUM, DEVICE)                                   \
  template <>                                                                  \
  struct functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> {                   \
    void operator()(const DEVICE& d,                                           \
                    typename TTypes<TYPE, 4>::ConstTensor input,               \
                    int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
      LOG(FATAL) << "Should not be called.";                                   \
    }                                                                          \
    void operator()(const DEVICE& d,                                           \
                    typename TTypes<TYPE, 5>::ConstTensor input,               \
                    int block_size, typename TTypes<TYPE, 5>::Tensor output) { \
      LOG(FATAL) << "Should not be called.";                                   \
    }                                                                          \
  };                                                                           \
  void _force_DepthToSpaceOpFunctor##NAME(                                     \
      const DEVICE& d, typename TTypes<TYPE, 4>::ConstTensor input,            \
      int block_size, typename TTypes<TYPE, 4>::Tensor output) {               \
    functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> op;                      \
    op(d, input, block_size, output);                                          \
  }                                                                            \
  void _force_DepthToSpaceOpFunctor##NAME##_2(                                 \
      const DEVICE& d, typename TTypes<TYPE, 5>::ConstTensor input,            \
      int block_size, typename TTypes<TYPE, 5>::Tensor output) {               \
    functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> op;                      \
    op(d, input, block_size, output);                                          \
  }
FORCE_DEPTH(__int64, int64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(unsigned __int64, uint64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(unsigned int, uint, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(int, int, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(unsigned short, ushort, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(short, short, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(unsigned char, uchar, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(signed char, char, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(bfloat16, bfloat16, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(double, double, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(complex64, complex64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(complex128, complex128, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(bool, bool, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(tensorflow::tstring, tstring, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(tensorflow::ResourceHandle, ResourceHandle, FORMAT_NCHW,
            Eigen::ThreadPoolDevice)
FORCE_DEPTH(tensorflow::Variant, variant, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(Eigen::QInt8, qint8, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(Eigen::QInt8, qint8_2, FORMAT_NHWC, Eigen::ThreadPoolDevice)
FORCE_DEPTH(Eigen::half, half, FORMAT_NCHW, Eigen::ThreadPoolDevice)
FORCE_DEPTH(float, float, FORMAT_NCHW, Eigen::ThreadPoolDevice)
// BUGFIX: the GPUDevice expansions previously reused NAMEs "qint8" and
// "qint8_2" from the ThreadPoolDevice expansions above, which redefines
// _force_DepthToSpaceOpFunctorqint8 and _force_DepthToSpaceOpFunctorqint8_2.
// Use distinct NAMEs, matching the "qint8gpu" convention already used by the
// SpaceToDepth counterpart of this workaround.
FORCE_DEPTH(Eigen::QInt8, qint8gpu, FORMAT_NCHW, GPUDevice)
FORCE_DEPTH(Eigen::QInt8, qint8gpu_2, FORMAT_NHWC, GPUDevice)
#undef FORCE_DEPTH
#endif
} // end namespace tensorflow
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPEC);
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DEFINE_GRAD_GPU_SPEC);
#if defined(_MSC_VER)
// Required for MSVC debug build
// NOTE(review): half is excluded by the NO_HALF list above, so this expands
// the grad spec for half explicitly; presumably release builds obtain the
// instantiation through other means — confirm.
TF_CALL_half(DEFINE_GRAD_GPU_SPEC)
#endif
#undef DEFINE_GPU_SPEC
#undef DEFINE_GRAD_GPU_SPEC

View File

@ -22,10 +22,6 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/gpu_kernel_helper.h"
#if defined(_MSC_VER)
#include "tensorflow/core/framework/register_types.h"
#endif
namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice;
@ -256,70 +252,6 @@ template struct functor::SpaceToDepthOpFunctor<GPUDevice, uint8, FORMAT_NHWC>;
// NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
template struct functor::SpaceToDepthOpFunctor<GPUDevice, int32, FORMAT_NCHW>;
#if defined(_MSC_VER)
// MSVC non-release builds fail to emit some SpaceToDepthOpFunctor template
// instantiations that other translation units link against.  Each
// FORCE_DEPTH(TYPE, NAME, NUM, DEVICE) expansion works around this by:
//   (a) explicitly specializing the functor for (DEVICE, TYPE, NUM) with a
//       LOG(FATAL) body — these combinations must never actually run; and
//   (b) defining a forcing function (named via token-pasting NAME) that
//       odr-uses the specialization so it is kept in the object file.
// NAME must therefore be unique across all expansions: a repeated NAME is a
// redefinition of the forcing function.
#define FORCE_DEPTH(TYPE, NAME, NUM, DEVICE)                                   \
  template <>                                                                  \
  struct functor::SpaceToDepthOpFunctor<DEVICE, TYPE, NUM> {                   \
    void operator()(const DEVICE& d,                                           \
                    typename TTypes<TYPE, 4>::ConstTensor input,               \
                    int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
      LOG(FATAL) << "Should not be called.";                                   \
    }                                                                          \
  };                                                                           \
  void _force_SpaceToDepthOpFunctor##NAME(                                     \
      const DEVICE& d, typename TTypes<TYPE, 4>::ConstTensor input,            \
      int block_size, typename TTypes<TYPE, 4>::Tensor output) {               \
    functor::SpaceToDepthOpFunctor<DEVICE, TYPE, NUM> op;                      \
    op(d, input, block_size, output);                                          \
  }
// Convenience wrapper forcing both data formats for a type; generates NAME
// and NAME##_2 forcing functions.
#define FORCE_DEPTH2(TYPE, NAME, DEVICE)          \
  FORCE_DEPTH(TYPE, NAME, FORMAT_NCHW, DEVICE)    \
  FORCE_DEPTH(TYPE, NAME##_2, FORMAT_NHWC, DEVICE)
FORCE_DEPTH2(__int64, int64, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(unsigned __int64, uint64, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(unsigned int, uint, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(unsigned short, ushort, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(short, short, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(signed char, char, Eigen::ThreadPoolDevice)
// BUGFIX: the unsigned char expansion previously reused NAME "char" from the
// signed char expansion above, which redefines
// _force_SpaceToDepthOpFunctorchar and _force_SpaceToDepthOpFunctorchar_2.
// Use the distinct NAME "uchar", matching the DepthToSpace counterpart.
FORCE_DEPTH2(unsigned char, uchar, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(bfloat16, bfloat16, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(double, double, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(complex64, complex64, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(complex128, complex128, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(bool, bool, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(tensorflow::tstring, tstring, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(tensorflow::ResourceHandle, ResourceHandle,
             Eigen::ThreadPoolDevice)
FORCE_DEPTH2(tensorflow::Variant, variant, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(Eigen::QInt8, qint8, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(Eigen::half, half, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(float, float, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(int, int, Eigen::ThreadPoolDevice)
FORCE_DEPTH2(Eigen::QInt8, qint8gpu, GPUDevice)
// Special case for int, FORMAT_NHWC: spelled out by hand because the forcing
// function needs a name ("_int") that the macro's token-pasting would not
// produce without colliding with the "int" NAME used above.
template <>
struct functor::SpaceToDepthOpFunctor<GPUDevice, int, FORMAT_NHWC> {
  void operator()(const GPUDevice& d,
                  typename TTypes<int, 4>::ConstTensor input, int block_size,
                  typename TTypes<int, 4>::Tensor output) {
    LOG(FATAL) << "Should not be called.";
  }
};
void _force_SpaceToDepthOpFunctor_int(
    const GPUDevice& d, typename TTypes<int, 4>::ConstTensor input,
    int block_size, typename TTypes<int, 4>::Tensor output) {
  functor::SpaceToDepthOpFunctor<GPUDevice, int, FORMAT_NHWC> op;
  op(d, input, block_size, output);
}
#undef FORCE_DEPTH
#undef FORCE_DEPTH2
#endif
} // end namespace tensorflow
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

@ -117,9 +117,7 @@ def InvokeNvcc(argv, log=False):
nvcc_compiler_options, argv = GetNvccOptions(argv)
opt_option, argv = GetOptionValue(argv, '/O')
# Originally '-g' was provided as an initial switch. However, nvcc expands it
# for MSVC to /Zi, which generates a vcXXX.pdb file not known to bazel.
opt = []
opt = ['-g']
if (len(opt_option) > 0 and opt_option[0] != 'd'):
opt = ['-O2']