Revert PR #43178: Make fast builds work with MSVC
Fixes #43767 PiperOrigin-RevId: 335442309 Change-Id: I4d6d6ca4925886ee783ed6aa2e9db8610eeea5fb
This commit is contained in:
parent
ab1478d380
commit
651cb1d8e6
2
.bazelrc
2
.bazelrc
@ -323,6 +323,8 @@ build:windows --copt=/experimental:preprocessor
|
||||
build:windows --host_copt=/experimental:preprocessor
|
||||
|
||||
# Misc build options we need for windows.
|
||||
build:windows --linkopt=/DEBUG
|
||||
build:windows --host_linkopt=/DEBUG
|
||||
build:windows --linkopt=/OPT:REF
|
||||
build:windows --host_linkopt=/OPT:REF
|
||||
build:windows --linkopt=/OPT:ICF
|
||||
|
@ -104,37 +104,6 @@ TF_CALL_GPU_ALL_TYPES(REGISTER);
|
||||
|
||||
#undef REGISTER
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// Required by MSVC non-release build to ensure the compiler sees all the
|
||||
// template expansions that are needed.
|
||||
#define FORCE_CONCAT(TYPE) \
|
||||
template <> \
|
||||
void ConcatGPU<TYPE>( \
|
||||
OpKernelContext * c, \
|
||||
const std::vector< \
|
||||
std::unique_ptr<typename TTypes<TYPE, 2>::ConstMatrix>>& \
|
||||
inputs_flat, \
|
||||
Tensor* output, typename TTypes<TYPE, 2>::Tensor* output_flat) { \
|
||||
LOG(FATAL) << "Should not be called"; \
|
||||
}
|
||||
|
||||
FORCE_CONCAT(tensorflow::Variant)
|
||||
FORCE_CONCAT(tensorflow::ResourceHandle)
|
||||
FORCE_CONCAT(unsigned short)
|
||||
FORCE_CONCAT(signed char)
|
||||
FORCE_CONCAT(tensorflow::tstring)
|
||||
FORCE_CONCAT(Eigen::QUInt8)
|
||||
FORCE_CONCAT(Eigen::QInt8)
|
||||
FORCE_CONCAT(Eigen::QUInt16)
|
||||
FORCE_CONCAT(Eigen::QInt16)
|
||||
FORCE_CONCAT(Eigen::QInt32)
|
||||
FORCE_CONCAT(unsigned int)
|
||||
FORCE_CONCAT(unsigned __int64)
|
||||
|
||||
#undef FORCE_CONCAT
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||
|
@ -21,13 +21,6 @@ namespace tensorflow {
|
||||
namespace functor {
|
||||
DEFINE_UNARY1(conj, complex64);
|
||||
DEFINE_UNARY1(conj, complex128);
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// Non-release build with MSVC needs these symbols.
|
||||
DEFINE_UNARY1(conj, float);
|
||||
DEFINE_UNARY1(conj, double);
|
||||
#endif
|
||||
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -71,27 +71,6 @@ TF_CALL_int8(DEFINE_GPU_KERNELS);
|
||||
TF_CALL_uint32(DEFINE_GPU_KERNELS);
|
||||
#undef DEFINE_GPU_KERNELS
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
template <>
|
||||
struct functor::DenseUpdate<GPUDevice, tensorflow::Variant, ASSIGN> {
|
||||
void operator()(const GPUDevice& d,
|
||||
typename TTypes<tensorflow::Variant>::Flat params,
|
||||
typename TTypes<tensorflow::Variant>::ConstFlat update) {
|
||||
LOG(FATAL) << "Not handling type tensorflow::Variant";
|
||||
}
|
||||
};
|
||||
|
||||
// The function is required to force above template specialization. Without it
|
||||
// msvc compiler doesn't include the functor in the object file
|
||||
void _force_instantiation(
|
||||
const GPUDevice& d, typename TTypes<tensorflow::Variant>::Flat params,
|
||||
typename TTypes<tensorflow::Variant>::ConstFlat update) {
|
||||
functor::DenseUpdate<GPUDevice, tensorflow::Variant, ASSIGN> x;
|
||||
x(d, params, update);
|
||||
}
|
||||
#endif // _MSC_VER
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||
|
@ -22,10 +22,6 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/util/gpu_kernel_helper.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#endif
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
@ -255,62 +251,6 @@ template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::half,
|
||||
// NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
|
||||
template struct functor::DepthToSpaceOpFunctor<GPUDevice, int32, FORMAT_NCHW>;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define FORCE_DEPTH(TYPE, NAME, NUM, DEVICE) \
|
||||
template <> \
|
||||
struct functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> { \
|
||||
void operator()(const DEVICE& d, \
|
||||
typename TTypes<TYPE, 4>::ConstTensor input, \
|
||||
int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
|
||||
LOG(FATAL) << "Should not be called."; \
|
||||
} \
|
||||
void operator()(const DEVICE& d, \
|
||||
typename TTypes<TYPE, 5>::ConstTensor input, \
|
||||
int block_size, typename TTypes<TYPE, 5>::Tensor output) { \
|
||||
LOG(FATAL) << "Should not be called."; \
|
||||
} \
|
||||
}; \
|
||||
void _force_DepthToSpaceOpFunctor##NAME( \
|
||||
const DEVICE& d, typename TTypes<TYPE, 4>::ConstTensor input, \
|
||||
int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
|
||||
functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> op; \
|
||||
op(d, input, block_size, output); \
|
||||
} \
|
||||
void _force_DepthToSpaceOpFunctor##NAME##_2( \
|
||||
const DEVICE& d, typename TTypes<TYPE, 5>::ConstTensor input, \
|
||||
int block_size, typename TTypes<TYPE, 5>::Tensor output) { \
|
||||
functor::DepthToSpaceOpFunctor<DEVICE, TYPE, NUM> op; \
|
||||
op(d, input, block_size, output); \
|
||||
}
|
||||
|
||||
FORCE_DEPTH(__int64, int64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(unsigned __int64, uint64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(unsigned int, uint, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(int, int, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(unsigned short, ushort, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(short, short, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(unsigned char, uchar, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(signed char, char, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(bfloat16, bfloat16, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(double, double, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(complex64, complex64, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(complex128, complex128, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(bool, bool, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(tensorflow::tstring, tstring, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(tensorflow::ResourceHandle, ResourceHandle, FORMAT_NCHW,
|
||||
Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(tensorflow::Variant, variant, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(Eigen::QInt8, qint8, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(Eigen::QInt8, qint8_2, FORMAT_NHWC, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(Eigen::half, half, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(float, float, FORMAT_NCHW, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH(Eigen::QInt8, qint8, FORMAT_NCHW, GPUDevice)
|
||||
FORCE_DEPTH(Eigen::QInt8, qint8_2, FORMAT_NHWC, GPUDevice)
|
||||
|
||||
#undef FORCE_DEPTH
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||
|
@ -530,11 +530,6 @@ TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPEC);
|
||||
|
||||
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DEFINE_GRAD_GPU_SPEC);
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// Required for MSVC debug build
|
||||
TF_CALL_half(DEFINE_GRAD_GPU_SPEC)
|
||||
#endif
|
||||
|
||||
#undef DEFINE_GPU_SPEC
|
||||
#undef DEFINE_GRAD_GPU_SPEC
|
||||
|
||||
|
@ -22,10 +22,6 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/util/gpu_kernel_helper.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#endif
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
@ -256,70 +252,6 @@ template struct functor::SpaceToDepthOpFunctor<GPUDevice, uint8, FORMAT_NHWC>;
|
||||
// NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
|
||||
template struct functor::SpaceToDepthOpFunctor<GPUDevice, int32, FORMAT_NCHW>;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define FORCE_DEPTH(TYPE, NAME, NUM, DEVICE) \
|
||||
template <> \
|
||||
struct functor::SpaceToDepthOpFunctor<DEVICE, TYPE, NUM> { \
|
||||
void operator()(const DEVICE& d, \
|
||||
typename TTypes<TYPE, 4>::ConstTensor input, \
|
||||
int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
|
||||
LOG(FATAL) << "Should not be called."; \
|
||||
} \
|
||||
}; \
|
||||
void _force_SpaceToDepthOpFunctor##NAME( \
|
||||
const DEVICE& d, typename TTypes<TYPE, 4>::ConstTensor input, \
|
||||
int block_size, typename TTypes<TYPE, 4>::Tensor output) { \
|
||||
functor::SpaceToDepthOpFunctor<DEVICE, TYPE, NUM> op; \
|
||||
op(d, input, block_size, output); \
|
||||
}
|
||||
|
||||
#define FORCE_DEPTH2(TYPE, NAME, DEVICE) \
|
||||
FORCE_DEPTH(TYPE, NAME, FORMAT_NCHW, DEVICE) \
|
||||
FORCE_DEPTH(TYPE, NAME##_2, FORMAT_NHWC, DEVICE)
|
||||
|
||||
FORCE_DEPTH2(__int64, int64, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(unsigned __int64, uint64, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(unsigned int, uint, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(unsigned short, ushort, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(short, short, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(signed char, char, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(unsigned char, char, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(bfloat16, bfloat16, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(double, double, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(complex64, complex64, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(complex128, complex128, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(bool, bool, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(tensorflow::tstring, tstring, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(tensorflow::ResourceHandle, ResourceHandle,
|
||||
Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(tensorflow::Variant, variant, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(Eigen::QInt8, qint8, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(Eigen::half, half, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(float, float, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(int, int, Eigen::ThreadPoolDevice)
|
||||
FORCE_DEPTH2(Eigen::QInt8, qint8gpu, GPUDevice)
|
||||
|
||||
// Special case for int, FORMAT_NHWC
|
||||
template <>
|
||||
struct functor::SpaceToDepthOpFunctor<GPUDevice, int, FORMAT_NHWC> {
|
||||
void operator()(const GPUDevice& d,
|
||||
typename TTypes<int, 4>::ConstTensor input, int block_size,
|
||||
typename TTypes<int, 4>::Tensor output) {
|
||||
LOG(FATAL) << "Should not be called.";
|
||||
}
|
||||
};
|
||||
void _force_SpaceToDepthOpFunctor_int(
|
||||
const GPUDevice& d, typename TTypes<int, 4>::ConstTensor input,
|
||||
int block_size, typename TTypes<int, 4>::Tensor output) {
|
||||
functor::SpaceToDepthOpFunctor<GPUDevice, int, FORMAT_NHWC> op;
|
||||
op(d, input, block_size, output);
|
||||
}
|
||||
|
||||
#undef FORCE_DEPTH
|
||||
#undef FORCE_DEPTH2
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||
|
@ -117,9 +117,7 @@ def InvokeNvcc(argv, log=False):
|
||||
nvcc_compiler_options, argv = GetNvccOptions(argv)
|
||||
|
||||
opt_option, argv = GetOptionValue(argv, '/O')
|
||||
# Originally '-g' was provided as an initial switch. However, nvcc expands it for MSVC
|
||||
# to /Zi which generates vcXXX.pdb file not known to bazel.
|
||||
opt = []
|
||||
opt = ['-g']
|
||||
if (len(opt_option) > 0 and opt_option[0] != 'd'):
|
||||
opt = ['-O2']
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user