Check if GOOGLE_CUDA and TENSORFLOW_USE_ROCM are defined before using them.
This is necessary to avoid errors with stricter clang checks.

PiperOrigin-RevId: 245143683
commit 29ecfbf1e7 (parent 468b5b8366)
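Why the rewrite helps: under clang's -Wundef (commonly combined with -Werror), evaluating a macro that is not defined inside an #if expression is diagnosed, whereas a defined(X) guard short-circuits so the macro is only evaluated when it actually exists. Below is a minimal standalone C++ sketch of the pattern applied throughout this commit; the file name and the TF_GPU_ENABLED macro are illustrative assumptions, not part of the commit:

    // guard_demo.cc -- hypothetical demo, not a file in this commit.
    // Build: clang++ -Wundef -Werror guard_demo.cc
    //
    // The old guard trips -Wundef when neither macro is defined:
    //   #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM   // warning: 'GOOGLE_CUDA' is not defined
    //
    // The new guard checks defined() first, so an absent macro is never evaluated:
    #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
        (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
    #define TF_GPU_ENABLED 1  // TF_GPU_ENABLED is an illustrative name
    #else
    #define TF_GPU_ENABLED 0
    #endif

    #include <cstdio>

    int main() {
      // Prints 1 only when built with -DGOOGLE_CUDA=1 or -DTENSORFLOW_USE_ROCM=1.
      std::printf("GPU-enabled build: %d\n", TF_GPU_ENABLED);
      return 0;
    }

The affected hunks follow, reconstructed as a unified diff.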
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
 

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"
 

@@ -15,7 +15,8 @@ limitations under the License.
 
 // TODO(opensource): Use a more generic sounding preprocessor name than
 // GOOGLE_CUDA
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #if TENSORFLOW_USE_ROCM
 #include "rocm/include/hip/hip_runtime.h"

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -81,7 +81,8 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
                                             TfGpuId tf_gpu_id,
                                             size_t total_bytes) {
   CHECK(process_state_);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
   const string& allocator_type = options.allocator_type();
   mutex_lock lock(mu_);
   GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);

@@ -163,7 +164,8 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
 
 SharedCounter* GPUProcessState::GPUAllocatorCounter(TfGpuId tf_gpu_id) {
   DCHECK(process_state_);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
   GpuIdUtil::CheckValidTfGpuId(tf_gpu_id);
   mutex_lock l(mu_);
   if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {

@@ -275,7 +277,8 @@ Allocator* GPUProcessState::GetGpuHostAllocator(int numa_node) {
 
 void GPUProcessState::AddGPUAllocVisitor(int bus_id,
                                          const SubAllocator::Visitor& visitor) {
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
   mutex_lock lock(mu_);
   CHECK(gpu_allocators_.empty())  // Crash OK
       << "AddGPUAllocVisitor must be called before "

@@ -290,7 +293,8 @@ void GPUProcessState::AddGPUAllocVisitor(int bus_id,
 
 void GPUProcessState::AddGpuHostAllocVisitor(
     int numa_node, const SubAllocator::Visitor& visitor) {
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
   mutex_lock lock(mu_);
   CHECK(gpu_host_allocators_.empty())  // Crash OK
       << "AddGpuHostAllocVisitor must be called before "

@@ -304,7 +308,8 @@ void GPUProcessState::AddGpuHostAllocVisitor(
 
 void GPUProcessState::AddGpuHostFreeVisitor(
     int numa_node, const SubAllocator::Visitor& visitor) {
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
   mutex_lock lock(mu_);
   CHECK(gpu_host_allocators_.empty())  // Crash OK
       << "AddGpuHostFreeVisitor must be called before "
@@ -40,7 +40,8 @@ const char* const DEVICE_GPU = "GPU";
 const char* const DEVICE_SYCL = "SYCL";
 
 const std::string DeviceName<Eigen::ThreadPoolDevice>::value = DEVICE_CPU;
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 const std::string DeviceName<Eigen::GpuDevice>::value = DEVICE_GPU;
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #ifdef TENSORFLOW_USE_SYCL

@@ -83,7 +83,8 @@ struct DeviceName<Eigen::ThreadPoolDevice> {
   static const std::string value;
 };
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 template <>
 struct DeviceName<Eigen::GpuDevice> {
   static const std::string value;

@@ -99,7 +99,8 @@ REGISTER_KERNEL(float);
 REGISTER_KERNEL(double);
 #undef REGISTER_KERNEL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Forward declarations of the function specializations for GPU (to prevent
 // building the GPU versions here, they will be built compiling _gpu.cu.cc).
 namespace functor {

@@ -382,7 +383,8 @@ REGISTER_KERNEL_BUILDER(
     Name("AdjustContrastv2").Device(DEVICE_CPU).TypeConstraint<float>("T"),
     AdjustContrastOpv2<CPUDevice, float>);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Forward declarations of the function specializations for GPU (to prevent
 // building the GPU versions here, they will be built compiling _gpu.cu.cc).
 namespace functor {

@@ -56,7 +56,8 @@ static Graph* BM_AdjustContrast(int batches, int width, int height) {
 // BM_AdjustContrast_cpu_1_299_299 179084 340186 2181 751.9M items/s
 // BM_AdjustContrast_gpu_32_299_299 85276 123665 4189 2.9G items/s
 BM_AdjustContrastDev(cpu, 1, 299, 299);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 BM_AdjustContrastDev(gpu, 32, 299, 299);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #ifdef TENSORFLOW_USE_SYCL
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -211,7 +211,8 @@ REGISTER_ADDN_CPU(Variant);
 
 #undef REGISTER_ADDN_CPU
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define REGISTER_ADDN_GPU(type) REGISTER_ADDN(type, GPU)
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU);
 TF_CALL_int64(REGISTER_ADDN_GPU);

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -17,7 +17,8 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 

@@ -150,7 +151,8 @@ class ArgMinOp
 
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_ARGMAX);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -83,7 +83,8 @@ Status Concat(OpKernelContext* context, const gtl::ArraySlice<Tensor>& inputs,
       context->allocate_temp(DataTypeToEnum<T>::value, output_shape, output));
   if (output->NumElements() > 0) {
     auto output_flat = output->shaped<T, 2>({1, output->NumElements()});
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
     if (std::is_same<Device, GPUDevice>::value) {
       ConcatGPU<T>(context, inputs_flat, output, &output_flat);
       return Status::OK();

@@ -173,7 +174,8 @@ Status SplitCPU(OpKernelContext* context, const Tensor& input,
   return Status::OK();
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 // Handles the general case, on GPU.
 template <typename T>

@@ -198,7 +200,8 @@ Status Split(OpKernelContext* context, const Tensor& input,
   return Status::OK();
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // TODO(olston, apassos): Handle non-CPU cases.
 // return SplitGPU<T>(context, input, sizes, outputs);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

@@ -122,7 +122,8 @@ REGISTER_KERNELS(float);
 REGISTER_KERNELS(double);
 #undef REGISTER_KERNELS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
 #define DECLARE_GPU_SPEC_NDIM(T, NDIM) \
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -15,7 +15,8 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 

@@ -100,7 +101,8 @@ class BroadcastToOp : public OpKernel {
 TF_CALL_ALL_TYPES(REGISTER_KERNEL);
 #undef REGISTER_KERNEL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 namespace functor {
 #define DECLARE_GPU_TEMPLATE(Type) \

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -168,7 +168,8 @@ Status CpuCastOp::Prepare() {
   return work_ == nullptr ? Unimplemented() : Status::OK();
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 class GpuCastOp : public CastOpBase {
  public:
  explicit GpuCastOp(OpKernelConstruction* ctx) : CastOpBase(ctx) {

@@ -222,7 +223,8 @@ class GpuCastOp : public CastOpBase {
 
 REGISTER_KERNEL_BUILDER(Name("Cast").Device(DEVICE_CPU), CpuCastOp);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define REGISTER_CAST_GPU(srctype, dsttype)              \
   REGISTER_KERNEL_BUILDER(Name("Cast")                   \
                               .TypeConstraint<srctype>("SrcT") \
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -99,7 +99,8 @@ CastFunctorType GetCpuCastFromComplex128(DataType dst_dtype);
 
 CastFunctorType GetCpuCastFromBfloat(DataType dst_dtype);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Same, for GPU.
 CastFunctorType GetGpuCastFromBool(DataType dst_dtype);
 

@@ -27,7 +27,8 @@ CastFunctorType GetCpuCastFromBfloat(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromBfloat(DataType dst_dtype) {
   if (dst_dtype == DT_FLOAT) {
     return [](OpKernelContext* ctx, const Tensor& inp, Tensor* out,
@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromBool(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromBool(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, bool);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromComplex128(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromComplex128(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, std::complex<double>);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromComplex64(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromComplex64(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, std::complex<float>);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromDouble(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromDouble(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, double);
   return nullptr;

@@ -27,7 +27,8 @@ CastFunctorType GetCpuCastFromFloat(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromFloat(DataType dst_dtype) {
   CURRY_TYPES3(CAST_CASE, GPUDevice, float);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromHalf(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromHalf(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, Eigen::half);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt16(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromInt16(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int16);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt32(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromInt32(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int32);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt64(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromInt64(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int64);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt8(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromInt8(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int8);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint16(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromUint16(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint16);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint32(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromUint32(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint32);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint64(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromUint64(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint64);
   return nullptr;

@@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint8(DataType dst_dtype) {
   return nullptr;
 }
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 CastFunctorType GetGpuCastFromUint8(DataType dst_dtype) {
   CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint8);
   return nullptr;
@@ -119,7 +119,8 @@ class HSVToRGBOp : public OpKernel {
 TF_CALL_float(REGISTER_CPU);
 TF_CALL_double(REGISTER_CPU);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Forward declarations of the function specializations for GPU (to prevent
 // building the GPU versions here, they will be built compiling _gpu.cu.cc).
 namespace functor {

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -47,7 +47,8 @@ void ConcatCPU(
     const std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>&
        inputs,
     typename TTypes<T, 2>::Matrix* output);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 template <typename T>
 void ConcatGPU(
     OpKernelContext* c,

@@ -17,7 +17,8 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define EIGEN_USE_GPU
 #endif
 
@@ -92,7 +93,8 @@ ConstantOp::~ConstantOp() {}
 
 REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define REGISTER_KERNEL(D, TYPE)                                      \
   REGISTER_KERNEL_BUILDER(                                            \
       Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \

@@ -216,7 +218,8 @@ REGISTER_KERNEL_BUILDER(Name("Fill")
 #undef REGISTER_KERNEL_SYCL
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL(GPU, Eigen::half);
 REGISTER_KERNEL(GPU, bfloat16);
 REGISTER_KERNEL(GPU, float);

@@ -300,7 +303,8 @@ REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                         ZerosLikeOp<CPUDevice, int32>);
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL(bool, GPU);
 REGISTER_KERNEL(Eigen::half, GPU);
 REGISTER_KERNEL(bfloat16, GPU);

@@ -353,7 +357,8 @@ REGISTER_KERNEL_BUILDER(Name("OnesLike")
                         OnesLikeOp<CPUDevice, int32>);
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL(bool, GPU);
 REGISTER_KERNEL(Eigen::half, GPU);
 REGISTER_KERNEL(bfloat16, GPU);
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -86,7 +86,8 @@ void ConstantOpTest::PersistentMemoryTrackingTest(bool on_gpu) {
 
 TEST_F(ConstantOpTest, PersistentMemoryTracking) {
   PersistentMemoryTrackingTest(false);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
   PersistentMemoryTrackingTest(true);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }

@@ -130,7 +130,8 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER);
 
 #undef REGISTER
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 // Forward declarations of the functor specializations for GPU.
 namespace functor {

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -15,7 +15,8 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 

@@ -28,7 +29,8 @@ limitations under the License.
 
 using tensorflow::BinaryElementWiseOp;
 using tensorflow::DEVICE_CPU;
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 using tensorflow::DEVICE_GPU;
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 using tensorflow::OpKernel;
@@ -143,7 +145,8 @@ REGISTER_KERNEL_BUILDER(
     Name("FakeQuantWithMinMaxArgsGradient").Device(DEVICE_CPU),
     FakeQuantWithMinMaxArgsGradientOp<CPUDevice>);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 typedef Eigen::GpuDevice GPUDevice;
 
 // Forward declarations for functor specializations for GPU.

@@ -265,7 +268,8 @@ REGISTER_KERNEL_BUILDER(
     Name("FakeQuantWithMinMaxVarsGradient").Device(DEVICE_CPU),
     FakeQuantWithMinMaxVarsGradientOp<CPUDevice>);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 template <>
 void FakeQuantWithMinMaxVarsFunctor<GPUDevice>::operator()(
     const GPUDevice& d, typename TTypes<float>::ConstFlat inputs,

@@ -411,7 +415,8 @@ REGISTER_KERNEL_BUILDER(
     Name("FakeQuantWithMinMaxVarsPerChannelGradient").Device(DEVICE_CPU),
     FakeQuantWithMinMaxVarsPerChannelGradientOp<CPUDevice>);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 template <>
 void FakeQuantWithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()(
     const GPUDevice& d, typename TTypes<float>::ConstMatrix inputs,

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define FAKE_QUANT_NO_DEBUG
 
@@ -28,7 +28,8 @@ limitations under the License.
 #include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/work_sharder.h"
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #include "tensorflow/core/platform/stream_executor.h"
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 

@@ -286,7 +287,8 @@ REGISTER_KERNEL_BUILDER(Name("IRFFT3D").Device(DEVICE_CPU).Label(FFT_LABEL),
 
 #undef FFT_LABEL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 namespace {
 template <typename T>

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -15,7 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_H_
 #define TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_H_
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/framework/op_kernel.h"

@@ -18,7 +18,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_GPU_H_
 #define TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_GPU_H_
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 namespace tensorflow {
 

@@ -112,7 +112,8 @@ REGISTER_GPU_KERNEL(Variant);
 
 #undef REGISTER_GPU_KERNEL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // A special GPU kernel for int32 and bool.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
 // registration requires all int32 inputs and outputs to be in host memory.
@@ -15,7 +15,8 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 

@@ -17,7 +17,8 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 

@@ -140,7 +141,8 @@ class OneHotOp : public OpKernel {
 
 TF_CALL_ALL_TYPES(REGISTER_ONE_HOT);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 // Forward declarations of the functor specializations for GPU.
 namespace functor {

@@ -15,7 +15,8 @@ limitations under the License.
 
 // See docs in ../ops/array_ops.cc
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -294,7 +294,8 @@ TF_CALL_POD_TYPES(REGISTER_KERNEL);
 TF_CALL_string(REGISTER_KERNEL);
 #undef REGISTER_KERNEL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
 #define DECLARE_GPU_SPEC(T, Dims) \

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -15,7 +15,8 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
@@ -241,7 +242,8 @@ TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
 #undef REGISTER_CPU_KERNEL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 #define REGISTER_GPU_KERNEL(T)                            \
   REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV2") \
                               .Device(DEVICE_GPU)         \

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -86,7 +86,8 @@ REGISTER_KERNEL_BUILDER(Name("Reshape")
 #undef REGISTER_SYCL_KERNEL
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
 // registration requires all int32 inputs and outputs to be in host memory.

@@ -87,7 +87,8 @@ void SoftplusGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
 TF_CALL_FLOAT_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
 #define DECLARE_GPU_SPEC(T) \
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -216,7 +216,8 @@ class StageOp : public OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_CPU), StageOp);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_GPU), StageOp);
 #endif
 #ifdef TENSORFLOW_USE_SYCL

@@ -249,7 +250,8 @@ class UnstageOp : public OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_CPU), UnstageOp);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_GPU), UnstageOp);
 #endif
 #ifdef TENSORFLOW_USE_SYCL

@@ -284,7 +286,8 @@ class StagePeekOp : public OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(Name("StagePeek").Device(DEVICE_CPU), StagePeekOp);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(
     Name("StagePeek").HostMemory("index").Device(DEVICE_GPU), StagePeekOp);
 #endif

@@ -314,7 +317,8 @@ class StageSizeOp : public OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(Name("StageSize").Device(DEVICE_CPU), StageSizeOp);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("StageSize").HostMemory("size").Device(DEVICE_GPU),
                         StageSizeOp);
 #endif

@@ -339,7 +343,8 @@ class StageClearOp : public OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_CPU), StageClearOp);
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_GPU), StageClearOp);
 #endif
 #ifdef TENSORFLOW_USE_SYCL
@@ -134,7 +134,8 @@ TF_CALL_half(REGISTER_CPU);
 TF_CALL_float(REGISTER_CPU);
 TF_CALL_double(REGISTER_CPU);
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<Eigen::half>("T"),

@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 
 #define EIGEN_USE_GPU
 

@@ -35,7 +35,8 @@ bool IsBuiltWithROCm() {
 }
 
 bool GpuSupportsHalfMatMulAndConv() {
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
  return true;
 #else
  return false;