diff --git a/tensorflow/core/kernels/stack_ops.cc b/tensorflow/core/kernels/stack_ops.cc index b4698a80530..2db3e5ef777 100644 --- a/tensorflow/core/kernels/stack_ops.cc +++ b/tensorflow/core/kernels/stack_ops.cc @@ -40,6 +40,9 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; +#ifdef TENSORFLOW_USE_SYCL +typedef Eigen::SyclDevice SYCLDevice; +#endif // TENSORFLOW_USE_SYCL class Stack : public ResourceBase { public: @@ -182,6 +185,10 @@ class StackOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_CPU), StackOp); REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_GPU).HostMemory("handle"), StackOp); +#ifdef TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_SYCL).HostMemory("handle"), + StackOp); +#endif // TENSORFLOW_USE_SYCL template class StackPushOp : public AsyncOpKernel { @@ -213,7 +220,11 @@ class StackPushOp : public AsyncOpKernel { static constexpr int kCopyThreshold = 2048; static constexpr double kOccupancy = 0.7; if (swap_memory_ && !alloc_attrs.on_host() && - std::is_same::value && + ( std::is_same::value +#ifdef TENSORFLOW_USE_SYCL + || std::is_same::value +#endif // TENSORFLOW_USE_SYCL + ) && tensor.TotalBytes() > kCopyThreshold && stack->IsUsefulToSwap(tensor)) { DeviceContext* device_ctxt = ctx->op_device_context(); auto device = static_cast(ctx->device()); @@ -289,6 +300,31 @@ REGISTER_GPU_HOST_KERNEL(bool); #undef REGISTER_GPU_HOST_KERNEL +#ifdef TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("StackPush") \ + .Device(DEVICE_SYCL) \ + .HostMemory("handle") \ + .TypeConstraint("T"), \ + StackPushOp); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL_KERNEL); + +#define REGISTER_SYCL_HOST_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("StackPush") \ + .Device(DEVICE_SYCL) \ + .HostMemory("handle") \ + .HostMemory("elem") \ + .HostMemory("output") \ + .TypeConstraint("T"), \ + StackPushOp) + +REGISTER_SYCL_HOST_KERNEL(int32); +REGISTER_SYCL_HOST_KERNEL(bool); +#undef REGISTER_SYCL_KERNEL +#undef REGISTER_SYCL_HOST_KERNEL +#endif // TENSORFLOW_USE_SYCL + class StackPopOp : public AsyncOpKernel { public: explicit StackPopOp(OpKernelConstruction* context) : AsyncOpKernel(context) {} @@ -359,6 +395,31 @@ REGISTER_GPU_HOST_KERNEL(bool); #undef REGISTER_GPU_HOST_KERNEL +#ifdef TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("StackPop") \ + .Device(DEVICE_SYCL) \ + .HostMemory("handle") \ + .TypeConstraint("elem_type"), \ + StackPopOp) + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL_KERNEL); + +#define REGISTER_SYCL_HOST_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("StackPop") \ + .Device(DEVICE_SYCL) \ + .HostMemory("handle") \ + .HostMemory("elem") \ + .TypeConstraint("elem_type"), \ + StackPopOp) + +REGISTER_SYCL_HOST_KERNEL(int32); +REGISTER_SYCL_HOST_KERNEL(bool); + +#undef REGISTER_SYCL_KERNEL +#undef REGISTER_SYCL_HOST_KERNEL +#endif // TENSORFLOW_USE_SYCL + class StackCloseOp : public OpKernel { public: explicit StackCloseOp(OpKernelConstruction* context) : OpKernel(context) {} @@ -376,5 +437,8 @@ class StackCloseOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("StackClose").Device(DEVICE_CPU), StackCloseOp); REGISTER_KERNEL_BUILDER( Name("StackClose").Device(DEVICE_GPU).HostMemory("handle"), StackCloseOp); - +#ifdef TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER( + Name("StackClose").Device(DEVICE_SYCL).HostMemory("handle"), StackCloseOp); +#endif // TENSORFLOW_USE_SYCL } // namespace tensorflow