[OpenCL] Registers stack op (#10324)

This commit is contained in:
Luke Iwanski 2017-06-20 02:45:07 +01:00 committed by gunan
parent 6457f0df53
commit 0f17a54d4d

View File

@ -40,6 +40,9 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice; typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
// Device tag for SYCL builds, declared alongside CPUDevice/GPUDevice. It is
// used in this file as the template argument of StackPushOp for the
// DEVICE_SYCL kernel registrations and in StackPushOp's swap-memory check.
typedef Eigen::SyclDevice SYCLDevice;
#endif // TENSORFLOW_USE_SYCL
class Stack : public ResourceBase { class Stack : public ResourceBase {
public: public:
@ -182,6 +185,10 @@ class StackOp : public OpKernel {
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_CPU), StackOp); REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_CPU), StackOp);
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_GPU).HostMemory("handle"), REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_GPU).HostMemory("handle"),
StackOp); StackOp);
#ifdef TENSORFLOW_USE_SYCL
// Registers StackOp as the "Stack" kernel for DEVICE_SYCL. As with the
// DEVICE_GPU registration, the "handle" argument is placed in host memory.
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_SYCL).HostMemory("handle"),
StackOp);
#endif // TENSORFLOW_USE_SYCL
template <typename Device> template <typename Device>
class StackPushOp : public AsyncOpKernel { class StackPushOp : public AsyncOpKernel {
@ -213,7 +220,11 @@ class StackPushOp : public AsyncOpKernel {
static constexpr int kCopyThreshold = 2048; static constexpr int kCopyThreshold = 2048;
static constexpr double kOccupancy = 0.7; static constexpr double kOccupancy = 0.7;
if (swap_memory_ && !alloc_attrs.on_host() && if (swap_memory_ && !alloc_attrs.on_host() &&
std::is_same<Device, GPUDevice>::value && ( std::is_same<Device, GPUDevice>::value
#ifdef TENSORFLOW_USE_SYCL
|| std::is_same<Device, SYCLDevice>::value
#endif // TENSORFLOW_USE_SYCL
) &&
tensor.TotalBytes() > kCopyThreshold && stack->IsUsefulToSwap(tensor)) { tensor.TotalBytes() > kCopyThreshold && stack->IsUsefulToSwap(tensor)) {
DeviceContext* device_ctxt = ctx->op_device_context(); DeviceContext* device_ctxt = ctx->op_device_context();
auto device = static_cast<tensorflow::Device*>(ctx->device()); auto device = static_cast<tensorflow::Device*>(ctx->device());
@ -289,6 +300,31 @@ REGISTER_GPU_HOST_KERNEL(bool);
#undef REGISTER_GPU_HOST_KERNEL #undef REGISTER_GPU_HOST_KERNEL
#ifdef TENSORFLOW_USE_SYCL
// Registers StackPushOp<SYCLDevice> as the "StackPush" kernel for DEVICE_SYCL,
// once for each type supplied by TF_CALL_GPU_NUMBER_TYPES. Only "handle" is
// placed in host memory for these registrations.
//
// The macro body intentionally has no trailing ';', consistent with the other
// SYCL registration macros in this file, so that each expansion does not emit
// an extra empty declaration.
#define REGISTER_SYCL_KERNEL(type) \
  REGISTER_KERNEL_BUILDER(Name("StackPush") \
                              .Device(DEVICE_SYCL) \
                              .HostMemory("handle") \
                              .TypeConstraint<type>("T"), \
                          StackPushOp<SYCLDevice>)
TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL_KERNEL);

// Variant used for int32 and bool: in addition to "handle", the "elem" input
// and the "output" are placed in host memory.
// NOTE(review): presumably mirrors the REGISTER_GPU_HOST_KERNEL registrations
// for the same types — confirm against the GPU section of this file.
#define REGISTER_SYCL_HOST_KERNEL(type) \
  REGISTER_KERNEL_BUILDER(Name("StackPush") \
                              .Device(DEVICE_SYCL) \
                              .HostMemory("handle") \
                              .HostMemory("elem") \
                              .HostMemory("output") \
                              .TypeConstraint<type>("T"), \
                          StackPushOp<SYCLDevice>)
REGISTER_SYCL_HOST_KERNEL(int32);
REGISTER_SYCL_HOST_KERNEL(bool);

// The helper macros are local to this section; undefine them so the StackPop
// section can reuse the same names.
#undef REGISTER_SYCL_KERNEL
#undef REGISTER_SYCL_HOST_KERNEL
#endif // TENSORFLOW_USE_SYCL
class StackPopOp : public AsyncOpKernel { class StackPopOp : public AsyncOpKernel {
public: public:
explicit StackPopOp(OpKernelConstruction* context) : AsyncOpKernel(context) {} explicit StackPopOp(OpKernelConstruction* context) : AsyncOpKernel(context) {}
@ -359,6 +395,31 @@ REGISTER_GPU_HOST_KERNEL(bool);
#undef REGISTER_GPU_HOST_KERNEL #undef REGISTER_GPU_HOST_KERNEL
#ifdef TENSORFLOW_USE_SYCL
// Registers StackPopOp as the "StackPop" kernel for DEVICE_SYCL, once for each
// type supplied by TF_CALL_GPU_NUMBER_TYPES (constrained via the "elem_type"
// attribute). Only "handle" is placed in host memory for these registrations.
#define REGISTER_SYCL_KERNEL(type) \
REGISTER_KERNEL_BUILDER(Name("StackPop") \
.Device(DEVICE_SYCL) \
.HostMemory("handle") \
.TypeConstraint<type>("elem_type"), \
StackPopOp)
TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
// Variant used for int32 and bool: both "handle" and "elem" are placed in
// host memory.
#define REGISTER_SYCL_HOST_KERNEL(type) \
REGISTER_KERNEL_BUILDER(Name("StackPop") \
.Device(DEVICE_SYCL) \
.HostMemory("handle") \
.HostMemory("elem") \
.TypeConstraint<type>("elem_type"), \
StackPopOp)
REGISTER_SYCL_HOST_KERNEL(int32);
REGISTER_SYCL_HOST_KERNEL(bool);
// The helper macros are only needed for the registrations above.
#undef REGISTER_SYCL_KERNEL
#undef REGISTER_SYCL_HOST_KERNEL
#endif // TENSORFLOW_USE_SYCL
class StackCloseOp : public OpKernel { class StackCloseOp : public OpKernel {
public: public:
explicit StackCloseOp(OpKernelConstruction* context) : OpKernel(context) {} explicit StackCloseOp(OpKernelConstruction* context) : OpKernel(context) {}
@ -376,5 +437,8 @@ class StackCloseOp : public OpKernel {
REGISTER_KERNEL_BUILDER(Name("StackClose").Device(DEVICE_CPU), StackCloseOp); REGISTER_KERNEL_BUILDER(Name("StackClose").Device(DEVICE_CPU), StackCloseOp);
REGISTER_KERNEL_BUILDER( REGISTER_KERNEL_BUILDER(
Name("StackClose").Device(DEVICE_GPU).HostMemory("handle"), StackCloseOp); Name("StackClose").Device(DEVICE_GPU).HostMemory("handle"), StackCloseOp);
#ifdef TENSORFLOW_USE_SYCL
// Registers StackCloseOp as the "StackClose" kernel for DEVICE_SYCL. As with
// the DEVICE_GPU registration, the "handle" argument is placed in host memory.
REGISTER_KERNEL_BUILDER(
Name("StackClose").Device(DEVICE_SYCL).HostMemory("handle"), StackCloseOp);
#endif // TENSORFLOW_USE_SYCL
} // namespace tensorflow } // namespace tensorflow