[OpenCL] Registers stack op (#10324)
This commit is contained in:
parent
6457f0df53
commit
0f17a54d4d
@ -40,6 +40,9 @@ namespace tensorflow {
|
|||||||
|
|
||||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||||
typedef Eigen::GpuDevice GPUDevice;
|
typedef Eigen::GpuDevice GPUDevice;
|
||||||
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
|
typedef Eigen::SyclDevice SYCLDevice;
|
||||||
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
|
|
||||||
class Stack : public ResourceBase {
|
class Stack : public ResourceBase {
|
||||||
public:
|
public:
|
||||||
@ -182,6 +185,10 @@ class StackOp : public OpKernel {
|
|||||||
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_CPU), StackOp);
|
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_CPU), StackOp);
|
||||||
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_GPU).HostMemory("handle"),
|
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_GPU).HostMemory("handle"),
|
||||||
StackOp);
|
StackOp);
|
||||||
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
|
REGISTER_KERNEL_BUILDER(Name("Stack").Device(DEVICE_SYCL).HostMemory("handle"),
|
||||||
|
StackOp);
|
||||||
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
|
|
||||||
template <typename Device>
|
template <typename Device>
|
||||||
class StackPushOp : public AsyncOpKernel {
|
class StackPushOp : public AsyncOpKernel {
|
||||||
@ -213,7 +220,11 @@ class StackPushOp : public AsyncOpKernel {
|
|||||||
static constexpr int kCopyThreshold = 2048;
|
static constexpr int kCopyThreshold = 2048;
|
||||||
static constexpr double kOccupancy = 0.7;
|
static constexpr double kOccupancy = 0.7;
|
||||||
if (swap_memory_ && !alloc_attrs.on_host() &&
|
if (swap_memory_ && !alloc_attrs.on_host() &&
|
||||||
std::is_same<Device, GPUDevice>::value &&
|
( std::is_same<Device, GPUDevice>::value
|
||||||
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
|
|| std::is_same<Device, SYCLDevice>::value
|
||||||
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
|
) &&
|
||||||
tensor.TotalBytes() > kCopyThreshold && stack->IsUsefulToSwap(tensor)) {
|
tensor.TotalBytes() > kCopyThreshold && stack->IsUsefulToSwap(tensor)) {
|
||||||
DeviceContext* device_ctxt = ctx->op_device_context();
|
DeviceContext* device_ctxt = ctx->op_device_context();
|
||||||
auto device = static_cast<tensorflow::Device*>(ctx->device());
|
auto device = static_cast<tensorflow::Device*>(ctx->device());
|
||||||
@ -289,6 +300,31 @@ REGISTER_GPU_HOST_KERNEL(bool);
|
|||||||
|
|
||||||
#undef REGISTER_GPU_HOST_KERNEL
|
#undef REGISTER_GPU_HOST_KERNEL
|
||||||
|
|
||||||
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
|
#define REGISTER_SYCL_KERNEL(type) \
|
||||||
|
REGISTER_KERNEL_BUILDER(Name("StackPush") \
|
||||||
|
.Device(DEVICE_SYCL) \
|
||||||
|
.HostMemory("handle") \
|
||||||
|
.TypeConstraint<type>("T"), \
|
||||||
|
StackPushOp<SYCLDevice>);
|
||||||
|
|
||||||
|
TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||||
|
|
||||||
|
#define REGISTER_SYCL_HOST_KERNEL(type) \
|
||||||
|
REGISTER_KERNEL_BUILDER(Name("StackPush") \
|
||||||
|
.Device(DEVICE_SYCL) \
|
||||||
|
.HostMemory("handle") \
|
||||||
|
.HostMemory("elem") \
|
||||||
|
.HostMemory("output") \
|
||||||
|
.TypeConstraint<type>("T"), \
|
||||||
|
StackPushOp<SYCLDevice>)
|
||||||
|
|
||||||
|
REGISTER_SYCL_HOST_KERNEL(int32);
|
||||||
|
REGISTER_SYCL_HOST_KERNEL(bool);
|
||||||
|
#undef REGISTER_SYCL_KERNEL
|
||||||
|
#undef REGISTER_SYCL_HOST_KERNEL
|
||||||
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
|
|
||||||
class StackPopOp : public AsyncOpKernel {
|
class StackPopOp : public AsyncOpKernel {
|
||||||
public:
|
public:
|
||||||
explicit StackPopOp(OpKernelConstruction* context) : AsyncOpKernel(context) {}
|
explicit StackPopOp(OpKernelConstruction* context) : AsyncOpKernel(context) {}
|
||||||
@ -359,6 +395,31 @@ REGISTER_GPU_HOST_KERNEL(bool);
|
|||||||
|
|
||||||
#undef REGISTER_GPU_HOST_KERNEL
|
#undef REGISTER_GPU_HOST_KERNEL
|
||||||
|
|
||||||
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
|
#define REGISTER_SYCL_KERNEL(type) \
|
||||||
|
REGISTER_KERNEL_BUILDER(Name("StackPop") \
|
||||||
|
.Device(DEVICE_SYCL) \
|
||||||
|
.HostMemory("handle") \
|
||||||
|
.TypeConstraint<type>("elem_type"), \
|
||||||
|
StackPopOp)
|
||||||
|
|
||||||
|
TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
|
||||||
|
|
||||||
|
#define REGISTER_SYCL_HOST_KERNEL(type) \
|
||||||
|
REGISTER_KERNEL_BUILDER(Name("StackPop") \
|
||||||
|
.Device(DEVICE_SYCL) \
|
||||||
|
.HostMemory("handle") \
|
||||||
|
.HostMemory("elem") \
|
||||||
|
.TypeConstraint<type>("elem_type"), \
|
||||||
|
StackPopOp)
|
||||||
|
|
||||||
|
REGISTER_SYCL_HOST_KERNEL(int32);
|
||||||
|
REGISTER_SYCL_HOST_KERNEL(bool);
|
||||||
|
|
||||||
|
#undef REGISTER_SYCL_KERNEL
|
||||||
|
#undef REGISTER_SYCL_HOST_KERNEL
|
||||||
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
|
|
||||||
class StackCloseOp : public OpKernel {
|
class StackCloseOp : public OpKernel {
|
||||||
public:
|
public:
|
||||||
explicit StackCloseOp(OpKernelConstruction* context) : OpKernel(context) {}
|
explicit StackCloseOp(OpKernelConstruction* context) : OpKernel(context) {}
|
||||||
@ -376,5 +437,8 @@ class StackCloseOp : public OpKernel {
|
|||||||
REGISTER_KERNEL_BUILDER(Name("StackClose").Device(DEVICE_CPU), StackCloseOp);
|
REGISTER_KERNEL_BUILDER(Name("StackClose").Device(DEVICE_CPU), StackCloseOp);
|
||||||
REGISTER_KERNEL_BUILDER(
|
REGISTER_KERNEL_BUILDER(
|
||||||
Name("StackClose").Device(DEVICE_GPU).HostMemory("handle"), StackCloseOp);
|
Name("StackClose").Device(DEVICE_GPU).HostMemory("handle"), StackCloseOp);
|
||||||
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
|
REGISTER_KERNEL_BUILDER(
|
||||||
|
Name("StackClose").Device(DEVICE_SYCL).HostMemory("handle"), StackCloseOp);
|
||||||
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
Loading…
Reference in New Issue
Block a user