From 4a34c4e3b08de5eeee6bc1fe9ff0f32f8b890a73 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 5 Feb 2020 21:22:38 +0000 Subject: [PATCH] working around a known gcc/hcc interface kernel args passing bug --- tensorflow/core/kernels/relu_op.cc | 4 +--- tensorflow/core/kernels/relu_op.h | 4 ++-- tensorflow/core/kernels/relu_op_functor.h | 18 ++++++++++++++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index 75f6649e983..f5d23ca9d25 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -105,9 +105,7 @@ namespace functor { extern template struct Relu6Grad; \ \ template <> \ - void LeakyRelu::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor features, T alpha, \ - typename TTypes::Tensor activations); \ + void LeakyRelu::operator()(LeakyReluArgs args); \ extern template struct LeakyRelu; \ \ template <> \ diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h index a4638c70c2c..2ef38a62e40 100644 --- a/tensorflow/core/kernels/relu_op.h +++ b/tensorflow/core/kernels/relu_op.h @@ -143,8 +143,8 @@ class LeakyReluOp : public UnaryElementWiseOp> { void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { functor::LeakyRelu functor; - functor(context->eigen_device(), input.flat(), alpha_, - output->flat()); + functor({context->eigen_device(), input.flat(), alpha_, + output->flat()}); } private: diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 9337282b3d3..913d5f7ced0 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -98,11 +98,21 @@ struct LeakyRelu { // // features: any shape. // activations: same shape as "features". 
- void operator()(const Device& d, typename TTypes::ConstTensor features, - T alpha, typename TTypes::Tensor activations) { + + // Need to bundle the args (to the LeakyRelu functor) within a struct + // Not doing so leads to Eigen kernel args not getting populated + // correctly for Eigen::half type (when building on the ROCM platform) + struct LeakyReluArgs { + const Device& d; + typename TTypes::ConstTensor features; + T alpha; + typename TTypes::Tensor activations; + }; + void operator()(LeakyReluArgs args) { // Note that alpha might be > 1 or < 0, so we don't use cwiseMax here. - activations.device(d) = - (features > static_cast(0)).select(features, features * alpha); + args.activations.device(args.d) = + (args.features > static_cast(0)) + .select(args.features, args.features * args.alpha); } };