Implemented selu activation #10612 (#10818)

* Implemented selu activation #10612

* fix the error in _SeluGradGrad

* update golden file for api change

* add XLA kernels for Selu and SeluGrad
Lakshay Garg 2017-07-26 10:18:01 +05:30 committed by Vijay Vasudevan
parent 80d57aeadd
commit c2ce4f68c7
21 changed files with 465 additions and 14 deletions


@@ -86,6 +86,15 @@ Status EluGradHelper(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("Elu", EluGradHelper);
Status SeluGradHelper(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
auto dx = internal::SeluGrad(scope, grad_inputs[0], op.output(0));
grad_outputs->push_back(dx);
return scope.status();
}
REGISTER_GRADIENT_OP("Selu", SeluGradHelper);
} // anonymous namespace
} // namespace ops
} // namespace tensorflow


@@ -103,5 +103,15 @@ TEST_F(NNGradTest, EluGrad) {
RunTest(x, x_init_value, y, shape);
}
TEST_F(NNGradTest, SeluGrad) {
TensorShape shape({5, 2});
auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
auto y = Selu(scope_, x);
Tensor x_init_value = test::AsTensor<float>(
{-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f},
{5, 2});
RunTest(x, x_init_value, y, shape);
}
} // namespace
} // namespace tensorflow


@@ -177,6 +177,7 @@ op { name: "MaxPoolGradWithArgmax" hide: true }
op { name: "ReluGrad" hide: true }
op { name: "Relu6Grad" hide: true }
op { name: "EluGrad" hide: true }
op { name: "SeluGrad" hide: true }
op { name: "SoftplusGrad" hide: true } op { name: "SoftplusGrad" hide: true }
op { name: "SoftsignGrad" hide: true } op { name: "SoftsignGrad" hide: true }
op { name: "FractionalAvgPoolGrad" hide: true } op { name: "FractionalAvgPoolGrad" hide: true }


@@ -113,6 +113,14 @@ class BinaryOpsTest(XLATestCase):
np.array([-.6, -.4, -.2, 0, .2, .4], dtype=dtype),
expected=np.array([0.4, 1.2, 2.4, 4, 5, 6], dtype=dtype))
self._testBinary(
gen_nn_ops._selu_grad,
np.array([1, 2, 3, 4, 5, 6], dtype=dtype),
np.array([-.6, -.4, -.2, .2, .4, .6], dtype=dtype),
expected=np.array(
[1.158099340847, 2.7161986816948, 4.67429802254,
4.202803949422, 5.2535049367774, 6.30420592413], dtype=dtype))
self._testBinary(
gen_nn_ops._relu_grad,
np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype),

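For reference, the expected SeluGrad values above follow directly from the documented rule (`gradients * (outputs + scale * alpha)` if outputs < 0, `scale * gradients` otherwise); a minimal NumPy sketch using the same constants as the kernels (np_selu_grad is our own reference helper, not part of the change):

import numpy as np

scale = 1.0507009873554804934193349852946
scale_alpha = 1.7580993408473768599402175208123  # scale * alpha

def np_selu_grad(gradients, outputs):
  # gradients * (outputs + scale * alpha) if outputs < 0, scale * gradients otherwise.
  return np.where(outputs < 0, gradients * (outputs + scale_alpha),
                  scale * gradients)

print(np_selu_grad(np.array([1., 2., 3., 4., 5., 6.]),
                   np.array([-.6, -.4, -.2, .2, .4, .6])))
# ~ [1.1581  2.7162  4.6743  4.2028  5.2535  6.3042]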

@@ -1434,6 +1434,23 @@ TEST_F(OpTest, EluGrad) {
});
}
TEST_F(OpTest, Selu) {
Repeatedly([this]() {
return ExpectTfAndXlaOutputsAreClose(
OpTestBuilder("Selu").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
});
}
TEST_F(OpTest, SeluGrad) {
Repeatedly([this]() {
auto dims = RandomDims();
return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("SeluGrad")
.RandomInput(DT_FLOAT, dims)
.RandomInput(DT_FLOAT, dims)
.Attr("T", DT_FLOAT));
});
}
TEST_F(OpTest, Equal) {
Repeatedly([this]() {
DataType type = Choose<DataType>({DT_INT32, DT_FLOAT});


@@ -229,6 +229,11 @@ class UnaryOpsTest(XLATestCase):
np.array([[-1, 0, 1]], dtype=dtype),
expected=np.array([[-0.63212056, 0, 1]], dtype=dtype))
self._assertOpOutputMatchesExpected(
nn_ops.selu,
np.array([[-1, 0, 1]], dtype=dtype),
expected=np.array([[-1.11133074, 0., 1.05070099]], dtype=dtype))
self._assertOpOutputMatchesExpected(
nn_ops.relu,
np.array([[-1, 1]], dtype=dtype),

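The expected selu values above can likewise be reproduced from the forward definition (`scale * alpha * (exp(features) - 1)` if features < 0, `scale * features` otherwise); a minimal NumPy sketch with the kernel constants (np_selu is our own reference helper):

import numpy as np

scale = 1.0507009873554804934193349852946
scale_alpha = 1.7580993408473768599402175208123  # scale * alpha

def np_selu(features):
  # scale * alpha * (exp(features) - 1) if features < 0, scale * features otherwise.
  return np.where(features < 0, scale_alpha * np.expm1(features),
                  scale * features)

print(np_selu(np.array([[-1., 0., 1.]])))
# ~ [[-1.11133074  0.  1.05070099]]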

@@ -61,5 +61,49 @@ class EluGradOp : public XlaOpKernel {
REGISTER_XLA_OP(Name("Elu"), EluOp);
REGISTER_XLA_OP(Name("EluGrad"), EluGradOp);
class SeluOp : public XlaOpKernel {
public:
explicit SeluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
// Computes the scaled exponential linear unit of the input x:
// scale * x if x > 0, scale * alpha * (exp(x) - 1) otherwise.
void Compile(XlaOpKernelContext* ctx) override {
xla::ComputationBuilder* b = ctx->builder();
const auto zero = XlaHelpers::Zero(b, input_type(0));
const auto one = XlaHelpers::One(b, input_type(0));
const auto scale = XlaHelpers::FloatLiteral(b, input_type(0),
1.0507009873554804934193349852946);
const auto scale_alpha = XlaHelpers::FloatLiteral(b, input_type(0),
1.7580993408473768599402175208123);
const auto pred = b->Gt(ctx->Input(0), zero);
const auto expm1 = b->Sub(b->Exp(ctx->Input(0)), one);
ctx->SetOutput(0, b->Select(pred, b->Mul(scale, ctx->Input(0)),
b->Mul(scale_alpha, expm1)));
}
};
class SeluGradOp : public XlaOpKernel {
public:
explicit SeluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
// Returns the lhs (incoming gradient) times scale if the rhs (Selu output) > 0,
// otherwise lhs * (rhs + scale * alpha).
void Compile(XlaOpKernelContext* ctx) override {
xla::ComputationBuilder* b = ctx->builder();
const auto zero = XlaHelpers::Zero(b, input_type(0));
const auto one = XlaHelpers::One(b, input_type(0));
const auto scale = XlaHelpers::FloatLiteral(b, input_type(0),
1.0507009873554804934193349852946);
const auto scale_alpha = XlaHelpers::FloatLiteral(b, input_type(0),
1.7580993408473768599402175208123);
const auto grad = ctx->Input(0);
const auto activation = ctx->Input(1);
const auto lin_grad = b->Mul(grad, scale);
const auto exp_grad = b->Mul(grad, b->Add(activation, scale_alpha));
const auto pred = b->Gt(activation, zero);
ctx->SetOutput(0, b->Select(pred, lin_grad, exp_grad));
}
};
REGISTER_XLA_OP(Name("Selu"), SeluOp);
REGISTER_XLA_OP(Name("SeluGrad"), SeluGradOp);
} // namespace
} // namespace tensorflow


@@ -50,15 +50,21 @@ typedef Eigen::SyclDevice SYCLDevice;
TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS);
#undef REGISTER_RELU_KERNELS
#define REGISTER_ELU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
EluOp<CPUDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
EluGradOp<CPUDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
SeluOp<CPUDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
SeluGradOp<CPUDevice, type>)
// Elu and Selu only make sense with float or double.
TF_CALL_GPU_NUMBER_TYPES(REGISTER_ELU_KERNELS);
#undef REGISTER_ELU_KERNELS
@@ -103,7 +109,23 @@ namespace functor {
const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
typename TTypes<T>::ConstTensor activations, \
typename TTypes<T>::Tensor backprops); \
extern template struct EluGrad<GPUDevice, T>; \
\
template <> \
void Selu<GPUDevice, T>::operator()( \
const GPUDevice& d, \
typename TTypes<T>::ConstTensor features, \
typename TTypes<T>::Tensor activations); \
extern template struct Selu<GPUDevice, T>; \
\
template <> \
void SeluGrad<GPUDevice, T>::operator()( \
const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
typename TTypes<T>::ConstTensor activations, \
typename TTypes<T>::Tensor backprops); \
extern template struct SeluGrad<GPUDevice, T>;
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
} // namespace functor
@@ -127,7 +149,15 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
EluOp<GPUDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
EluGradOp<GPUDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
SeluOp<GPUDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
SeluGradOp<GPUDevice, type>)
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS
@@ -154,7 +184,15 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
EluOp<SYCLDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
EluGradOp<SYCLDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
SeluOp<SYCLDevice, type>); \
REGISTER_KERNEL_BUILDER( \
Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
SeluGradOp<SYCLDevice, type>)
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS);
#undef REGISTER_SYCL_KERNELS


@@ -173,6 +173,48 @@ void EluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
output->flat<T>());
}
template <typename Device, typename T>
class SeluOp : public UnaryElementWiseOp<T, SeluOp<Device, T>> {
public:
using UnaryElementWiseOp<T, SeluOp<Device, T>>::UnaryElementWiseOp;
void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
functor::Selu<Device, T> functor;
functor(context->eigen_device<Device>(), input.flat<T>(),
output->flat<T>());
}
};
template <typename Device, typename T>
class SeluGradOp : public BinaryElementWiseOp<T, SeluGradOp<Device, T>> {
public:
using BinaryElementWiseOp<T, SeluGradOp<Device, T>>::BinaryElementWiseOp;
void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
const Tensor& a, Tensor* output);
// INPUTS:
// g (gradients): backpropagated gradients
// a (outputs): outputs of the SeluOp()
// OUTPUT:
// gradients to backprop
template <int NDIMS>
void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a,
Tensor* output) {
OperateNoTemplate(context, g, a, output);
}
};
template <typename Device, typename T>
void SeluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
const Tensor& g, const Tensor& a,
Tensor* output) {
if (!ReluHelpers::ValidateSameSize(context, g, a)) return;
functor::SeluGrad<Device, T> functor;
functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(),
output->flat<T>());
}
} // namespace tensorflow
#undef EIGEN_USE_THREADS


@@ -125,6 +125,46 @@ struct EluGrad {
}
};
// Functor used by SeluOp to do the computations.
template <typename Device, typename T>
struct Selu {
// Computes Selu activation.
//
// features: any shape.
// activations: same shape as "features".
void operator()(const Device& d, typename TTypes<T>::ConstTensor features,
typename TTypes<T>::Tensor activations) {
// features.constant(?)
const auto scale = static_cast<T>(1.0507009873554804934193349852946);
const auto scale_alpha = static_cast<T>(1.7580993408473768599402175208123);
const auto one = static_cast<T>(1);
const auto zero = static_cast<T>(0);
activations.device(d) =
(features < zero)
.select(scale_alpha * (features.exp() - features.constant(one)),
scale * features);
}
};
// Functor used by SeluGradOp to do the computations.
template <typename Device, typename T>
struct SeluGrad {
// Computes SeluGrad backprops.
//
// gradients: gradients backpropagated to the Selu op.
// activations: outputs of the Selu op.
// backprops: gradients to backpropagate to the Selu inputs.
void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
typename TTypes<T>::ConstTensor activations,
typename TTypes<T>::Tensor backprops) {
const auto scale = static_cast<T>(1.0507009873554804934193349852946);
const auto scale_alpha = static_cast<T>(1.7580993408473768599402175208123);
backprops.device(d) =
(activations < static_cast<T>(0)).select(
gradients * (activations + scale_alpha), gradients * scale);
}
};
} // namespace functor
} // namespace tensorflow


@@ -35,7 +35,9 @@ typedef Eigen::GpuDevice GPUDevice;
template struct functor::Relu6<GPUDevice, T>; \
template struct functor::Relu6Grad<GPUDevice, T>; \
template struct functor::Elu<GPUDevice, T>; \
template struct functor::EluGrad<GPUDevice, T>; \
template struct functor::Selu<GPUDevice, T>; \
template struct functor::SeluGrad<GPUDevice, T>;
TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);


@@ -1779,6 +1779,33 @@ backprops: The gradients: `gradients * (outputs + 1)` if outputs < 0,
`gradients` otherwise.
)doc");
REGISTER_OP("Selu")
.Input("features: T")
.Output("activations: T")
.Attr("T: {half, float, double}")
.SetShapeFn(shape_inference::UnchangedShape)
.Doc(R"doc(
Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)`
if < 0, `scale * features` otherwise.
See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
)doc");
REGISTER_OP("SeluGrad")
.Input("gradients: T")
.Input("outputs: T")
.Output("backprops: T")
.Attr("T: {half, float, double}")
.SetShapeFn(shape_inference::MergeBothInputsShapeFn)
.Doc(R"doc(
Computes gradients for the scaled exponential linear (Selu) operation.
gradients: The backpropagated gradients to the corresponding Selu operation.
outputs: The outputs of the corresponding Selu operation.
backprops: The gradients: `gradients * (outputs + scale * alpha)`
if outputs < 0, `scale * gradients` otherwise.
)doc");
REGISTER_OP("Softplus") REGISTER_OP("Softplus")
.Input("features: T") .Input("features: T")
.Output("activations: T") .Output("activations: T")

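For reference, the SeluGrad backprops rule documented above follows from differentiating the forward Selu definition (same scale and alpha as in the kernels); a short derivation:

\frac{d}{dx}\,\mathrm{selu}(x) =
\begin{cases}
\mathrm{scale}\cdot\alpha\,e^{x} = \mathrm{selu}(x) + \mathrm{scale}\cdot\alpha, & x < 0,\\
\mathrm{scale}, & x \ge 0,
\end{cases}

which is why the gradient can be evaluated from the op's `outputs` alone: `gradients * (outputs + scale * alpha)` when outputs < 0, and `scale * gradients` otherwise.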

@@ -412,7 +412,8 @@ TEST(NNOpsTest, Dilation2DBackpropFilter_ShapeFn) {
TEST(NNOpsTest, MergeBothInputs_ShapeFn) {
for (const char* op_name :
{"ReluGrad", "Relu6Grad", "EluGrad", "SeluGrad", "SoftplusGrad",
"SoftsignGrad"}) {
ShapeInferenceTestOp op(op_name);
INFER_OK(op, "?;?", "in0|in1");


@@ -23383,6 +23383,60 @@ op {
summary: "Computes the eigen decomposition of one or more square self-adjoint matrices."
description: "Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in\n`input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`.\n\n```python\n# a is a tensor.\n# e is a tensor of eigenvalues.\n# v is a tensor of eigenvectors.\ne, v = self_adjoint_eig(a)\ne = self_adjoint_eig(a, compute_v=False)\n```"
}
op {
name: "Selu"
input_arg {
name: "features"
type_attr: "T"
}
output_arg {
name: "activations"
type_attr: "T"
}
attr {
name: "T"
type: "type"
allowed_values {
list {
type: DT_HALF
type: DT_FLOAT
type: DT_DOUBLE
}
}
}
summary: "Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` if < 0, `scale * features` otherwise."
description: "See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)"
}
op {
name: "SeluGrad"
input_arg {
name: "gradients"
description: "The backpropagated gradients to the corresponding Selu operation."
type_attr: "T"
}
input_arg {
name: "outputs"
description: "The outputs of the corresponding Selu operation."
type_attr: "T"
}
output_arg {
name: "backprops"
description: "The gradients: `gradients * (outputs + scale * alpha)` if outputs < 0,\n`scale * gradients` otherwise."
type_attr: "T"
}
attr {
name: "T"
type: "type"
allowed_values {
list {
type: DT_HALF
type: DT_FLOAT
type: DT_DOUBLE
}
}
}
summary: "Computes gradients for the scaled exponential linear (Selu) operation."
}
op {
name: "SerializeManySparse"
input_arg {


@@ -8,7 +8,7 @@ Note: Functions taking `Tensor` arguments can also take anything accepted by
## Activation Functions
The activation ops provide different types of nonlinearities for use in neural
networks. These include smooth nonlinearities (`sigmoid`, `tanh`, `elu`, `selu`,
`softplus`, and `softsign`), continuous but not everywhere differentiable
functions (`relu`, `relu6`, `crelu` and `relu_x`), and random regularization
(`dropout`).
@@ -20,6 +20,7 @@ shape as the input tensor.
* @{tf.nn.relu6}
* @{tf.nn.crelu}
* @{tf.nn.elu}
* @{tf.nn.selu}
* @{tf.nn.softplus}
* @{tf.nn.softsign}
* @{tf.nn.dropout}


@@ -16265,6 +16265,28 @@ func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyReso
return scope.AddOperation(opspec)
}
// Computes gradients for the scaled exponential linear (Selu) operation.
//
// Arguments:
// gradients: The backpropagated gradients to the corresponding Selu operation.
// outputs: The outputs of the corresponding Selu operation.
//
// Returns The gradients: `gradients * (outputs + scale * alpha)` if outputs < 0,
// `scale * gradients` otherwise.
func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) {
if scope.Err() != nil {
return
}
opspec := tf.OpSpec{
Type: "SeluGrad",
Input: []tf.Input{
gradients, outputs,
},
}
op := scope.AddOperation(opspec)
return op.Output(0)
}
// Converts each string in the input Tensor to its hash mod by a number of buckets.
//
// The hash function is deterministic on the content of the string within the
@@ -20541,6 +20563,24 @@ func Elu(scope *Scope, features tf.Output) (activations tf.Output) {
return op.Output(0)
}
// Computes scaled exponential linear: `1.758099 * (exp(features) - 1)` if < 0,
// `1.050701 * features` otherwise.
//
// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
func Selu(scope *Scope, features tf.Output) (activations tf.Output) {
if scope.Err() != nil {
return
}
opspec := tf.OpSpec{
Type: "Selu",
Input: []tf.Input{
features,
},
}
op := scope.AddOperation(opspec)
return op.Output(0)
}
// Computes square of x element-wise.
//
// I.e., \\(y = x * x = x^2\\).


@@ -320,6 +320,97 @@ class EluTest(test.TestCase):
self.assertLess(err, 1e-6)
class SeluTest(test.TestCase):
def _npSelu(self, np_features):
scale = 1.0507009873554804934193349852946
scale_alpha = 1.7580993408473768599402175208123
return np.where(np_features < 0, scale_alpha * (np.exp(np_features) - 1),
scale * np_features)
def testNpSelu(self):
self.assertAllClose(
np.array([[-1.0433095, 0.73549069, -0.6917582, 0.3152103 , -0.16730527],
[0.1050701 , -0.45566732, 0.5253505, -0.88505305, 0.9456309]]),
self._npSelu(
np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7, 0.9]
])))
def _testSelu(self, np_features, use_gpu=False):
np_selu = self._npSelu(np_features)
with self.test_session(use_gpu=use_gpu):
selu = nn_ops.selu(np_features)
tf_selu = selu.eval()
self.assertAllClose(np_selu, tf_selu)
self.assertShapeEqual(np_selu, selu)
def testNumbers(self):
for t in [np.float16, np.float32, np.float64]:
self._testSelu(
np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
use_gpu=False)
self._testSelu(
np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
use_gpu=True)
def testGradientFloat32(self):
with self.test_session():
x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
x = constant_op.constant(x_val, name="x")
y = nn_ops.selu(x, name="selu")
x_init = np.asarray(x_val, dtype=np.float32, order="F")
err = gradient_checker.compute_gradient_error(
x, [2, 5], y, [2, 5], x_init_value=x_init)
print("selu (float32) gradient err = ", err)
self.assertLess(err, 1e-4)
def testGradientFloat64(self):
with self.test_session():
x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
x = constant_op.constant(x_val, dtype=dtypes.float64, name="x")
y = nn_ops.selu(x, name="selu")
x_init = np.asarray(x_val, dtype=np.float64, order="F")
err = gradient_checker.compute_gradient_error(
x, [2, 5], y, [2, 5], x_init_value=x_init)
print("selu (float64) gradient err = ", err)
self.assertLess(err, 1e-6)
def testGradGradFloat32(self):
with self.test_session():
x = constant_op.constant(
[-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
shape=[2, 5],
name="x")
y = nn_ops.selu(x, name="selu")
z = gradients_impl.gradients(y, x)
x_init = np.asarray(
[[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
dtype=np.float32,
order="F")
err = gradient_checker.compute_gradient_error(
x, [2, 5], z[0], [2, 5], x_init_value=x_init)
print("selu (float32) gradient of gradient err = ", err)
self.assertLess(err, 1e-4)
def testGradGradFloat64(self):
with self.test_session():
x = constant_op.constant(
[-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
shape=[2, 5],
dtype=dtypes.float64,
name="x")
y = nn_ops.selu(x, name="selu")
z = gradients_impl.gradients(y, x)
x_init = np.asarray(
[[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
dtype=np.float64,
order="F")
err = gradient_checker.compute_gradient_error(
x, [2, 5], z[0], [2, 5], x_init_value=x_init)
print("selu (float64) gradient of gradient err = ", err)
self.assertLess(err, 1e-6)
class CreluTest(test.TestCase):
def testCreluShape(self):


@@ -290,6 +290,7 @@ MaxPool3DGradGrad
ReluGrad
Relu6Grad
EluGrad
SeluGrad
SoftplusGrad
SoftsignGrad
TopK


@@ -22,6 +22,7 @@ See the @{$python/nn} guide.
@@relu6
@@crelu
@@elu
@@selu
@@softplus
@@softsign
@@dropout


@@ -335,6 +335,16 @@ def _EluGradGrad(op, grad):
dtype=elu_x.dtype)))
@ops.RegisterGradient("SeluGrad")
def _SeluGradGrad(op, grad):
x = op.inputs[1]
scale_alpha = 1.7580993408473768599402175208123
return (gen_nn_ops._elu_grad(grad, op.outputs[0]),
array_ops.where(
x < 0., gen_nn_ops._elu_grad(grad, op.outputs[0] + scale_alpha),
array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)))
@ops.RegisterGradient("Relu6") @ops.RegisterGradient("Relu6")
def _Relu6Grad(op, grad): def _Relu6Grad(op, grad):
return gen_nn_ops._relu6_grad(grad, op.inputs[0]) return gen_nn_ops._relu6_grad(grad, op.inputs[0])
@ -345,6 +355,11 @@ def _EluGrad(op, grad):
return gen_nn_ops._elu_grad(grad, op.outputs[0]) return gen_nn_ops._elu_grad(grad, op.outputs[0])
@ops.RegisterGradient("Selu")
def _SeluGrad(op, grad):
return gen_nn_ops._selu_grad(grad, op.outputs[0])
@ops.RegisterGradient("Softplus") @ops.RegisterGradient("Softplus")
def _SoftplusGrad(op, grad): def _SoftplusGrad(op, grad):
return gen_nn_ops._softplus_grad(grad, op.inputs[0]) return gen_nn_ops._softplus_grad(grad, op.inputs[0])

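As a quick sanity check on the registered first-order gradient (which, like EluGrad, is expressed in terms of the op's outputs rather than its inputs), a minimal NumPy finite-difference sketch of the closed-form rule; np_selu and np_selu_grad are the same reference helpers as above, not part of the change:

import numpy as np

scale = 1.0507009873554804934193349852946
scale_alpha = 1.7580993408473768599402175208123  # scale * alpha

def np_selu(x):
  return np.where(x < 0, scale_alpha * np.expm1(x), scale * x)

def np_selu_grad(g, y):
  # y is the Selu output, matching _selu_grad(grad, op.outputs[0]).
  return np.where(y < 0, g * (y + scale_alpha), scale * g)

x = np.array([-0.9, -0.3, 0.1, 0.7])
eps = 1e-6
numeric = (np_selu(x + eps) - np_selu(x - eps)) / (2 * eps)
analytic = np_selu_grad(np.ones_like(x), np_selu(x))
print(np.max(np.abs(numeric - analytic)))  # close to zero (finite-difference error only)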

@@ -256,6 +256,10 @@ tf_module {
name: "sampled_softmax_loss"
argspec: "args=[\'weights\', \'biases\', \'labels\', \'inputs\', \'num_sampled\', \'num_classes\', \'num_true\', \'sampled_values\', \'remove_accidental_hits\', \'partition_strategy\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'None\', \'True\', \'mod\', \'sampled_softmax_loss\'], "
}
member_method {
name: "selu"
argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "separable_conv2d"
argspec: "args=[\'input\', \'depthwise_filter\', \'pointwise_filter\', \'strides\', \'padding\', \'rate\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "