Generate and test kernels for Greater(Equal), Less(Equal) and NotEqual

PiperOrigin-RevId: 346587201
Change-Id: Ic1aa8d27d687756ec0634390754c238289bd0d2c
Benjamin Kramer 2020-12-09 10:48:56 -08:00 committed by TensorFlower Gardener
parent 8fe56f0807
commit 193e2c295d
13 changed files with 221 additions and 0 deletions

View File

@@ -19,8 +19,14 @@ namespace tensorflow {
REGISTER9(BinaryOp, CPU, "Greater", functor::greater, float, Eigen::half,
double, int32, int64, uint8, int8, int16, bfloat16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half,
double, int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "Greater", functor::greater, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -19,8 +19,14 @@ namespace tensorflow {
REGISTER9(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, float,
Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float,
Eigen::half, double, int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -21,8 +21,14 @@ REGISTER5(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double,
REGISTER4(BinaryOp, CPU, "Less", functor::less, int64, uint8, int8, int16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double,
int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "Less", functor::less, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -22,8 +22,14 @@ REGISTER4(BinaryOp, CPU, "LessEqual", functor::less_equal, int64, uint8, int8,
int16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half,
double, int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "LessEqual", functor::less_equal, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -21,8 +21,13 @@ REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, float, Eigen::half,
REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, uint16, uint32,
uint64, qint8, qint16, quint8, quint16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER4(BinaryOp, GPU, "NotEqual", functor::not_equal_to, float, Eigen::half,
double, uint8);
#else
REGISTER(BinaryOp, GPU, "NotEqual", functor::not_equal_to, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.

View File

@@ -25,8 +25,14 @@ namespace tensorflow {
REGISTER6(BinaryOp, CPU, "NotEqual", functor::not_equal_to, int32, int64,
complex64, complex128, tstring, bool);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER6(BinaryOp, GPU, "NotEqual", functor::not_equal_to, int8, int16, int64,
complex64, complex128, bool);
#else
REGISTER2(BinaryOp, GPU, "NotEqual", functor::not_equal_to, complex64,
complex128);
#endif
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

@@ -162,8 +162,13 @@ tf_kernel_library(
"unranked_op_gpu_bitwise_or.cc",
"unranked_op_gpu_bitwise_xor.cc",
"unranked_op_gpu_equal.cc",
"unranked_op_gpu_greater.cc",
"unranked_op_gpu_greater_equal.cc",
"unranked_op_gpu_less.cc",
"unranked_op_gpu_less_equal.cc",
"unranked_op_gpu_logical_and.cc",
"unranked_op_gpu_logical_or.cc",
"unranked_op_gpu_not_equal.cc",
],
tags = [
"manual",

View File

@@ -374,6 +374,21 @@ class ParametricGpuBinaryOpsTest
if (GetParam().op_name == "Equal") {
return static_cast<BaselineOutT>(lhs == rhs);
}
if (GetParam().op_name == "NotEqual") {
return static_cast<BaselineOutT>(lhs != rhs);
}
if (GetParam().op_name == "Greater") {
return static_cast<BaselineOutT>(lhs > rhs);
}
if (GetParam().op_name == "GreaterEqual") {
return static_cast<BaselineOutT>(lhs >= rhs);
}
if (GetParam().op_name == "Less") {
return static_cast<BaselineOutT>(lhs < rhs);
}
if (GetParam().op_name == "LessEqual") {
return static_cast<BaselineOutT>(lhs <= rhs);
}
if (GetParam().op_name == "LogicalAnd") {
if (auto val = LogicalAnd(lhs, rhs)) {
return static_cast<BaselineOutT>(val.value());
@@ -411,6 +426,14 @@ std::vector<BinaryTestParam> GetBinaryTestParameters() {
std::vector<DataType>{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_BOOL, DT_INT8,
DT_INT16, DT_INT64}) {
parameters.emplace_back("Equal", dt, DT_BOOL);
parameters.emplace_back("NotEqual", dt, DT_BOOL);
}
for (DataType dt :
{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_INT8, DT_INT16, DT_INT64}) {
parameters.emplace_back("Greater", dt, DT_BOOL);
parameters.emplace_back("GreaterEqual", dt, DT_BOOL);
parameters.emplace_back("Less", dt, DT_BOOL);
parameters.emplace_back("LessEqual", dt, DT_BOOL);
}
parameters.emplace_back("LogicalAnd", DT_BOOL, DT_BOOL,
/*use_constraint=*/false);
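
Note that the dtype lists above mirror the new unranked kernel registrations further down: the ordered comparisons (Greater, GreaterEqual, Less, LessEqual) are parameterized over float, double, half and the signed integer types, while Equal and NotEqual additionally cover DT_BOOL. A minimal, self-contained sketch of the baseline semantics these parameters exercise follows; the helper name and the standalone main() are illustrative only, not part of the test file.

// Illustration only: a standalone mirror of the expected-value logic above,
// showing that each comparison op reduces to the corresponding C++ operator
// with the result cast to bool.
#include <cassert>
#include <string>

template <typename T>
bool BaselineCompare(const std::string& op_name, T lhs, T rhs) {
  if (op_name == "NotEqual") return lhs != rhs;
  if (op_name == "Greater") return lhs > rhs;
  if (op_name == "GreaterEqual") return lhs >= rhs;
  if (op_name == "Less") return lhs < rhs;
  if (op_name == "LessEqual") return lhs <= rhs;
  return lhs == rhs;  // "Equal"
}

int main() {
  assert(BaselineCompare<float>("Greater", 2.0f, 1.0f));
  assert(BaselineCompare<int>("LessEqual", 2, 2));
  assert(!BaselineCompare<double>("NotEqual", 1.5, 1.5));
  return 0;
}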

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, f16, DT_BOOL, bool, Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, f16, DT_BOOL, bool,
Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, f16, DT_BOOL, bool, Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, f16, DT_BOOL, bool,
Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, f16, DT_BOOL, bool, Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i1, DT_BOOL, bool, bool);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i64, DT_BOOL, bool, int64);
} // namespace tensorflow
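
For end-to-end context, a minimal sketch of driving the newly covered ops through the TensorFlow C++ client API; whether the MLIR-generated or the Eigen-based GPU kernel handles each op depends on the build flags above, and the surrounding build setup is assumed rather than taken from this change.

// Illustration only: runs Greater and LessEqual end to end and checks that
// both produce DT_BOOL outputs, regardless of which kernel backend is built in.
#include <vector>

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/logging.h"

int main() {
  using namespace tensorflow;  // NOLINT
  Scope root = Scope::NewRootScope();
  auto lhs = ops::Const(root, {1.0f, 2.0f, 3.0f});
  auto rhs = ops::Const(root, {2.0f, 2.0f, 2.0f});
  auto greater = ops::Greater(root, lhs, rhs);    // expected: {false, false, true}
  auto less_eq = ops::LessEqual(root, lhs, rhs);  // expected: {true, true, false}

  ClientSession session(root);
  std::vector<Tensor> outputs;
  TF_CHECK_OK(session.Run({greater, less_eq}, &outputs));
  CHECK_EQ(outputs[0].dtype(), DT_BOOL);
  CHECK_EQ(outputs[1].dtype(), DT_BOOL);
  return 0;
}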