Add bitwise_and operation definition to kernel generator.

PiperOrigin-RevId: 346530044
Change-Id: I7c475b42abfc1b3f1db72aac971e830531769143
This commit is contained in:
Stephan Herhut 2020-12-09 05:29:23 -08:00 committed by TensorFlower Gardener
parent 6e28513ec1
commit 80df7cb64d
5 changed files with 92 additions and 1 deletions

View File

@ -21,8 +21,16 @@ REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// If the MLIR-generated (unranked) GPU kernels are not enabled, register the
// classic Eigen-based GPU kernels for every integral type.
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
          int64, uint8, uint16, uint32, uint64);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
// The signed types are covered by the MLIR-generated kernels, so only the
// unsigned variants need the Eigen-based registration here.
REGISTER4(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, uint8, uint16,
          uint32, uint64);
#endif  // !MLIR_GENERATED_GPU_KERNELS_ENABLED ||
        // !MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}  // namespace tensorflow

View File

@ -158,6 +158,7 @@ tf_kernel_library(
name = "cwise_binary_op",
srcs = [
"unranked_op_gpu_add.cc",
"unranked_op_gpu_bitwise_and.cc",
"unranked_op_gpu_equal.cc",
],
tags = [
@ -165,6 +166,7 @@ tf_kernel_library(
],
deps = [
":add_v2_unranked_kernels",
":bitwise_and_unranked_kernels",
":equal_unranked_kernels",
":greater_equal_unranked_kernels",
":greater_unranked_kernels",
@ -373,6 +375,26 @@ gen_kernel_library(
# unroll_factors = "4",
)
# Generates the MLIR-based unranked GPU kernels for tf.BitwiseAnd.
gen_kernel_library(
    name = "bitwise_and",
    generate_ranked = False,
    generate_unranked = True,
    tile_size = "256,1,1",
    # Signed integer element types only for now; see TODO below.
    types = [
        "i8",
        "i16",
        "i32",
        "i64",
        # TODO(b/172804967): Enable once fixed.
        # "ui8",
        # "ui16",
        # "ui32",
        # "ui64",
    ],
    # TODO(b/174543802): Enable once fusion heuristics is better.
    # unroll_factors = "4",
)
[
gen_kernel_library(
name = name,

View File

@ -19,6 +19,7 @@ limitations under the License.
#include <vector>
#include "absl/container/inlined_vector.h"
#include "absl/types/optional.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/framework/fake_input.h"
@ -46,6 +47,18 @@ struct BinaryTestParam {
// To add additional tests for other kernels, search for PLACEHOLDER in this
// file.
// Some templates to have versions of the operations that are conditional on
// the used types. C++17 would make this easier.
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
absl::optional<T> BitwiseAnd(T lhs, T rhs) {
return lhs & rhs;
}
template <typename T,
std::enable_if_t<!std::is_integral<T>::value, bool> = true>
absl::optional<T> BitwiseAnd(T /*lhs*/, T /*rhs*/) {
return absl::nullopt;
}
class ParametricGpuBinaryOpsTest
: public OpsTestBase,
public ::testing::WithParamInterface<BinaryTestParam> {
@ -298,6 +311,11 @@ class ParametricGpuBinaryOpsTest
if (GetParam().op_name == "AddV2") {
return static_cast<BaselineOutT>(lhs + rhs);
}
if (GetParam().op_name == "BitwiseAnd") {
if (auto val = BitwiseAnd(lhs, rhs)) {
return static_cast<BaselineOutT>(val.value());
}
}
if (GetParam().op_name == "Equal") {
return static_cast<BaselineOutT>(lhs == rhs);
}
@ -305,7 +323,8 @@ class ParametricGpuBinaryOpsTest
// test here.
// <PLACEHOLDER>
LOG(FATAL) << "Cannot generate expected result for op "
<< GetParam().op_name;
<< GetParam().op_name << " on input type "
<< typeid(BaselineType).name();
return static_cast<BaselineOutT>(lhs);
}
};
@ -316,6 +335,11 @@ std::vector<BinaryTestParam> GetBinaryTestParameters() {
std::vector<DataType>{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_INT64}) {
parameters.emplace_back("AddV2", dt, dt);
}
// TODO(b/172804967): Expand to unsigned once fixed.
for (DataType dt :
std::vector<DataType>{DT_INT8, DT_INT16, DT_INT32, DT_INT64}) {
parameters.emplace_back("BitwiseAnd", dt, dt);
}
for (DataType dt :
std::vector<DataType>{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_BOOL, DT_INT8,
DT_INT16, DT_INT64}) {

View File

@ -0,0 +1,6 @@
// Unranked-tensor kernel template for tf.BitwiseAnd. The `elem_type`
// placeholder is substituted with a concrete element type (e.g. i8, i32)
// by the kernel generator; see the `gen_kernel_library` BUILD rule.
func @BitwiseAnd_elem_type(%arg0: tensor<*xelem_type>, %arg1: tensor<*xelem_type>)
    -> tensor<*xelem_type> attributes {tf_entry, llvm.emit_c_interface} {
  %0 = "tf.BitwiseAnd"(%arg0, %arg1)
      : (tensor<*xelem_type>, tensor<*xelem_type>) -> tensor<*xelem_type>
  return %0 : tensor<*xelem_type>
}

View File

@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {

// Instantiate and register the MLIR-generated unranked GPU kernels for
// tf.BitwiseAnd, one per supported element type. The arguments are:
// (op name, MLIR type suffix, TF DataType enum, C++ element type).
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i8, DT_INT8, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i16, DT_INT16, int16);
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i32, DT_INT32, int32);
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i64, DT_INT64, int64);
// TODO(b/172804967): Enable once fixed.
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui8, DT_UINT8, uint8);
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui16, DT_UINT16, uint16);
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui32, DT_UINT32, uint32);
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui64, DT_UINT64, uint64);
}  // namespace tensorflow