Add bitwise_and operation definition to kernel generator.

PiperOrigin-RevId: 346530044
Change-Id: I7c475b42abfc1b3f1db72aac971e830531769143
This commit is contained in:
Stephan Herhut 2020-12-09 05:29:23 -08:00 committed by TensorFlower Gardener
parent 6e28513ec1
commit 80df7cb64d
5 changed files with 92 additions and 1 deletions

View File

@ -21,8 +21,16 @@ REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// If the MLIR-generated (unranked) GPU kernels are not enabled, register the
// classic Eigen-based GPU kernels for every integral type.
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
          int64, uint8, uint16, uint32, uint64);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
// The signed types are covered by the MLIR-generated kernels, so only the
// unsigned variants need the Eigen-based registration here.
REGISTER4(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, uint8, uint16,
          uint32, uint64);
#endif  // !MLIR_GENERATED_GPU_KERNELS_ENABLED ||
        // !MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}  // namespace tensorflow

View File

@ -158,6 +158,7 @@ tf_kernel_library(
name = "cwise_binary_op",
srcs = [
"unranked_op_gpu_add.cc",
"unranked_op_gpu_bitwise_and.cc",
"unranked_op_gpu_equal.cc",
],
tags = [
@ -165,6 +166,7 @@ tf_kernel_library(
],
deps = [
":add_v2_unranked_kernels",
":bitwise_and_unranked_kernels",
":equal_unranked_kernels",
":greater_equal_unranked_kernels",
":greater_unranked_kernels",
@ -373,6 +375,26 @@ gen_kernel_library(
# unroll_factors = "4",
)
# Generates the MLIR-based unranked GPU kernels for tf.BitwiseAnd.
gen_kernel_library(
    name = "bitwise_and",
    generate_ranked = False,
    generate_unranked = True,
    tile_size = "256,1,1",
    # Signed integer element types only for now; see TODO below.
    types = [
        "i8",
        "i16",
        "i32",
        "i64",
        # TODO(b/172804967): Enable once fixed.
        # "ui8",
        # "ui16",
        # "ui32",
        # "ui64",
    ],
    # TODO(b/174543802): Enable once fusion heuristics is better.
    # unroll_factors = "4",
)
[
gen_kernel_library(
name = name,

View File

@ -19,6 +19,7 @@ limitations under the License.
#include <vector>
#include "absl/container/inlined_vector.h"
#include "absl/types/optional.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/framework/fake_input.h"
@ -46,6 +47,18 @@ struct BinaryTestParam {
// To add additional tests for other kernels, search for PLACEHOLDER in this
// file.
// Some templates to have versions of the operations that are conditional on
// the used types. C++17 would make this easier.
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
absl::optional<T> BitwiseAnd(T lhs, T rhs) {
return lhs & rhs;
}
template <typename T,
std::enable_if_t<!std::is_integral<T>::value, bool> = true>
absl::optional<T> BitwiseAnd(T /*lhs*/, T /*rhs*/) {
return absl::nullopt;
}
class ParametricGpuBinaryOpsTest
: public OpsTestBase,
public ::testing::WithParamInterface<BinaryTestParam> {
@ -298,6 +311,11 @@ class ParametricGpuBinaryOpsTest
if (GetParam().op_name == "AddV2") {
return static_cast<BaselineOutT>(lhs + rhs);
}
if (GetParam().op_name == "BitwiseAnd") {
if (auto val = BitwiseAnd(lhs, rhs)) {
return static_cast<BaselineOutT>(val.value());
}
}
if (GetParam().op_name == "Equal") {
return static_cast<BaselineOutT>(lhs == rhs);
}
@ -305,7 +323,8 @@ class ParametricGpuBinaryOpsTest
// test here.
// <PLACEHOLDER>
LOG(FATAL) << "Cannot generate expected result for op "
<< GetParam().op_name;
<< GetParam().op_name << " on input type "
<< typeid(BaselineType).name();
return static_cast<BaselineOutT>(lhs);
}
};
@ -316,6 +335,11 @@ std::vector<BinaryTestParam> GetBinaryTestParameters() {
std::vector<DataType>{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_INT64}) {
parameters.emplace_back("AddV2", dt, dt);
}
// TODO(b/172804967): Expand to unsigned once fixed.
for (DataType dt :
std::vector<DataType>{DT_INT8, DT_INT16, DT_INT32, DT_INT64}) {
parameters.emplace_back("BitwiseAnd", dt, dt);
}
for (DataType dt :
std::vector<DataType>{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_BOOL, DT_INT8,
DT_INT16, DT_INT64}) {

View File

@ -0,0 +1,6 @@
// Unranked-tensor kernel template for tf.BitwiseAnd. The `elem_type`
// placeholder is substituted with a concrete element type (e.g. i8, i32)
// by the kernel generator; see the `gen_kernel_library` BUILD rule.
func @BitwiseAnd_elem_type(%arg0: tensor<*xelem_type>, %arg1: tensor<*xelem_type>)
    -> tensor<*xelem_type> attributes {tf_entry, llvm.emit_c_interface} {
  %0 = "tf.BitwiseAnd"(%arg0, %arg1)
      : (tensor<*xelem_type>, tensor<*xelem_type>) -> tensor<*xelem_type>
  return %0 : tensor<*xelem_type>
}

View File

@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {

// Instantiate and register the MLIR-generated unranked GPU kernels for
// tf.BitwiseAnd, one per supported element type. The arguments are:
// (op name, MLIR type suffix, TF DataType enum, C++ element type).
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i8, DT_INT8, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i16, DT_INT16, int16);
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i32, DT_INT32, int32);
GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, i64, DT_INT64, int64);
// TODO(b/172804967): Enable once fixed.
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui8, DT_UINT8, uint8);
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui16, DT_UINT16, uint16);
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui32, DT_UINT32, uint32);
// GENERATE_AND_REGISTER_BINARY_KERNEL(BitwiseAnd, ui64, DT_UINT64, uint64);
}  // namespace tensorflow