Generate and test kernels for Greater(Equal), Less(Equal) and NotEqual

PiperOrigin-RevId: 346587201
Change-Id: Ic1aa8d27d687756ec0634390754c238289bd0d2c
Benjamin Kramer 2020-12-09 10:48:56 -08:00 committed by TensorFlower Gardener
parent 8fe56f0807
commit 193e2c295d
13 changed files with 221 additions and 0 deletions

View File

@@ -19,8 +19,14 @@ namespace tensorflow {
REGISTER9(BinaryOp, CPU, "Greater", functor::greater, float, Eigen::half,
double, int32, int64, uint8, int8, int16, bfloat16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half,
double, int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "Greater", functor::greater, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -19,8 +19,14 @@ namespace tensorflow {
REGISTER9(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, float,
Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float,
Eigen::half, double, int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -21,8 +21,14 @@ REGISTER5(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double,
REGISTER4(BinaryOp, CPU, "Less", functor::less, int64, uint8, int8, int16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double,
int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "Less", functor::less, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -22,8 +22,14 @@ REGISTER4(BinaryOp, CPU, "LessEqual", functor::less_equal, int64, uint8, int8,
int16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER7(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half,
double, int64, uint8, int8, int16);
#else
// TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
REGISTER(BinaryOp, GPU, "LessEqual", functor::less_equal, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel

View File

@@ -21,8 +21,13 @@ REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, float, Eigen::half,
REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, uint16, uint32,
uint64, qint8, qint16, quint8, quint16);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER4(BinaryOp, GPU, "NotEqual", functor::not_equal_to, float, Eigen::half,
double, uint8);
#else
REGISTER(BinaryOp, GPU, "NotEqual", functor::not_equal_to, uint8);
#endif
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.

View File

@@ -25,8 +25,14 @@ namespace tensorflow {
REGISTER6(BinaryOp, CPU, "NotEqual", functor::not_equal_to, int32, int64,
complex64, complex128, tstring, bool);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
!defined(MLIR_GENERATED_UNRANKED_GPU_KERNELS_ENABLED)
REGISTER6(BinaryOp, GPU, "NotEqual", functor::not_equal_to, int8, int16, int64,
complex64, complex128, bool);
#else
REGISTER2(BinaryOp, GPU, "NotEqual", functor::not_equal_to, complex64,
complex128);
#endif
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

@@ -162,8 +162,13 @@ tf_kernel_library(
"unranked_op_gpu_bitwise_or.cc",
"unranked_op_gpu_bitwise_xor.cc",
"unranked_op_gpu_equal.cc",
"unranked_op_gpu_greater.cc",
"unranked_op_gpu_greater_equal.cc",
"unranked_op_gpu_less.cc",
"unranked_op_gpu_less_equal.cc",
"unranked_op_gpu_logical_and.cc",
"unranked_op_gpu_logical_or.cc",
"unranked_op_gpu_not_equal.cc",
],
tags = [
"manual",

View File

@@ -374,6 +374,21 @@ class ParametricGpuBinaryOpsTest
if (GetParam().op_name == "Equal") {
return static_cast<BaselineOutT>(lhs == rhs);
}
if (GetParam().op_name == "NotEqual") {
return static_cast<BaselineOutT>(lhs != rhs);
}
if (GetParam().op_name == "Greater") {
return static_cast<BaselineOutT>(lhs > rhs);
}
if (GetParam().op_name == "GreaterEqual") {
return static_cast<BaselineOutT>(lhs >= rhs);
}
if (GetParam().op_name == "Less") {
return static_cast<BaselineOutT>(lhs < rhs);
}
if (GetParam().op_name == "LessEqual") {
return static_cast<BaselineOutT>(lhs <= rhs);
}
if (GetParam().op_name == "LogicalAnd") {
if (auto val = LogicalAnd(lhs, rhs)) {
return static_cast<BaselineOutT>(val.value());
@@ -411,6 +426,14 @@ std::vector<BinaryTestParam> GetBinaryTestParameters() {
std::vector<DataType>{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_BOOL, DT_INT8,
DT_INT16, DT_INT64}) {
parameters.emplace_back("Equal", dt, DT_BOOL);
parameters.emplace_back("NotEqual", dt, DT_BOOL);
}
for (DataType dt :
{DT_FLOAT, DT_DOUBLE, DT_HALF, DT_INT8, DT_INT16, DT_INT64}) {
parameters.emplace_back("Greater", dt, DT_BOOL);
parameters.emplace_back("GreaterEqual", dt, DT_BOOL);
parameters.emplace_back("Less", dt, DT_BOOL);
parameters.emplace_back("LessEqual", dt, DT_BOOL);
}
parameters.emplace_back("LogicalAnd", DT_BOOL, DT_BOOL,
/*use_constraint=*/false);
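
Note that the dtype lists above mirror the new unranked kernel registrations further down: the ordered comparisons (Greater, GreaterEqual, Less, LessEqual) are parameterized over float, double, half and the signed integer types, while Equal and NotEqual additionally cover DT_BOOL. A minimal, self-contained sketch of the baseline semantics these parameters exercise follows; the helper name and the standalone main() are illustrative only, not part of the test file.

// Illustration only: a standalone mirror of the expected-value logic above,
// showing that each comparison op reduces to the corresponding C++ operator
// with the result cast to bool.
#include <cassert>
#include <string>

template <typename T>
bool BaselineCompare(const std::string& op_name, T lhs, T rhs) {
  if (op_name == "NotEqual") return lhs != rhs;
  if (op_name == "Greater") return lhs > rhs;
  if (op_name == "GreaterEqual") return lhs >= rhs;
  if (op_name == "Less") return lhs < rhs;
  if (op_name == "LessEqual") return lhs <= rhs;
  return lhs == rhs;  // "Equal"
}

int main() {
  assert(BaselineCompare<float>("Greater", 2.0f, 1.0f));
  assert(BaselineCompare<int>("LessEqual", 2, 2));
  assert(!BaselineCompare<double>("NotEqual", 1.5, 1.5));
  return 0;
}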

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, f16, DT_BOOL, bool, Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(Greater, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, f16, DT_BOOL, bool,
Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(GreaterEqual, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, f16, DT_BOOL, bool, Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(Less, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, f16, DT_BOOL, bool,
Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(LessEqual, i64, DT_BOOL, bool, int64);
} // namespace tensorflow

View File

@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <complex>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/unranked_op_gpu_base.h"
namespace tensorflow {
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, f16, DT_BOOL, bool, Eigen::half);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, f32, DT_BOOL, bool, float);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, f64, DT_BOOL, bool, double);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i1, DT_BOOL, bool, bool);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i8, DT_BOOL, bool, int8);
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i16, DT_BOOL, bool, int16);
// TODO(b/25387198): We cannot use a regular GPU kernel for int32.
GENERATE_AND_REGISTER_BINARY_KERNEL2(NotEqual, i64, DT_BOOL, bool, int64);
} // namespace tensorflow
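
For end-to-end context, a minimal sketch of driving the newly covered ops through the TensorFlow C++ client API; whether the MLIR-generated or the Eigen-based GPU kernel handles each op depends on the build flags above, and the surrounding build setup is assumed rather than taken from this change.

// Illustration only: runs Greater and LessEqual end to end and checks that
// both produce DT_BOOL outputs, regardless of which kernel backend is built in.
#include <vector>

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/logging.h"

int main() {
  using namespace tensorflow;  // NOLINT
  Scope root = Scope::NewRootScope();
  auto lhs = ops::Const(root, {1.0f, 2.0f, 3.0f});
  auto rhs = ops::Const(root, {2.0f, 2.0f, 2.0f});
  auto greater = ops::Greater(root, lhs, rhs);    // expected: {false, false, true}
  auto less_eq = ops::LessEqual(root, lhs, rhs);  // expected: {true, true, false}

  ClientSession session(root);
  std::vector<Tensor> outputs;
  TF_CHECK_OK(session.Run({greater, less_eq}, &outputs));
  CHECK_EQ(outputs[0].dtype(), DT_BOOL);
  CHECK_EQ(outputs[1].dtype(), DT_BOOL);
  return 0;
}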