[KERNEL_GEN] Add kernel generation for FloorDiv.
PiperOrigin-RevId: 348516873 Change-Id: I5b207454fe7bcc010804cd1652260b3b1a3c07fc
This commit is contained in:
parent
265454ac0f
commit
4e7e6df7d7
@ -24,9 +24,12 @@ REGISTER4(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
|
|||||||
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||||
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
|
REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
|
||||||
int64);
|
int64);
|
||||||
|
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
|
||||||
|
!defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
|
||||||
REGISTER3(BinaryOp, GPU, "FloorDiv", functor::floor_div_real, float,
|
REGISTER3(BinaryOp, GPU, "FloorDiv", functor::floor_div_real, float,
|
||||||
Eigen::half, double);
|
Eigen::half, double);
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||||
// A special GPU kernel for int32.
|
// A special GPU kernel for int32.
|
||||||
|
@ -135,6 +135,7 @@ tf_kernel_library(
|
|||||||
"gpu_op_bitwise_or.cc",
|
"gpu_op_bitwise_or.cc",
|
||||||
"gpu_op_bitwise_xor.cc",
|
"gpu_op_bitwise_xor.cc",
|
||||||
"gpu_op_equal.cc",
|
"gpu_op_equal.cc",
|
||||||
|
"gpu_op_floor_div.cc",
|
||||||
"gpu_op_greater.cc",
|
"gpu_op_greater.cc",
|
||||||
"gpu_op_greater_equal.cc",
|
"gpu_op_greater_equal.cc",
|
||||||
"gpu_op_left_shift.cc",
|
"gpu_op_left_shift.cc",
|
||||||
@ -155,6 +156,7 @@ tf_kernel_library(
|
|||||||
":bitwise_or_kernels",
|
":bitwise_or_kernels",
|
||||||
":bitwise_xor_kernels",
|
":bitwise_xor_kernels",
|
||||||
":equal_kernels",
|
":equal_kernels",
|
||||||
|
":floor_div_kernels",
|
||||||
":gpu_ops_base",
|
":gpu_ops_base",
|
||||||
":greater_equal_kernels",
|
":greater_equal_kernels",
|
||||||
":greater_kernels",
|
":greater_kernels",
|
||||||
@ -537,6 +539,20 @@ gen_kernel_library(
|
|||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
gen_kernel_library(
|
||||||
|
name = "floor_div",
|
||||||
|
tile_size = "256",
|
||||||
|
# TODO(172804967): Enable for integer types also once unsigned integers are
|
||||||
|
# supported.
|
||||||
|
types = [
|
||||||
|
"f16",
|
||||||
|
"f32",
|
||||||
|
"f64",
|
||||||
|
],
|
||||||
|
# TODO(b/174543802): Enable once fusion heuristics are better.
|
||||||
|
# unroll_factors = "4",
|
||||||
|
)
|
||||||
|
|
||||||
# Kernels that support all floating-point types.
|
# Kernels that support all floating-point types.
|
||||||
[
|
[
|
||||||
gen_kernel_library(
|
gen_kernel_library(
|
||||||
|
@ -597,5 +597,25 @@ GENERATE_DEFAULT_TESTS_2(LogicalOr, /*test_name=*/Bool, /*T=*/bool,
|
|||||||
/*BaselineOutT=*/bool, baseline_logical_or,
|
/*BaselineOutT=*/bool, baseline_logical_or,
|
||||||
/*use_constraint=*/false)
|
/*use_constraint=*/false)
|
||||||
|
|
||||||
|
/// Test `tf.FloorDiv`.
|
||||||
|
template <typename T>
|
||||||
|
T baseline_floor_div(T lhs, T rhs) {
|
||||||
|
return std::floor(lhs / rhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
Eigen::half baseline_floor_div(Eigen::half lhs, Eigen::half rhs) {
|
||||||
|
return static_cast<Eigen::half>(std::floor(static_cast<float>(lhs / rhs)));
|
||||||
|
}
|
||||||
|
|
||||||
|
GENERATE_DEFAULT_TESTS(FloorDiv,
|
||||||
|
/*test_name=*/Half, Eigen::half, Eigen::half,
|
||||||
|
baseline_floor_div);
|
||||||
|
GENERATE_DEFAULT_TESTS(FloorDiv,
|
||||||
|
/*test_name=*/Float, float, float, baseline_floor_div);
|
||||||
|
GENERATE_DEFAULT_TESTS(FloorDiv,
|
||||||
|
/*test_name=*/Double, double, double,
|
||||||
|
baseline_floor_div);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // end namespace tensorflow
|
} // end namespace tensorflow
|
||||||
|
24
tensorflow/core/kernels/mlir_generated/gpu_op_floor_div.cc
Normal file
24
tensorflow/core/kernels/mlir_generated/gpu_op_floor_div.cc
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||||
|
#include "tensorflow/core/kernels/mlir_generated/gpu_ops_base.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
|
||||||
|
GENERATE_AND_REGISTER_BINARY_KERNEL(FloorDiv, f16, DT_HALF, Eigen::half);
|
||||||
|
GENERATE_AND_REGISTER_BINARY_KERNEL(FloorDiv, f32, DT_FLOAT, float);
|
||||||
|
GENERATE_AND_REGISTER_BINARY_KERNEL(FloorDiv, f64, DT_DOUBLE, double);
|
||||||
|
|
||||||
|
} // namespace tensorflow
|
@ -92,6 +92,9 @@ template <typename T, std::enable_if_t<
|
|||||||
llvm::is_one_of<T, Eigen::half, float, double>::value,
|
llvm::is_one_of<T, Eigen::half, float, double>::value,
|
||||||
bool> = true>
|
bool> = true>
|
||||||
absl::InlinedVector<T, 10> DefaultInput(absl::string_view op_name = "") {
|
absl::InlinedVector<T, 10> DefaultInput(absl::string_view op_name = "") {
|
||||||
|
if (op_name == "FloorDiv")
|
||||||
|
return InputAsVector<T, double>({-18.0, -9.0, -1e-6, -0.1, 0.1, 1e-6, 0.1,
|
||||||
|
0.2, 0.3, 0.5, 0.7, 0.9, 9.0, 18.0});
|
||||||
return InputAsVector<T, double>({-18.0, -9.0, -1e-6, -0.0, 0.0, 1e-6, 0.1,
|
return InputAsVector<T, double>({-18.0, -9.0, -1e-6, -0.0, 0.0, 1e-6, 0.1,
|
||||||
0.2, 0.3, 0.5, 0.7, 0.9, 9.0, 18.0});
|
0.2, 0.3, 0.5, 0.7, 0.9, 9.0, 18.0});
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
func @FloorDiv_elem_type(%arg0: tensor<*xelem_type>, %arg1: tensor<*xelem_type>)
|
||||||
|
-> tensor<*xelem_type> attributes {tf_entry, llvm.emit_c_interface} {
|
||||||
|
%0 = "tf.FloorDiv"(%arg0, %arg1) {T = elem_type, device = ""}
|
||||||
|
: (tensor<*xelem_type>, tensor<*xelem_type>) -> tensor<*xelem_type>
|
||||||
|
return %0 : tensor<*xelem_type>
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user