[MLIR][KernelGen] Add cosh kernels and tests

Allow for relative tolerance in unary kernel tests. In the case of the cosh kernels,
this makes it possible to accept an observed difference of 5.6e-8 between the kernel and
the `std::cosh` reference (32829984.568665262 vs. 32829984.568665318) in one of
the test cases.
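For reference, a combined absolute/relative closeness check conventionally accepts a
difference up to atol + rtol * |expected|. The helper below is an illustrative
stand-in for that convention, not the actual test::ExpectClose implementation:

#include <cmath>
#include <cstdio>

// Hypothetical helper illustrating a combined absolute/relative tolerance
// check; the exact formula used by test::ExpectClose may differ.
bool IsClose(double expected, double actual, double atol, double rtol) {
  return std::abs(expected - actual) <= atol + rtol * std::abs(expected);
}

int main() {
  // The case from this commit: an absolute difference of 5.6e-8 at a
  // magnitude of ~3.28e7 is a relative error of only ~1.7e-15.
  std::printf("%d\n", IsClose(32829984.568665262, 32829984.568665318,
                              /*atol=*/0.001, /*rtol=*/0.001));  // prints 1
  return 0;
}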

PiperOrigin-RevId: 351983698
Change-Id: I4b0f345cda2925a10619cba24d4427797db49599
A. Unique TensorFlower authored on 2021-01-15 04:30:14 -08:00; committed by TensorFlower Gardener
parent 489133d42a
commit 1af6b4e6a5
8 changed files with 102 additions and 7 deletions


@@ -398,6 +398,19 @@ def HLOClient_ConjOp : HLOClient_UnaryElementwiseOp<"conj", [],
  }];
}

def HLOClient_CoshOp : HLOClient_UnaryElementwiseOp<"cosh", [],
    HLO_FpOrComplexTensor> {
  let summary = "Cosh operator";

  let description = [{
    Returns `Cosh(operand)` element-wise.

    $$
    \cosh(x) = (e^x + e^{-x}) / 2
    $$
  }];
}

def HLOClient_SinhOp : HLOClient_UnaryElementwiseOp<"sinh", [],
    HLO_FpOrComplexTensor> {
  let summary = "Sinh operation";


@@ -62,7 +62,7 @@ def : Pat<(HLOClient_AcosOp NonComplexElementType:$input),
// Expand asin to MHLO dialect as follows:
//   asin(x) = 2 * atan(x / (1 + sqrt(1 - x^2)))
-def : Pat<(HLOClient_AsinOp $input),
+def : Pat<(HLOClient_AsinOp NonComplexElementType:$input),
  (HLO_MulOp
    (HLO_ConstantLike<"2"> $input),
    (HLO_Atan2Op
@@ -92,6 +92,36 @@ def : Pat<(HLOClient_AtanOp $input),
def : Pat<(HLOClient_ConjOp $v),
  (HLO_ComplexOp (HLO_RealOp $v), (HLO_NegOp (HLO_ImagOp $v)))>;

// Express `cosh` as
//   cosh(x) = (e^x + e^-x) / 2
//           = e^(x + log(1/2)) + e^(-x + log(1/2))
//
// The second formulation avoids overflow in cases where e^x overflows but
// (e^x)/2 does not.
//
// It still overflows incorrectly to inf for exactly two f32 input values,
// +/-89.4159851, due to rounding error when computing x +/- log(1/2). The
// correct answer of 3.40281961e+38 (0x7f7fffec) is very close to max-float,
// so we deem this acceptable.
def : Pat<(HLOClient_CoshOp NonComplexElementType:$input),
  (HLO_AddOp
    (HLO_ExpOp
      (HLO_AddOp
        $input,
        (HLO_LogOp
          (HLO_ConstantLike<"0.5"> $input)
        )
      )
    ),
    (HLO_ExpOp
      (HLO_AddOp
        (HLO_NegOp $input),
        (HLO_LogOp
          (HLO_ConstantLike<"0.5"> $input)
        )
      )
    )
  )>;
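
As a standalone illustration of why the pattern folds the division by two into
the exponent, here is plain C++ (not the generated kernel code) comparing the
two formulations near the f32 overflow boundary:

#include <cmath>
#include <cstdio>

// Naive formulation: exp(x) already overflows float for x > ~88.72, even
// though cosh(x) itself stays representable up to x ~ 89.4.
float CoshNaive(float x) { return (std::exp(x) + std::exp(-x)) / 2.0f; }

// Rewritten formulation from the pattern above: folding the 1/2 into the
// exponent keeps the intermediate no larger than the final result.
float CoshRewritten(float x) {
  const float kLogHalf = std::log(0.5f);
  return std::exp(x + kLogHalf) + std::exp(-x + kLogHalf);
}

int main() {
  const float x = 89.0f;  // exp(89) overflows float; cosh(89) does not.
  std::printf("naive:     %g\n", CoshNaive(x));      // inf
  std::printf("rewritten: %g\n", CoshRewritten(x));  // ~2.2449e+38
  return 0;
}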

// Express `sinh` as
//   sinh(x) = (e^x - e^-x) / 2                     if |x| < 1
//           = e^(x + log(1/2)) - e^(-x + log(1/2)) otherwise.
@@ -136,7 +166,7 @@ def : Pat<(HLOClient_SinhOp NonComplexElementType:$input),

// Express tan in MHLO dialect as
//   tan(x) = sin(x) / cos(x).
-def : Pat<(HLOClient_TanOp $input),
+def : Pat<(HLOClient_TanOp NonComplexElementType:$input),
  (HLO_DivOp
    (HLO_SinOp $input),
    (HLO_CosOp $input)


@@ -50,9 +50,9 @@ namespace {
      sep fn(ShiftRightLogicalOp) sep fn(SubOp) sep fn(XorOp)

// TODO(herhut): Generate these out of op definitions.
-#define MAP_CHLO_OPERATION_CWISE_UNARY(fn, sep) \
-  fn(AcosOp) sep fn(AsinOp) sep fn(AtanOp) sep fn(ConjOp) sep fn(ErfOp) \
-      sep fn(ErfcOp) sep fn(SinhOp) sep fn(TanOp)
+#define MAP_CHLO_OPERATION_CWISE_UNARY(fn, sep) \
+  fn(AcosOp) sep fn(AsinOp) sep fn(AtanOp) sep fn(ConjOp) sep fn(CoshOp) \
+      sep fn(ErfOp) sep fn(ErfcOp) sep fn(SinhOp) sep fn(TanOp)

template <typename OpTy>
inline void AddLegalOpOnRankedTensor(ConversionTarget *target) {
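
For context, these MAP_CHLO_OPERATION_* lists are X-macros: the list applies
fn to every op name and joins the applications with sep. A minimal
self-contained illustration of the pattern (the op list and callback here are
examples, not the real pass code):

#include <iostream>

// Illustrative X-macro over a few op names.
#define MAP_UNARY_OPS(fn, sep) fn(CoshOp) sep fn(SinhOp) sep fn(TanOp)

// Example callback: prints the op name it is applied to.
#define PRINT_OP(op) std::cout << #op << '\n'

int main() {
  MAP_UNARY_OPS(PRINT_OP, ;);  // expands to three statements joined by ';'
  return 0;
}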


@@ -588,6 +588,7 @@ foreach Mapping = [
    [TF_AsinOp, HLOClient_AsinOp],
    [TF_AtanOp, HLOClient_AtanOp],
    [TF_CeilOp, HLO_CeilOp],
    [TF_CoshOp, HLOClient_CoshOp],
    [TF_ComplexAbsOp, HLO_AbsOp],
    [TF_ConjOp, HLOClient_ConjOp],
    [TF_CosOp, HLO_CosOp],


@@ -19,8 +19,11 @@ namespace tensorflow {
REGISTER5(UnaryOp, CPU, "Cosh", functor::cosh, float, double, bfloat16,
          complex64, complex128);

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
REGISTER2(UnaryOp, GPU, "Cosh", functor::cosh, float, double);
#endif
#endif

}  // namespace tensorflow


@@ -53,6 +53,7 @@ filegroup(
        "gpu_op_complex.cc",
        "gpu_op_conj.cc",
        "gpu_op_cos.cc",
        "gpu_op_cosh.cc",
        "gpu_op_exp.cc",
        "gpu_op_expm1.cc",
        "gpu_op_floor.cc",
@@ -117,6 +118,7 @@ tf_kernel_library(
        ":complex_kernels",
        ":conj_kernels",
        ":cos_kernels",
        ":cosh_kernels",
        ":exp_kernels",
        ":expm1_kernels",
        ":floor_kernels",
@@ -341,6 +343,16 @@ gen_kernel_library(
    unroll_factors = "2",
)

gen_kernel_library(
    name = "cosh",
    tile_size = "256",
    types = [
        "f32",
        "f64",
    ],
    unroll_factors = "4",
)

gen_kernel_library(
    name = "imag",
    tile_size = "256",


@@ -0,0 +1,24 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/mlir_generated/gpu_ops_base.h"

namespace tensorflow {

GENERATE_AND_REGISTER_UNARY_KERNEL(Cosh, f32, DT_FLOAT, float);
GENERATE_AND_REGISTER_UNARY_KERNEL(Cosh, f64, DT_DOUBLE, double);

}  // namespace tensorflow


@@ -84,7 +84,8 @@ class GpuUnaryOpTest : public OpsTestBase {
    if (config.expect_strictly_equal) {
      test::ExpectEqual(expected_tensor, *GetOutput(0));
    } else {
-      test::ExpectClose(expected_tensor, *GetOutput(0));
+      test::ExpectClose(expected_tensor, *GetOutput(0), kAbsoluteTolerance,
+                        kRelativeTolerance);
    }
  }
@@ -106,6 +107,9 @@ class GpuUnaryOpTest : public OpsTestBase {
  }

 private:
  constexpr static double kAbsoluteTolerance = 0.001;
  constexpr static double kRelativeTolerance = 0.001;

  template <typename T, typename BaselineT, typename OutT,
            typename BaselineOutT>
  absl::InlinedVector<OutT, 10> ComputeExpectedOutput(
@@ -229,6 +233,14 @@ GENERATE_DEFAULT_TEST(Cos, DT_DOUBLE, DT_DOUBLE, std::cos,
GENERATE_DEFAULT_TEST_2(Cos, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT, std::cos,
                        test::GpuOpsTestConfig())

/// Test `tf.Cosh`.
GENERATE_DEFAULT_TEST(Cosh, DT_FLOAT, DT_FLOAT, std::cosh,
                      test::GpuOpsTestConfig())

GENERATE_DEFAULT_TEST(Cosh, DT_DOUBLE, DT_DOUBLE, std::cosh,
                      test::GpuOpsTestConfig())
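
Conceptually, each generated test runs the kernel on a set of inputs and
compares the result against the host baseline (std::cosh here) using the
tolerances above. A sketch of that comparison loop, with a host-side stand-in
for the GPU kernel (the harness and input set are illustrative assumptions,
not the actual macro expansion):

#include <cmath>
#include <cstdio>
#include <vector>

// Host-side stand-in for the kernel under test; the real test runs the
// MLIR-generated GPU kernel instead.
float CoshKernel(float x) {
  const float kLogHalf = std::log(0.5f);
  return std::exp(x + kLogHalf) + std::exp(-x + kLogHalf);
}

int main() {
  const std::vector<float> inputs = {-10.0f, -1.0f, 0.0f, 0.5f, 1.0f, 10.0f};
  const double kAtol = 0.001, kRtol = 0.001;
  for (float x : inputs) {
    const double expected = std::cosh(static_cast<double>(x));
    const double actual = CoshKernel(x);
    const bool ok =
        std::abs(expected - actual) <= kAtol + kRtol * std::abs(expected);
    std::printf("cosh(%g): expected=%g actual=%g %s\n", x, expected, actual,
                ok ? "OK" : "FAIL");
  }
  return 0;
}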
/// Test `tf.Exp`.
GENERATE_DEFAULT_TEST(Exp, DT_FLOAT, DT_FLOAT, std::exp,