From 100b443d8fac3108da3240a8130b3e196e9f25d6 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 13 Jan 2021 12:39:03 -0800 Subject: [PATCH] Using new defines in elementwise kernels to make unified code. PiperOrigin-RevId: 351643998 Change-Id: Ia09df5b827b3d59c823921d0dca37a9c8ec2c51c --- .../lite/delegates/gpu/cl/cl_operation.cc | 3 + .../gpu/cl/kernels/elementwise_test.cc | 56 ++++++------ .../delegates/gpu/cl/kernels/reduce_test.cc | 10 +-- .../common/selectors/operation_selector.cc | 3 +- .../delegates/gpu/common/tasks/elementwise.cc | 87 +++++++++++-------- .../delegates/gpu/common/tasks/elementwise.h | 3 +- .../lite/delegates/gpu/common/tasks/prelu.cc | 16 ++-- .../common/tasks/quantize_and_dequantize.cc | 6 +- .../lite/delegates/gpu/common/tasks/relu.cc | 4 +- 9 files changed, 102 insertions(+), 86 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/cl_operation.cc b/tensorflow/lite/delegates/gpu/cl/cl_operation.cc index b5cfdf9f78b..d35edff157e 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_operation.cc @@ -64,6 +64,7 @@ std::string GetCommonOpenCLDefines(CalculationsPrecision precision) { result += "#define TO_FLT4 convert_float4\n"; result += "#define TO_ACCUM_TYPE convert_float4\n"; result += "#define TO_ACCUM_FLT convert_float\n"; + result += "#define INIT_FLT(value) (float)(value)\n"; result += "#define INIT_FLT4(value) (float4)(value)\n"; break; case CalculationsPrecision::F16: @@ -77,6 +78,7 @@ std::string GetCommonOpenCLDefines(CalculationsPrecision precision) { result += "#define TO_FLT4 convert_half4\n"; result += "#define TO_ACCUM_TYPE convert_half4\n"; result += "#define TO_ACCUM_FLT convert_half\n"; + result += "#define INIT_FLT(value) (half)(value)\n"; result += "#define INIT_FLT4(value) (half4)(value)\n"; break; case CalculationsPrecision::F32_F16: @@ -90,6 +92,7 @@ std::string GetCommonOpenCLDefines(CalculationsPrecision precision) { result += "#define TO_FLT4 convert_half4\n"; result += "#define TO_ACCUM_TYPE convert_float4\n"; result += "#define TO_ACCUM_FLT convert_float\n"; + result += "#define INIT_FLT(value) (half)(value)\n"; result += "#define INIT_FLT4(value) (half4)(value)\n"; break; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc index 2dd0ffdf0b9..9e851f6f162 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc @@ -45,8 +45,8 @@ TEST_F(OpenCLOperationTest, Abs) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::ABS); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::ABS); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -72,8 +72,8 @@ TEST_F(OpenCLOperationTest, Cos) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::COS); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::COS); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -99,8 +99,8 @@ TEST_F(OpenCLOperationTest, Copy) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::COPY); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::COPY); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -124,8 +124,8 @@ TEST_F(OpenCLOperationTest, Elu) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::ELU); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::ELU); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -152,8 +152,8 @@ TEST_F(OpenCLOperationTest, Exp) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::EXP); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::EXP); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -181,8 +181,8 @@ TEST_F(OpenCLOperationTest, HardSwish) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::HARD_SWISH); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::HARD_SWISH); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -209,8 +209,8 @@ TEST_F(OpenCLOperationTest, Log) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::LOG); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::LOG); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -236,8 +236,8 @@ TEST_F(OpenCLOperationTest, Neg) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::NEG); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::NEG); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -262,8 +262,8 @@ TEST_F(OpenCLOperationTest, Rsqrt) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::RSQRT); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::RSQRT); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -291,8 +291,8 @@ TEST_F(OpenCLOperationTest, Sigmoid) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::SIGMOID); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::SIGMOID); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -317,8 +317,8 @@ TEST_F(OpenCLOperationTest, Sin) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::SIN); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::SIN); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -345,8 +345,8 @@ TEST_F(OpenCLOperationTest, Sqrt) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::SQRT); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::SQRT); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -373,8 +373,8 @@ TEST_F(OpenCLOperationTest, Square) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::SQUARE); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::SQUARE); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), @@ -399,8 +399,8 @@ TEST_F(OpenCLOperationTest, Tanh) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - GPUOperation operation = - CreateElementwiseOneInput(op_def, OperationType::TANH); + GPUOperation operation = CreateElementwiseOneInput( + creation_context_.GetGpuInfo(), op_def, OperationType::TANH); ASSERT_OK(ExecuteGPUOperation( src_tensor, creation_context_, absl::make_unique(std::move(operation)), diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc index fd1e4dceb93..920861f1231 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc @@ -41,7 +41,7 @@ TEST_F(OpenCLOperationTest, MeanHW) { for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; OperationDef op_def; op_def.precision = precision; auto data_type = DeduceDataTypeFromPrecision(precision); @@ -68,7 +68,7 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) { for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; OperationDef op_def; op_def.precision = precision; auto data_type = DeduceDataTypeFromPrecision(precision); @@ -95,7 +95,7 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) { for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; OperationDef op_def; op_def.precision = precision; auto data_type = DeduceDataTypeFromPrecision(precision); @@ -123,7 +123,7 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) { for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; OperationDef op_def; op_def.precision = precision; auto data_type = DeduceDataTypeFromPrecision(precision); @@ -151,7 +151,7 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) { for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; OperationDef op_def; op_def.precision = precision; auto data_type = DeduceDataTypeFromPrecision(precision); diff --git a/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc index 41c6937fb2b..aae888cdcab 100644 --- a/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc @@ -481,7 +481,8 @@ absl::Status GPUOperationFromNode(const GpuInfo& gpu_info, case OperationType::SQRT: case OperationType::SQUARE: case OperationType::TANH: { - GPUOperation operation = CreateElementwiseOneInput(op_def, op_type); + GPUOperation operation = + CreateElementwiseOneInput(gpu_info, op_def, op_type); *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc b/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc index 31eb3524e34..088fb1d960a 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc @@ -25,7 +25,8 @@ namespace tflite { namespace gpu { namespace { -std::string GetOneInputCode(const OperationType& op_type, +std::string GetOneInputCode(const GpuInfo& gpu_info, + const OperationType& op_type, CalculationsPrecision precision, const std::string& input0) { std::string result; @@ -41,18 +42,28 @@ std::string GetOneInputCode(const OperationType& op_type, result = "\n"; break; case OperationType::ELU: - result = "$0.x = $0.x < (FLT)(0.0f) ? expm1($0.x) : $0.x;\n"; - result += "$0.y = $0.y < (FLT)(0.0f) ? expm1($0.y) : $0.y;\n"; - result += "$0.z = $0.z < (FLT)(0.0f) ? expm1($0.z) : $0.z;\n"; - result += "$0.w = $0.w < (FLT)(0.0f) ? expm1($0.w) : $0.w;\n"; + if (gpu_info.IsApiOpenCl()) { + result = R"( +$0.x = $0.x < INIT_FLT(0.0f) ? expm1($0.x) : $0.x; +$0.y = $0.y < INIT_FLT(0.0f) ? expm1($0.y) : $0.y; +$0.z = $0.z < INIT_FLT(0.0f) ? expm1($0.z) : $0.z; +$0.w = $0.w < INIT_FLT(0.0f) ? expm1($0.w) : $0.w;)"; + } else { + result = R"( +$0.x = $0.x < INIT_FLT(0.0f) ? exp($0.x) - INIT_FLT(1.0f) : $0.x; +$0.y = $0.y < INIT_FLT(0.0f) ? exp($0.y) - INIT_FLT(1.0f) : $0.y; +$0.z = $0.z < INIT_FLT(0.0f) ? exp($0.z) - INIT_FLT(1.0f) : $0.z; +$0.w = $0.w < INIT_FLT(0.0f) ? exp($0.w) - INIT_FLT(1.0f) : $0.w;)"; + } break; case OperationType::EXP: result = "$0 = exp($0);\n"; break; case OperationType::HARD_SWISH: result = - "$0 *= clamp($0 * (FLT)(0.16666667f) + (FLT)(0.5f), (FLT4)(0.0f), " - "(FLT4)(1.0f));\n"; + "$0 *= clamp($0 * INIT_FLT(0.16666667f) + INIT_FLT(0.5f), " + "INIT_FLT4(0.0f), " + "INIT_FLT4(1.0f));\n"; break; case OperationType::LOG: result = "$0 = log($0);\n"; @@ -64,12 +75,12 @@ std::string GetOneInputCode(const OperationType& op_type, result = "$0 = rsqrt($0);\n"; break; case OperationType::SIGMOID: - if (precision != CalculationsPrecision::F32) { + if (gpu_info.IsApiOpenCl() && precision != CalculationsPrecision::F32) { result = "$0 = convert_half4(native_recip(1.0f + " "native_exp(convert_float4(-$0))));\n"; } else { - result = "$0 = (FLT4)(1.0f) / ((FLT4)(1.0f) + exp(-($0)));\n"; + result = "$0 = INIT_FLT4(1.0f) / (INIT_FLT4(1.0f) + exp(-($0)));\n"; } break; case OperationType::SIN: @@ -123,40 +134,40 @@ std::string GetTwoInputCode(const OperationType& op_type, break; // Comparison operators case OperationType::LESS: - result = "$0.x = $1.x < $2.x ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.y = $1.y < $2.y ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.z = $1.z < $2.z ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.w = $1.w < $2.w ? (FLT)(1.0f) : (FLT)(0.0f);\n"; + result = "$0.x = $1.x < $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.y = $1.y < $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.z = $1.z < $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.w = $1.w < $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; break; case OperationType::LESS_EQUAL: - result = "$0.x = $1.x <= $2.x ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.y = $1.y <= $2.y ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.z = $1.z <= $2.z ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.w = $1.w <= $2.w ? (FLT)(1.0f) : (FLT)(0.0f);\n"; + result = "$0.x = $1.x <= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.y = $1.y <= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.z = $1.z <= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.w = $1.w <= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; break; case OperationType::GREATER: - result = "$0.x = $1.x > $2.x ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.y = $1.y > $2.y ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.z = $1.z > $2.z ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.w = $1.w > $2.w ? (FLT)(1.0f) : (FLT)(0.0f);\n"; + result = "$0.x = $1.x > $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.y = $1.y > $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.z = $1.z > $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.w = $1.w > $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; break; case OperationType::GREATER_EQUAL: - result = "$0.x = $1.x >= $2.x ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.y = $1.y >= $2.y ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.z = $1.z >= $2.z ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.w = $1.w >= $2.w ? (FLT)(1.0f) : (FLT)(0.0f);\n"; + result = "$0.x = $1.x >= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.y = $1.y >= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.z = $1.z >= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.w = $1.w >= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; break; case OperationType::EQUAL: - result = "$0.x = $1.x == $2.x ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.y = $1.y == $2.y ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.z = $1.z == $2.z ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.w = $1.w == $2.w ? (FLT)(1.0f) : (FLT)(0.0f);\n"; + result = "$0.x = $1.x == $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.y = $1.y == $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.z = $1.z == $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.w = $1.w == $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; break; case OperationType::NOT_EQUAL: - result = "$0.x = $1.x != $2.x ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.y = $1.y != $2.y ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.z = $1.z != $2.z ? (FLT)(1.0f) : (FLT)(0.0f);\n"; - result += "$0.w = $1.w != $2.w ? (FLT)(1.0f) : (FLT)(0.0f);\n"; + result = "$0.x = $1.x != $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.y = $1.y != $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.z = $1.z != $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; + result += "$0.w = $1.w != $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n"; break; default: return "Unknown operation type;\n"; @@ -180,9 +191,7 @@ GPUOperation CreateElementwiseOneRuntimeOneScalar( } else { op.args_.AddHalf("scalar", half(scalar_parameter)); } - op.code_ = - "FLT4 second_val = (FLT4)(args.scalar, args.scalar, args.scalar, " - "args.scalar);\n"; + op.code_ = "FLT4 second_val = INIT_FLT4(args.scalar);\n"; op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value", "second_val", swap_inputs); return op; @@ -256,11 +265,13 @@ GPUOperation CreateElementwiseTwoInput( } // namespace -GPUOperation CreateElementwiseOneInput(const OperationDef& definition, +GPUOperation CreateElementwiseOneInput(const GpuInfo& gpu_info, + const OperationDef& definition, const OperationType& op_type) { GPUOperation op(definition); op.elementwise_ = true; - op.code_ = GetOneInputCode(op_type, definition.precision, "in_out_value"); + op.code_ = + GetOneInputCode(gpu_info, op_type, definition.precision, "in_out_value"); return op; } diff --git a/tensorflow/lite/delegates/gpu/common/tasks/elementwise.h b/tensorflow/lite/delegates/gpu/common/tasks/elementwise.h index 26d209c046a..5c41a8c1421 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/elementwise.h +++ b/tensorflow/lite/delegates/gpu/common/tasks/elementwise.h @@ -27,7 +27,8 @@ namespace gpu { // Creates simple one input operation without any parameters, for example // log, sin, cos, etc. -GPUOperation CreateElementwiseOneInput(const OperationDef& definition, +GPUOperation CreateElementwiseOneInput(const GpuInfo& gpu_info, + const OperationDef& definition, const OperationType& op_type); // Creates simple two input(first input is runtime tensor and second input is diff --git a/tensorflow/lite/delegates/gpu/common/tasks/prelu.cc b/tensorflow/lite/delegates/gpu/common/tasks/prelu.cc index 04cf6a496dd..10b3d67fb0b 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/prelu.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/prelu.cc @@ -73,15 +73,15 @@ GPUOperation CreatePReLU(const GpuInfo& gpu_info, } else { result.args_.AddHalf("clip", half(attr.clip)); } - result.code_ = - alpha_read + - "in_out_value = clamp(in_out_value, (FLT4)(0.0f), (FLT4)(args.clip)) + " - "min((FLT4)(0.0f), in_out_value) * alpha_val;"; + result.code_ = alpha_read + + "in_out_value = clamp(in_out_value, INIT_FLT4(0.0f), " + "INIT_FLT4(args.clip)) + " + "min(INIT_FLT4(0.0f), in_out_value) * alpha_val;"; } else { - result.code_ = - alpha_read + - "in_out_value = max((FLT4)(0.0f), in_out_value) + min((FLT4)(0.0f), " - "in_out_value) * alpha_val;"; + result.code_ = alpha_read + + "in_out_value = max(INIT_FLT4(0.0f), in_out_value) + " + "min(INIT_FLT4(0.0f), " + "in_out_value) * alpha_val;"; } return result; diff --git a/tensorflow/lite/delegates/gpu/common/tasks/quantize_and_dequantize.cc b/tensorflow/lite/delegates/gpu/common/tasks/quantize_and_dequantize.cc index dad956b2239..7c9a4cf8580 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/quantize_and_dequantize.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/quantize_and_dequantize.cc @@ -49,9 +49,9 @@ GPUOperation CreateQuantizeAndDequantize( op.args_.AddHalf("scale", half(adjusted_attr.scale)); } op.code_ = R"( -FLT4 clamped_value = min((FLT4)(args.max), max((FLT4)(args.min), in_out_value)); -FLT4 quantized_value = round((clamped_value - (FLT4)(args.min)) / (FLT4)(args.scale)); -FLT4 dequantized_value = quantized_value * (FLT4)(args.scale) + (FLT4)(args.min); +FLT4 clamped_value = min(INIT_FLT4(args.max), max(INIT_FLT4(args.min), in_out_value)); +FLT4 quantized_value = round((clamped_value - INIT_FLT4(args.min)) / INIT_FLT4(args.scale)); +FLT4 dequantized_value = quantized_value * INIT_FLT4(args.scale) + INIT_FLT4(args.min); in_out_value = dequantized_value;)"; return op; diff --git a/tensorflow/lite/delegates/gpu/common/tasks/relu.cc b/tensorflow/lite/delegates/gpu/common/tasks/relu.cc index 8dc95f1675f..b4d2b086e36 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/relu.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/relu.cc @@ -27,14 +27,14 @@ GPUOperation CreateReLU(const OperationDef& definition, std::string min_func; if (attr.alpha != 0.0f) { - min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))"; + min_func = "min(in_out_value * args.alpha, INIT_FLT(0.0f))"; if (definition.precision == CalculationsPrecision::F32) { op.args_.AddFloat("alpha", attr.alpha); } else { op.args_.AddHalf("alpha", half(attr.alpha)); } } else { - min_func = "(FLT)(0.0f)"; + min_func = "INIT_FLT(0.0f)"; } if (attr.clip != 0.0f) { if (definition.precision == CalculationsPrecision::F32) {