diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
index 5ac2600b445..804366082da 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
@@ -991,6 +991,45 @@ cc_test(
     ],
 )
 
+cc_library(
+    name = "quantize_and_dequantize",
+    srcs = ["quantize_and_dequantize.cc"],
+    hdrs = ["quantize_and_dequantize.h"],
+    deps = [
+        ":flt_type",
+        ":gpu_operation",
+        ":util",
+        "//tensorflow/lite/delegates/gpu/cl:cl_context",
+        "//tensorflow/lite/delegates/gpu/cl:cl_kernel",
+        "//tensorflow/lite/delegates/gpu/cl:linear_storage",
+        "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "//tensorflow/lite/delegates/gpu/common:tensor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:variant",
+    ],
+)
+
+cc_test(
+    name = "quantize_and_dequantize_test",
+    srcs = ["quantize_and_dequantize_test.cc"],
+    linkstatic = True,
+    tags = tf_gpu_tests_tags() + [
+        "linux",
+        "local",
+    ],
+    deps = [
+        ":cl_test",
+        ":quantize_and_dequantize",
+        "//tensorflow/lite/delegates/gpu/cl:tensor",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "//tensorflow/lite/kernels/internal:quantization_util",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
     name = "relu",
     srcs = ["relu.cc"],
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.cc b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.cc
new file mode 100644
index 00000000000..f7751fac6ff
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.cc
@@ -0,0 +1,128 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h"
+
+#include <string>
+
+#include "absl/strings/str_cat.h"
+#include "absl/types/variant.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+
+namespace tflite {
+namespace gpu {
+namespace cl {
+
+QuantizeAndDequantize::QuantizeAndDequantize(
+    const OperationDef& definition, const QuantizeAndDequantizeAttributes& attr,
+    CalculationsPrecision scalar_precision)
+    : ElementwiseOperation(definition) {
+  min_ = FLT(scalar_precision, attr.min);
+  max_ = FLT(scalar_precision, attr.max);
+  scale_ = FLT(scalar_precision, attr.scale);
+}
+
+QuantizeAndDequantize::QuantizeAndDequantize(QuantizeAndDequantize&& operation)
+    : ElementwiseOperation(std::move(operation)),
+      min_(std::move(operation.min_)),
+      max_(std::move(operation.max_)),
+      scale_(std::move(operation.scale_)) {}
+
+QuantizeAndDequantize& QuantizeAndDequantize::operator=(
+    QuantizeAndDequantize&& operation) {
+  if (this != &operation) {
+    min_ = std::move(operation.min_);
+    max_ = std::move(operation.max_);
+    scale_ = std::move(operation.scale_);
+    ElementwiseOperation::operator=(std::move(operation));
+  }
+  return *this;
+}
+
+void QuantizeAndDequantize::SetLinkIndex(int index) {
+  min_.SetName(absl::StrCat("quantize_and_dequantize_min_", index));
+  max_.SetName(absl::StrCat("quantize_and_dequantize_max_", index));
+  scale_.SetName(absl::StrCat("quantize_and_dequantize_scale_", index));
+}
+
+std::string QuantizeAndDequantize::GetCoreCode(
+    const LinkingContext& context) const {
+  std::string scale_string, max_string, min_string;
+  if (!scale_.Active()) {
+    scale_string = "(FLT4)(1.0f)";
+  } else {
+    scale_string = absl::StrCat("(FLT4)(", scale_.GetName(), ")");
+  }
+  if (!max_.Active()) {
+    max_string = "(FLT4)(0.0f)";
+  } else {
+    max_string = absl::StrCat("(FLT4)(", max_.GetName(), ")");
+  }
+  if (!min_.Active()) {
+    min_string = "(FLT4)(0.0f)";
+  } else {
+    min_string = absl::StrCat("(FLT4)(", min_.GetName(), ")");
+  }
+  std::string clamped_value = absl::StrCat(
+      "min(", max_string, ", max(", min_string, ", ", context.var_name, "))");
+  std::string quantized_value = absl::StrCat(
+      "round((", clamped_value, " - ", min_string, ") / ", scale_string, ")");
+  std::string dequantized_value =
+      absl::StrCat(quantized_value, " * ", scale_string, " + ", min_string);
+
+  return absl::StrCat(context.var_name, " = ", dequantized_value, ";\n");
+}
+
+std::string QuantizeAndDequantize::GetArgsDeclaration() const {
+  return absl::StrCat(",\n    ", min_.GetDeclaration(), ",\n    ",
+                      max_.GetDeclaration(), ",\n    ",
+                      scale_.GetDeclaration());
+}
+
+Status QuantizeAndDequantize::BindArguments(CLKernel* kernel) {
+  RETURN_IF_ERROR(kernel->SetBytesAuto(min_));
+  RETURN_IF_ERROR(kernel->SetBytesAuto(max_));
+  RETURN_IF_ERROR(kernel->SetBytesAuto(scale_));
+  return OkStatus();
+}
+
+Status CreateQuantizeAndDequantize(const CreationContext& creation_context,
+                                   const OperationDef& definition,
+                                   const QuantizeAndDequantizeAttributes& attr,
+                                   QuantizeAndDequantize* result) {
+  const auto scalar_precision = creation_context.device->IsPowerVR()
+                                    ? CalculationsPrecision::F32
+                                    : definition.precision;
+  const bool is_fp16 = definition.precision == CalculationsPrecision::F16 ||
+                       definition.precision == CalculationsPrecision::F32_F16;
+  if (is_fp16 && attr.scale < 0.000062f) {
+    // The smallest positive normal number for the half-precision
+    // floating-point format is 2^-14 ~ 0.000062f. Therefore, if the scale is
+    // less than this number, we just reset it accordingly.
+    QuantizeAndDequantizeAttributes adjusted_attr = attr;
+    adjusted_attr.scale = 0.000062f;
+    *result =
+        QuantizeAndDequantize(definition, adjusted_attr, scalar_precision);
+  } else {
+    *result = QuantizeAndDequantize(definition, attr, scalar_precision);
+  }
+  result->SetLinkIndex(0);
+  return OkStatus();
+}
+
+}  // namespace cl
+}  // namespace gpu
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h
new file mode 100644
index 00000000000..07fa8f21773
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h
@@ -0,0 +1,100 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
+
+#include <string>
+
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/flt_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
+#include "tensorflow/lite/delegates/gpu/cl/linear_storage.h"
+#include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+
+namespace tflite {
+namespace gpu {
+namespace cl {
+
+// Performs the operation: {Quantize, Dequantize} on floating-point data.
+// We need this operation to emulate the error introduced by quantization
+// on the GPU, which cannot represent int8 tensors.
+//
+// Implemented as:
+// qvalue = round((min(qmax, max(qmin, src_val)) - qmin) * (1/qscale) + 0.5)
+// dq_value = qvalue * qscale + qmin
+// Here, qmin, qmax & qscale refer to the quantization values as implemented in
+// TensorFlow Lite's 'FakeQuant' kernel. round(x + 0.5) ensures we round away
+// from zero.
+//
+// NOTE: We do not need to nudge min/max values in this op, since they would
+// already be adjusted while generating the quantized model.
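+//
+// Worked example (a sketch, using the 8-bit values exercised in
+// quantize_and_dequantize_test.cc): with a range nudged to [0, 1],
+// qmin = 0.0, qmax = 1.0 and qscale = 1/255 ~ 0.003922. For
+// src_val = 0.4444444:
+//   clamped  = min(1.0, max(0.0, 0.4444444)) = 0.4444444
+//   qvalue   = round(0.4444444 / 0.003922)   = 113
+//   dq_value = 113 * 0.003922 + 0.0          = 0.443137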
+class QuantizeAndDequantize : public ElementwiseOperation {
+ public:
+  QuantizeAndDequantize() = default;
+  // Move only
+  QuantizeAndDequantize(QuantizeAndDequantize&& operation);
+  QuantizeAndDequantize& operator=(QuantizeAndDequantize&& operation);
+  QuantizeAndDequantize(const QuantizeAndDequantize&) = delete;
+  QuantizeAndDequantize& operator=(const QuantizeAndDequantize&) = delete;
+
+  void SetLinkIndex(int index) override;
+  std::string GetCoreCode(const LinkingContext& context) const override;
+  std::string GetArgsDeclaration() const override;
+  Status BindArguments(CLKernel* kernel) override;
+
+  friend Status CreateQuantizeAndDequantize(
+      const CreationContext& creation_context, const OperationDef& definition,
+      const QuantizeAndDequantizeAttributes& attr,
+      QuantizeAndDequantize* result);
+
+ private:
+  QuantizeAndDequantize(const OperationDef& definition,
+                        const QuantizeAndDequantizeAttributes& attr,
+                        CalculationsPrecision scalar_precision);
+
+  template <DataType T>
+  Status UploadParameters(const ::tflite::gpu::Tensor<Linear, T>& parameters,
+                          CLContext* context);
+
+  FLT min_;
+  FLT max_;
+  FLT scale_;
+};
+
+Status CreateQuantizeAndDequantize(const CreationContext& creation_context,
+                                   const OperationDef& definition,
+                                   const QuantizeAndDequantizeAttributes& attr,
+                                   QuantizeAndDequantize* result);
+
+template <DataType T>
+Status QuantizeAndDequantize::UploadParameters(
+    const ::tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context) {
+  LinearStorageCreateInfo create_info;
+  create_info.storage_type =
+      DeduceLinearStorageType(definition_.GetPrimaryStorageType());
+  create_info.data_type = definition_.GetPrimaryDataType();
+  return OkStatus();
+}
+
+}  // namespace cl
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize_test.cc
new file mode 100644
index 00000000000..71d6d066b9b
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize_test.cc
@@ -0,0 +1,182 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h"
+
+#include <cmath>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/gpu/cl/kernels/cl_test.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+namespace tflite {
+namespace gpu {
+namespace cl {
+namespace {
+
+TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits8) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 3, 2, 1);
+  src_tensor.data = {0.0f, 1.0f, 0.25f, 0.50f, 0.4444444f, 0.00001f};
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
+  const int num_bits = 8;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ 0.0, /**original_max**/ 1.0,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  for (auto storage : env_.GetSupportedStorages()) {
+    for (auto precision : env_.GetSupportedPrecisions()) {
+      const float eps =
+          precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      QuantizeAndDequantize operation;
+      ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
+                                            &operation));
+      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
+                                    BHWC(1, 3, 2, 1), &dst_tensor));
+      EXPECT_THAT(dst_tensor.data,
+                  Pointwise(FloatNear(eps), {0.0f, 1.0f, 0.25098f, 0.498039f,
+                                             0.443137f, 0.0f}));
+    }
+  }
+}
+
+TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits8_NegativeRange) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 3, 1, 2);
+  src_tensor.data = {0.0f, -0.9f, 0.25f, 0.50f, 0.4444444f, -0.00001f};
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
+  const int num_bits = 8;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ -0.9, /**original_max**/ 0.9,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  for (auto storage : env_.GetSupportedStorages()) {
+    for (auto precision : env_.GetSupportedPrecisions()) {
+      const float eps =
+          precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      QuantizeAndDequantize operation;
+      ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
+                                            &operation));
+      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
+                                    BHWC(1, 3, 1, 2), &dst_tensor));
+      EXPECT_THAT(dst_tensor.data,
+                  Pointwise(FloatNear(eps), {0.0f, -0.896471f, 0.247059f,
+                                             0.501176f, 0.444706f, 0.0f}));
+    }
+  }
+}
+
+TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits16) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 3, 1, 2);
+  src_tensor.data = {0.0f, 1.0f, 0.25f, 0.50f, 0.4444444f, 0.00001f};
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
+  const int num_bits = 16;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ 0.0, /**original_max**/ 1.0,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  for (auto storage : env_.GetSupportedStorages()) {
+    for (auto precision : env_.GetSupportedPrecisions()) {
+      const float eps =
+          precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      QuantizeAndDequantize operation;
+      ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
+                                            &operation));
+      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
+                                    BHWC(1, 3, 1, 2), &dst_tensor));
+      EXPECT_THAT(dst_tensor.data,
+                  Pointwise(FloatNear(eps), {0.0f, 1.0f, 0.250004f, 0.500008f,
+                                             0.44445f, 1.5259e-05f}));
+    }
+  }
+}
+
+TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits16_NegativeRange) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 3, 2, 1);
+  src_tensor.data = {0.0f, -0.9f, 0.25f, 0.50f, 0.4444444f, -0.00001f};
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
+  const int num_bits = 16;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ -0.9, /**original_max**/ 0.9,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  for (auto storage : env_.GetSupportedStorages()) {
+    for (auto precision : env_.GetSupportedPrecisions()) {
+      const float eps =
+          precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      QuantizeAndDequantize operation;
+      ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr,
+                                            &operation));
+      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
+                                    BHWC(1, 3, 2, 1), &dst_tensor));
+      EXPECT_THAT(dst_tensor.data,
+                  Pointwise(FloatNear(eps), {0.0f, -0.900014f, 0.249998f,
+                                             0.499995f, 0.444431f, 0.0f}));
+    }
+  }
+}
+
+}  // namespace
+}  // namespace cl
+}  // namespace gpu
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD
index b633cb4311a..6f9b52bd1c9 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD
@@ -132,6 +132,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/cl/kernels:padding",
         "//tensorflow/lite/delegates/gpu/cl/kernels:pooling",
         "//tensorflow/lite/delegates/gpu/cl/kernels:prelu",
+        "//tensorflow/lite/delegates/gpu/cl/kernels:quantize_and_dequantize",
         "//tensorflow/lite/delegates/gpu/cl/kernels:relu",
        "//tensorflow/lite/delegates/gpu/cl/kernels:reshape",
         "//tensorflow/lite/delegates/gpu/cl/kernels:reshapex4",
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc
index 1ae6c23cbe5..2fcb90fc8d1 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc
@@ -279,6 +279,12 @@ Status GPUOperationFromNode(const CreationContext& creation_context,
       auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes);
       return SelectPReLU(attr, creation_context, op_def, gpu_op);
     }
+    case OperationType::QUANTIZE_AND_DEQUANTIZE: {
+      auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
+          node.operation.attributes);
+      return SelectQuantizeAndDequantize(attr, creation_context, op_def,
+                                         gpu_op);
+    }
     case OperationType::RELU: {
       auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
       SelectReLU(creation_context, attr, op_def, gpu_op);
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc
index 22244351bd7..ff26a3be601 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc
@@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/cl/kernels/padding.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/pooling.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/prelu.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/relu.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/reshape.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h" @@ -218,6 +219,17 @@ Status SelectWinograd36To4x4( return OkStatus(); } +Status SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr, + const CreationContext& creation_context, + const OperationDef& op_def, + std::unique_ptr* ptr) { + QuantizeAndDequantize operation; + RETURN_IF_ERROR( + CreateQuantizeAndDequantize(creation_context, op_def, attr, &operation)); + *ptr = absl::make_unique(std::move(operation)); + return OkStatus(); +} + } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h index fd29ebc0e91..d9a5365fc9e 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h @@ -100,6 +100,11 @@ Status SelectWinograd36To4x4( const ::tflite::gpu::Tensor& biases, std::unique_ptr* ptr); +Status SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr, + const CreationContext& creation_context, + const OperationDef& op_def, + std::unique_ptr* ptr); + } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index fa5cdc54047..b20b24d28c3 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -118,6 +118,8 @@ std::string ToString(enum OperationType op) { return "pow"; case OperationType::PRELU: return "prelu"; + case OperationType::QUANTIZE_AND_DEQUANTIZE: + return "quantize_and_dequantize"; case OperationType::RELU: return "relu"; case OperationType::RESHAPE: @@ -183,6 +185,7 @@ OperationType OperationTypeFromString(const std::string& name) { {"pooling_2d", OperationType::POOLING_2D}, {"pow", OperationType::POW}, {"prelu", OperationType::PRELU}, + {"quantize_and_dequantize", OperationType::QUANTIZE_AND_DEQUANTIZE}, {"relu", OperationType::RELU}, {"resize", OperationType::RESIZE}, {"reshape", OperationType::RESHAPE}, diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index c5be9897fed..16016d334cf 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -57,6 +57,8 @@ enum class OperationType { POOLING_2D, POW, PRELU, + // Used to accurately run inference on quantized models. + QUANTIZE_AND_DEQUANTIZE, RELU, RESHAPE, RESIZE, @@ -478,6 +480,14 @@ struct SpaceToDepthAttributes { int block_size; }; +// These help perform a combination of Quantize & Dequantize to adjust float +// values like quantized inference would. 
+struct QuantizeAndDequantizeAttributes {
+  float min = 0;
+  float max = 0;
+  float scale = 0;
+};
+
 }  // namespace gpu
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD
index 68ae9dfd4dc..30d759df724 100644
--- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD
@@ -451,6 +451,38 @@ cc_test(
     ],
 )
 
+cc_library(
+    name = "quantize_and_dequantize",
+    srcs = ["quantize_and_dequantize.cc"],
+    hdrs = ["quantize_and_dequantize.h"],
+    deps = [
+        "//tensorflow/lite/delegates/gpu/common:convert",
+        "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:shape",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "//tensorflow/lite/delegates/gpu/common:types",
+        "//tensorflow/lite/delegates/gpu/gl:node_shader",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
+cc_test(
+    name = "quantize_and_dequantize_test",
+    srcs = ["quantize_and_dequantize_test.cc"],
+    tags = tf_gpu_tests_tags() + [
+        "notap",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":quantize_and_dequantize",
+        ":test_util",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/kernels/internal:quantization_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_library(
     name = "relu",
     srcs = ["relu.cc"],
@@ -699,6 +731,7 @@ TFLITE_GPU_BINARY_RELEASE_OPERATORS = [
     "pad",
     "pooling",
     "prelu",
+    "quantize_and_dequantize",
     "relu",
     "mean",
     "reshape",
diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.cc b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.cc
new file mode 100644
index 00000000000..3f21124aee9
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.cc
@@ -0,0 +1,74 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h"
+
+#include <memory>
+#include <string>
+
+#include "absl/memory/memory.h"
+#include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/types.h"
+
+namespace tflite {
+namespace gpu {
+namespace gl {
+namespace {
+
+class QuantizeAndDequantize : public NodeShader {
+ public:
+  Status GenerateCode(const GenerationContext& ctx,
+                      GeneratedCode* generated_code) const final {
+    std::string code;
+    // Constants
+    code += "vec4 scale = vec4($quant_scale$);";
+    code += "vec4 min_bound = vec4($quant_min$);";
+    code += "vec4 max_bound = vec4($quant_max$);";
+    // Quantize
+    code += "value_0 = clamp(value_0, min_bound, max_bound);";
+    code += "value_0 = (value_0 - min_bound) / scale;";
+    code += "value_0 = floor(value_0 + vec4(0.5));";
+    // Dequantize
+    code += "value_0 = value_0 * scale + min_bound;";
+
+    auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
+        ctx.node->operation.attributes);
+    *generated_code = {
+        /*parameters=*/{{"quant_min", attr.min},
+                        {"quant_max", attr.max},
+                        {"quant_scale", attr.scale}},
+        /*objects=*/{},
+        /*shared_variables=*/{},
+        /*workload=*/uint3(),
+        /*workgroup=*/uint3(),
+        /*source_code=*/code,
+        /*input=*/IOStructure::AUTO,
+        /*output=*/IOStructure::AUTO,
+    };
+    return OkStatus();
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<NodeShader> NewQuantizeAndDequantizeNodeShader() {
+  return absl::make_unique<QuantizeAndDequantize>();
+}
+
+}  // namespace gl
+}  // namespace gpu
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h
new file mode 100644
index 00000000000..1fa6ad918c4
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h
@@ -0,0 +1,47 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
+
+#include <memory>
+
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/gl/node_shader.h"
+
+namespace tflite {
+namespace gpu {
+namespace gl {
+
+// Performs the operation: {Quantize, Dequantize} on floating-point data.
+// We need this operation to emulate the error introduced by quantization
+// on the GPU, which cannot represent int8 tensors.
+//
+// Implemented as:
+// qvalue = round((min(qmax, max(qmin, src_val)) - qmin) * (1/qscale) + 0.5)
+// dq_value = qvalue * qscale + qmin
+// Here, qmin, qmax & qscale refer to the quantization values as implemented in
+// TensorFlow Lite's 'FakeQuant' kernel. round(x + 0.5) ensures we round away
+// from zero.
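+//
+// In the GLSL emitted by quantize_and_dequantize.cc this corresponds to:
+//   value_0 = clamp(value_0, min_bound, max_bound);
+//   value_0 = (value_0 - min_bound) / scale;
+//   value_0 = floor(value_0 + vec4(0.5));
+//   value_0 = value_0 * scale + min_bound;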
+//
+// NOTE: We do not need to nudge min/max values in this op, since they would
+// already be adjusted while generating the quantized model.
+std::unique_ptr<NodeShader> NewQuantizeAndDequantizeNodeShader();
+
+}  // namespace gl
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize_test.cc
new file mode 100644
index 00000000000..916f9166175
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize_test.cc
@@ -0,0 +1,159 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/gl/kernels/test_util.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+namespace tflite {
+namespace gpu {
+namespace gl {
+namespace {
+
+TEST(QuantizeAndDequantizeTest, Dim2Bits8) {
+  TensorRef<BHWC> input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 3, 2, 1);
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
+  const int num_bits = 8;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ 0.0, /**original_max**/ 1.0,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  TensorRef<BHWC> output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 3, 2, 1);
+
+  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr},
+                      {input}, {output});
+  ASSERT_TRUE(
+      model.PopulateTensor(0, {0.0, 1.0, 0.25, 0.50, 0.4444444, 0.00001}));
+  ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader()));
+  EXPECT_THAT(model.GetOutput(0),
+              Pointwise(FloatNear(1e-6),
+                        {0.0f, 1.0f, 0.25098f, 0.498039f, 0.443137f, 0.0f}));
+}
+
+TEST(QuantizeAndDequantizeTest, Dim3Bits8_NegativeRange) {
+  TensorRef<BHWC> input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 3, 1, 2);
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
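+  // For [-0.9, 0.9] over 8 bits, NudgeQuantizationRange() should yield
+  // roughly min = -0.903529, max = 0.896471, scale = 1.8/255 ~ 0.0070588,
+  // which is why -0.9 comes back as -0.896471 in the expectation below.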
+  const int num_bits = 8;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ -0.9, /**original_max**/ 0.9,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  TensorRef<BHWC> output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 3, 1, 2);
+
+  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr},
+                      {input}, {output});
+  ASSERT_TRUE(
+      model.PopulateTensor(0, {0.0, -0.9, 0.25, 0.50, 0.4444444, -0.00001}));
+  ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader()));
+  EXPECT_THAT(model.GetOutput(0),
+              Pointwise(FloatNear(1e-6), {0.0f, -0.896471f, 0.247059f,
+                                          0.501176f, 0.444706f, 0.0f}));
+}
+
+TEST(QuantizeAndDequantizeTest, Dim3Bits16) {
+  TensorRef<BHWC> input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 3, 1, 2);
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
+  const int num_bits = 16;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ 0.0, /**original_max**/ 1.0,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  TensorRef<BHWC> output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 3, 1, 2);
+
+  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr},
+                      {input}, {output});
+  ASSERT_TRUE(
+      model.PopulateTensor(0, {0.0, 1.0, 0.25, 0.50, 0.4444444, 0.00001}));
+  ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader()));
+  EXPECT_THAT(model.GetOutput(0),
+              Pointwise(FloatNear(1e-6), {0.0f, 1.0f, 0.250004f, 0.500008f,
+                                          0.44445f, 1.5259e-05f}));
+}
+
+TEST(QuantizeAndDequantizeTest, Dim2Bits16_NegativeRange) {
+  TensorRef<BHWC> input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 3, 2, 1);
+
+  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
+  // pre-nudged, since this should be done during model conversion.
+  const int num_bits = 16;
+  const int quant_min = 0;
+  const int quant_max = (1 << num_bits) - 1;
+  QuantizeAndDequantizeAttributes attr;
+  NudgeQuantizationRange(/**original_min**/ -0.9, /**original_max**/ 0.9,
+                         quant_min, quant_max, &attr.min, &attr.max,
+                         &attr.scale);
+
+  TensorRef<BHWC> output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 3, 2, 1);
+
+  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr},
+                      {input}, {output});
+  ASSERT_TRUE(
+      model.PopulateTensor(0, {0.0, -0.9, 0.25, 0.50, 0.4444444, -0.00001}));
+  ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader()));
+  EXPECT_THAT(model.GetOutput(0),
+              Pointwise(FloatNear(1e-6), {0.0f, -0.900014f, 0.249998f,
+                                          0.499995f, 0.444431f, 0.0f}));
+}
+
+}  // namespace
+}  // namespace gl
+}  // namespace gpu
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc
index cb4bed369dc..6903abc0b26 100644
--- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc
@@ -40,6 +40,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/gl/kernels/pad.h" #include "tensorflow/lite/delegates/gpu/gl/kernels/pooling.h" #include "tensorflow/lite/delegates/gpu/gl/kernels/prelu.h" +#include "tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h" #include "tensorflow/lite/delegates/gpu/gl/kernels/relu.h" #include "tensorflow/lite/delegates/gpu/gl/kernels/reshape.h" #include "tensorflow/lite/delegates/gpu/gl/kernels/resize.h" @@ -85,6 +86,8 @@ class Registry : public NodeShader { insert_op(Type::PAD, NewPadNodeShader); insert_op(Type::POOLING_2D, NewPoolingNodeShader); insert_op(Type::PRELU, NewPReLUNodeShader); + insert_op(Type::QUANTIZE_AND_DEQUANTIZE, + NewQuantizeAndDequantizeNodeShader); insert_op(Type::RELU, NewReLUNodeShader); insert_op(Type::RESIZE, NewResizeNodeShader); insert_op(Type::RESHAPE, NewReshapeNodeShader); diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index f297164de03..5eb7d284ad1 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -305,6 +305,7 @@ Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, case OperationType::BATCH_TO_SPACE: case OperationType::CONST: case OperationType::LSTM: + case OperationType::QUANTIZE_AND_DEQUANTIZE: case OperationType::SPACE_TO_BATCH: case OperationType::TRANSPOSE: case OperationType::UNKNOWN: