Adds QuantizeAndDequantize kernel to OpenGL & OpenCL backends. This is not a TFLite op, but will be used to support inference on quantized models with future CLs.
PiperOrigin-RevId: 301229478 Change-Id: I7379a801ba355616a6730578a01c077253494670
This commit is contained in:
		
							parent
							
								
									eb6b2831f8
								
							
						
					
					
						commit
						e61ff10d8b
					
				| @ -991,6 +991,45 @@ cc_test( | |||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
# OpenCL QuantizeAndDequantize kernel library (depends on the cl_kernel and
# cl_context targets of the GPU delegate).
|  | cc_library( | ||||||
|  |     name = "quantize_and_dequantize", | ||||||
|  |     srcs = ["quantize_and_dequantize.cc"], | ||||||
|  |     hdrs = ["quantize_and_dequantize.h"], | ||||||
|  |     deps = [ | ||||||
|  |         ":flt_type", | ||||||
|  |         ":gpu_operation", | ||||||
|  |         ":util", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/cl:cl_context", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/cl:cl_kernel", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/cl:linear_storage", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:data_type", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:operations", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:status", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:tensor", | ||||||
|  |         "@com_google_absl//absl/strings", | ||||||
|  |         "@com_google_absl//absl/types:variant", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
# Tests for the OpenCL QuantizeAndDequantize kernel; expected values are built
# with the nudging helper from the quantization_util dependency.
|  | cc_test( | ||||||
|  |     name = "quantize_and_dequantize_test", | ||||||
|  |     srcs = ["quantize_and_dequantize_test.cc"], | ||||||
|  |     linkstatic = True, | ||||||
|  |     tags = tf_gpu_tests_tags() + [ | ||||||
|  |         "linux", | ||||||
|  |         "local", | ||||||
|  |     ], | ||||||
|  |     deps = [ | ||||||
|  |         ":cl_test", | ||||||
|  |         ":quantize_and_dequantize", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/cl:tensor", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:operations", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:status", | ||||||
|  |         "//tensorflow/lite/kernels/internal:quantization_util", | ||||||
|  |         "@com_google_googletest//:gtest_main", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| cc_library( | cc_library( | ||||||
|     name = "relu", |     name = "relu", | ||||||
|     srcs = ["relu.cc"], |     srcs = ["relu.cc"], | ||||||
|  | |||||||
| @ -0,0 +1,128 @@ | |||||||
|  | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | ||||||
|  | 
 | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  | 
 | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | 
 | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | ==============================================================================*/ | ||||||
|  | 
 | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h" | ||||||
|  | 
 | ||||||
|  | #include <string> | ||||||
|  | 
 | ||||||
|  | #include "absl/strings/str_cat.h" | ||||||
|  | #include "absl/types/variant.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/tensor.h" | ||||||
|  | 
 | ||||||
|  | namespace tflite { | ||||||
|  | namespace gpu { | ||||||
|  | namespace cl { | ||||||
|  | 
 | ||||||
|  | QuantizeAndDequantize::QuantizeAndDequantize( | ||||||
|  |     const OperationDef& definition, const QuantizeAndDequantizeAttributes& attr, | ||||||
|  |     CalculationsPrecision scalar_precision) | ||||||
|  |     : ElementwiseOperation(definition) { | ||||||
|  |   min_ = FLT(scalar_precision, attr.min); | ||||||
|  |   max_ = FLT(scalar_precision, attr.max); | ||||||
|  |   scale_ = FLT(scalar_precision, attr.scale); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | QuantizeAndDequantize::QuantizeAndDequantize(QuantizeAndDequantize&& operation) | ||||||
|  |     : ElementwiseOperation(std::move(operation)), | ||||||
|  |       min_(std::move(operation.min_)), | ||||||
|  |       max_(std::move(operation.max_)), | ||||||
|  |       scale_(std::move(operation.scale_)) {} | ||||||
|  | 
 | ||||||
|  | QuantizeAndDequantize& QuantizeAndDequantize::operator=( | ||||||
|  |     QuantizeAndDequantize&& operation) { | ||||||
|  |   if (this != &operation) { | ||||||
|  |     min_ = std::move(operation.min_); | ||||||
|  |     max_ = std::move(operation.max_); | ||||||
|  |     scale_ = std::move(operation.scale_); | ||||||
|  |     ElementwiseOperation::operator=(std::move(operation)); | ||||||
|  |   } | ||||||
|  |   return *this; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void QuantizeAndDequantize::SetLinkIndex(int index) { | ||||||
|  |   min_.SetName(absl::StrCat("quantize_and_dequantize_min_", index)); | ||||||
|  |   max_.SetName(absl::StrCat("quantize_and_dequantize_max_", index)); | ||||||
|  |   scale_.SetName(absl::StrCat("quantize_and_dequantize_scale_", index)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::string QuantizeAndDequantize::GetCoreCode( | ||||||
|  |     const LinkingContext& context) const { | ||||||
|  |   std::string scale_string, max_string, min_string; | ||||||
|  |   if (!scale_.Active()) { | ||||||
|  |     scale_string = "(FLT4)(1.0f)"; | ||||||
|  |   } else { | ||||||
|  |     scale_string = absl::StrCat("(FLT4)(", scale_.GetName(), ")"); | ||||||
|  |   } | ||||||
|  |   if (!max_.Active()) { | ||||||
|  |     max_string = "(FLT4)(0.0f)"; | ||||||
|  |   } else { | ||||||
|  |     max_string = absl::StrCat("(FLT4)(", max_.GetName(), ")"); | ||||||
|  |   } | ||||||
|  |   if (!min_.Active()) { | ||||||
|  |     min_string = "(FLT4)(0.0f)"; | ||||||
|  |   } else { | ||||||
|  |     min_string = absl::StrCat("(FLT4)(", min_.GetName(), ")"); | ||||||
|  |   } | ||||||
|  |   std::string clamped_value = absl::StrCat( | ||||||
|  |       "min(", max_string, ", max(", min_string, ", ", context.var_name, "))"); | ||||||
|  |   std::string quantized_value = absl::StrCat( | ||||||
|  |       "round((", clamped_value, " - ", min_string, ") / ", scale_string, ")"); | ||||||
|  |   std::string dequantized_value = | ||||||
|  |       absl::StrCat(quantized_value, " * ", scale_string, " + ", min_string); | ||||||
|  | 
 | ||||||
|  |   return absl::StrCat(context.var_name, " = ", dequantized_value, ";\n"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::string QuantizeAndDequantize::GetArgsDeclaration() const { | ||||||
|  |   return absl::StrCat(",\n    ", min_.GetDeclaration(), ",\n    ", | ||||||
|  |                       max_.GetDeclaration(), ",\n    ", | ||||||
|  |                       scale_.GetDeclaration()); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Status QuantizeAndDequantize::BindArguments(CLKernel* kernel) { | ||||||
|  |   RETURN_IF_ERROR(kernel->SetBytesAuto(min_)); | ||||||
|  |   RETURN_IF_ERROR(kernel->SetBytesAuto(max_)); | ||||||
|  |   RETURN_IF_ERROR(kernel->SetBytesAuto(scale_)); | ||||||
|  |   return OkStatus(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Status CreateQuantizeAndDequantize(const CreationContext& creation_context, | ||||||
|  |                                    const OperationDef& definition, | ||||||
|  |                                    const QuantizeAndDequantizeAttributes& attr, | ||||||
|  |                                    QuantizeAndDequantize* result) { | ||||||
|  |   const auto scalar_precision = creation_context.device->IsPowerVR() | ||||||
|  |                                     ? CalculationsPrecision::F32 | ||||||
|  |                                     : definition.precision; | ||||||
|  |   const bool is_fp16 = definition.precision == CalculationsPrecision::F16 || | ||||||
|  |                        definition.precision == CalculationsPrecision::F32_F16; | ||||||
|  |   if (is_fp16 && attr.scale < 0.000062f) { | ||||||
|  |     // The smallest positive normal number for Half-precision floating-point
 | ||||||
|  |     // format is 2^-14 ~ 0.000062f. Therefore, if the scale is lesser than this
 | ||||||
|  |     // number, we just reset it accordingly.
 | ||||||
|  |     QuantizeAndDequantizeAttributes adjusted_attr = attr; | ||||||
|  |     adjusted_attr.scale = 0.000062f; | ||||||
|  |     *result = | ||||||
|  |         QuantizeAndDequantize(definition, adjusted_attr, scalar_precision); | ||||||
|  |   } else { | ||||||
|  |     *result = QuantizeAndDequantize(definition, attr, scalar_precision); | ||||||
|  |   } | ||||||
|  |   result->SetLinkIndex(0); | ||||||
|  |   return OkStatus(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | }  // namespace cl
 | ||||||
|  | }  // namespace gpu
 | ||||||
|  | }  // namespace tflite
 | ||||||
| @ -0,0 +1,100 @@ | |||||||
|  | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | ||||||
|  | 
 | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  | 
 | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | 
 | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | ==============================================================================*/ | ||||||
|  | 
 | ||||||
|  | #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_ | ||||||
|  | #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_ | ||||||
|  | 
 | ||||||
|  | #include <string> | ||||||
|  | 
 | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/kernels/flt_type.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/linear_storage.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/data_type.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/operations.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/status.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/tensor.h" | ||||||
|  | 
 | ||||||
|  | namespace tflite { | ||||||
|  | namespace gpu { | ||||||
|  | namespace cl { | ||||||
|  | 
 | ||||||
|  | // Performs the operation: {Quantize, Dequantize} on floating-point data.
 | ||||||
|  | // We need this operation to emulate the error introduced by quantization
 | ||||||
|  | // on the GPU, which cannot represent int8 tensors.
 | ||||||
|  | //
 | ||||||
|  | // Implemented as:
 | ||||||
|  | // qvalue = round((min(qmax, max(qmin, src_val)) - qmin) / qscale)
 | ||||||
|  | // dq_value = qvalue * qscale + qmin
 | ||||||
|  | // Here, qmin, qmax & qscale refer to the quantization values as implemented in
 | ||||||
|  | // TensorFlow Lite's 'FakeQuant' kernel. OpenCL's round() rounds halfway cases
 | ||||||
|  | // away from zero.
 | ||||||
|  | //
 | ||||||
|  | // NOTE: We do not need to nudge min/max values in this op, since they would
 | ||||||
|  | // already be adjusted while generating the quantized model.
 | ||||||
|  | class QuantizeAndDequantize : public ElementwiseOperation { | ||||||
|  |  public: | ||||||
|  |   QuantizeAndDequantize() = default; | ||||||
|  |   // Move only
 | ||||||
|  |   QuantizeAndDequantize(QuantizeAndDequantize&& operation); | ||||||
|  |   QuantizeAndDequantize& operator=(QuantizeAndDequantize&& operation); | ||||||
|  |   QuantizeAndDequantize(const QuantizeAndDequantize&) = delete; | ||||||
|  |   QuantizeAndDequantize& operator=(const QuantizeAndDequantize&) = delete; | ||||||
|  | 
 | ||||||
|  |   void SetLinkIndex(int index) override; | ||||||
|  |   std::string GetCoreCode(const LinkingContext& context) const override; | ||||||
|  |   std::string GetArgsDeclaration() const override; | ||||||
|  |   Status BindArguments(CLKernel* kernel) override; | ||||||
|  | 
 | ||||||
|  |   friend Status CreateQuantizeAndDequantize( | ||||||
|  |       const CreationContext& creation_context, const OperationDef& definition, | ||||||
|  |       const QuantizeAndDequantizeAttributes& attr, | ||||||
|  |       QuantizeAndDequantize* result); | ||||||
|  | 
 | ||||||
|  |  private: | ||||||
|  |   QuantizeAndDequantize(const OperationDef& definition, | ||||||
|  |                         const QuantizeAndDequantizeAttributes& attr, | ||||||
|  |                         CalculationsPrecision scalar_precision); | ||||||
|  | 
 | ||||||
|  |   template <DataType T> | ||||||
|  |   Status UploadParameters(const ::tflite::gpu::Tensor<Linear, T>& parameters, | ||||||
|  |                           CLContext* context); | ||||||
|  | 
 | ||||||
|  |   FLT min_; | ||||||
|  |   FLT max_; | ||||||
|  |   FLT scale_; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
// Creates a QuantizeAndDequantize operation from `attr` and stores it in
// `*result`. For F16 and F32_F16 precision, a scale below 0.000062f is raised
// to that value.
|  | Status CreateQuantizeAndDequantize(const CreationContext& creation_context, | ||||||
|  |                                    const OperationDef& definition, | ||||||
|  |                                    const QuantizeAndDequantizeAttributes& attr, | ||||||
|  |                                    QuantizeAndDequantize* result); | ||||||
|  | 
 | ||||||
|  | template <DataType T> | ||||||
|  | Status QuantizeAndDequantize::UploadParameters( | ||||||
|  |     const ::tflite::gpu::Tensor<Linear, T>& parameters, CLContext* context) { | ||||||
|  |   LinearStorageCreateInfo create_info; | ||||||
|  |   create_info.storage_type = | ||||||
|  |       DeduceLinearStorageType(definition_.GetPrimaryStorageType()); | ||||||
|  |   create_info.data_type = definition_.GetPrimaryDataType(); | ||||||
|  |   return OkStatus(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | }  // namespace cl
 | ||||||
|  | }  // namespace gpu
 | ||||||
|  | }  // namespace tflite
 | ||||||
|  | 
 | ||||||
|  | #endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
 | ||||||
| @ -0,0 +1,182 @@ | |||||||
|  | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | ||||||
|  | 
 | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  | 
 | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | 
 | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | ==============================================================================*/ | ||||||
|  | 
 | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h" | ||||||
|  | 
 | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | #include <gmock/gmock.h> | ||||||
|  | #include <gtest/gtest.h> | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/kernels/cl_test.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/operations.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/status.h" | ||||||
|  | #include "tensorflow/lite/kernels/internal/quantization_util.h" | ||||||
|  | 
 | ||||||
|  | using ::testing::FloatNear; | ||||||
|  | using ::testing::Pointwise; | ||||||
|  | 
 | ||||||
|  | namespace tflite { | ||||||
|  | namespace gpu { | ||||||
|  | namespace cl { | ||||||
|  | namespace { | ||||||
|  | 
 | ||||||
|  | TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits8) { | ||||||
|  |   TensorFloat32 src_tensor; | ||||||
|  |   src_tensor.shape = BHWC(1, 3, 2, 1); | ||||||
|  |   src_tensor.data = {0.0f, 1.0f, 0.25f, 0.50f, 0.4444444f, 0.00001f}; | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
 | ||||||
|  |   // pre-nudged, since this should be done during model conversion.
 | ||||||
|  |   const int num_bits = 8; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/**original_min**/ 0.0, /**original_max**/ 1.0, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   for (auto storage : env_.GetSupportedStorages()) { | ||||||
|  |     for (auto precision : env_.GetSupportedPrecisions()) { | ||||||
|  |       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; | ||||||
|  |       OperationDef op_def; | ||||||
|  |       op_def.precision = precision; | ||||||
|  |       auto data_type = DeduceDataTypeFromPrecision(precision); | ||||||
|  |       op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       TensorFloat32 dst_tensor; | ||||||
|  |       QuantizeAndDequantize operation; | ||||||
|  |       ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, | ||||||
|  |                                             &operation)); | ||||||
|  |       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, | ||||||
|  |                                     BHWC(1, 3, 2, 1), &dst_tensor)); | ||||||
|  |       EXPECT_THAT(dst_tensor.data, | ||||||
|  |                   Pointwise(FloatNear(eps), {0.0f, 1.0f, 0.25098f, 0.498039f, | ||||||
|  |                                              0.443137f, 0.0f})); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits8_NegativeRange) { | ||||||
|  |   TensorFloat32 src_tensor; | ||||||
|  |   src_tensor.shape = BHWC(1, 3, 1, 2); | ||||||
|  |   src_tensor.data = {0.0f, -0.9f, 0.25f, 0.50f, 0.4444444f, -0.00001f}; | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
 | ||||||
|  |   // pre-nudged, since this should be done during model conversion.
 | ||||||
|  |   const int num_bits = 8; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/**original_min**/ -0.9, /**original_max**/ 0.9, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   for (auto storage : env_.GetSupportedStorages()) { | ||||||
|  |     for (auto precision : env_.GetSupportedPrecisions()) { | ||||||
|  |       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; | ||||||
|  |       OperationDef op_def; | ||||||
|  |       op_def.precision = precision; | ||||||
|  |       auto data_type = DeduceDataTypeFromPrecision(precision); | ||||||
|  |       op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       TensorFloat32 dst_tensor; | ||||||
|  |       QuantizeAndDequantize operation; | ||||||
|  |       ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, | ||||||
|  |                                             &operation)); | ||||||
|  |       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, | ||||||
|  |                                     BHWC(1, 3, 1, 2), &dst_tensor)); | ||||||
|  |       EXPECT_THAT(dst_tensor.data, | ||||||
|  |                   Pointwise(FloatNear(eps), {0.0f, -0.896471f, 0.247059f, | ||||||
|  |                                              0.501176f, 0.444706f, 0.0f})); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits16) { | ||||||
|  |   TensorFloat32 src_tensor; | ||||||
|  |   src_tensor.shape = BHWC(1, 3, 1, 2); | ||||||
|  |   src_tensor.data = {0.0f, 1.0f, 0.25f, 0.50f, 0.4444444f, 0.00001f}; | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
 | ||||||
|  |   // pre-nudged, since this should be done during model conversion.
 | ||||||
|  |   const int num_bits = 16; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/**original_min**/ 0.0, /**original_max**/ 1.0, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   for (auto storage : env_.GetSupportedStorages()) { | ||||||
|  |     for (auto precision : env_.GetSupportedPrecisions()) { | ||||||
|  |       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; | ||||||
|  |       OperationDef op_def; | ||||||
|  |       op_def.precision = precision; | ||||||
|  |       auto data_type = DeduceDataTypeFromPrecision(precision); | ||||||
|  |       op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       TensorFloat32 dst_tensor; | ||||||
|  |       QuantizeAndDequantize operation; | ||||||
|  |       ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, | ||||||
|  |                                             &operation)); | ||||||
|  |       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, | ||||||
|  |                                     BHWC(1, 3, 1, 2), &dst_tensor)); | ||||||
|  |       EXPECT_THAT(dst_tensor.data, | ||||||
|  |                   Pointwise(FloatNear(eps), {0.0f, 1.0f, 0.250004f, 0.500008f, | ||||||
|  |                                              0.44445f, 1.5259e-05f})); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits16_NegativeRange) { | ||||||
|  |   TensorFloat32 src_tensor; | ||||||
|  |   src_tensor.shape = BHWC(1, 3, 2, 1); | ||||||
|  |   src_tensor.data = {0.0f, -0.9f, 0.25f, 0.50f, 0.4444444f, -0.00001f}; | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
 | ||||||
|  |   // pre-nudged, since this should be done during model conversion.
 | ||||||
|  |   const int num_bits = 16; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/**original_min**/ -0.9, /**original_max**/ 0.9, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   for (auto storage : env_.GetSupportedStorages()) { | ||||||
|  |     for (auto precision : env_.GetSupportedPrecisions()) { | ||||||
|  |       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; | ||||||
|  |       OperationDef op_def; | ||||||
|  |       op_def.precision = precision; | ||||||
|  |       auto data_type = DeduceDataTypeFromPrecision(precision); | ||||||
|  |       op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); | ||||||
|  |       TensorFloat32 dst_tensor; | ||||||
|  |       QuantizeAndDequantize operation; | ||||||
|  |       ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, | ||||||
|  |                                             &operation)); | ||||||
|  |       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, | ||||||
|  |                                     BHWC(1, 3, 2, 1), &dst_tensor)); | ||||||
|  |       EXPECT_THAT(dst_tensor.data, | ||||||
|  |                   Pointwise(FloatNear(eps), {0.0f, -0.900014f, 0.249998f, | ||||||
|  |                                              0.499995f, 0.444431f, 0.0f})); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | }  // namespace
 | ||||||
|  | }  // namespace cl
 | ||||||
|  | }  // namespace gpu
 | ||||||
|  | }  // namespace tflite
 | ||||||
| @ -132,6 +132,7 @@ cc_library( | |||||||
|         "//tensorflow/lite/delegates/gpu/cl/kernels:padding", |         "//tensorflow/lite/delegates/gpu/cl/kernels:padding", | ||||||
|         "//tensorflow/lite/delegates/gpu/cl/kernels:pooling", |         "//tensorflow/lite/delegates/gpu/cl/kernels:pooling", | ||||||
|         "//tensorflow/lite/delegates/gpu/cl/kernels:prelu", |         "//tensorflow/lite/delegates/gpu/cl/kernels:prelu", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/cl/kernels:quantize_and_dequantize", | ||||||
|         "//tensorflow/lite/delegates/gpu/cl/kernels:relu", |         "//tensorflow/lite/delegates/gpu/cl/kernels:relu", | ||||||
|         "//tensorflow/lite/delegates/gpu/cl/kernels:reshape", |         "//tensorflow/lite/delegates/gpu/cl/kernels:reshape", | ||||||
|         "//tensorflow/lite/delegates/gpu/cl/kernels:reshapex4", |         "//tensorflow/lite/delegates/gpu/cl/kernels:reshapex4", | ||||||
|  | |||||||
| @ -279,6 +279,12 @@ Status GPUOperationFromNode(const CreationContext& creation_context, | |||||||
|       auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes); |       auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes); | ||||||
|       return SelectPReLU(attr, creation_context, op_def, gpu_op); |       return SelectPReLU(attr, creation_context, op_def, gpu_op); | ||||||
|     } |     } | ||||||
|  |     case OperationType::QUANTIZE_AND_DEQUANTIZE: { | ||||||
|  |       auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>( | ||||||
|  |           node.operation.attributes); | ||||||
|  |       return SelectQuantizeAndDequantize(attr, creation_context, op_def, | ||||||
|  |                                          gpu_op); | ||||||
|  |     } | ||||||
|     case OperationType::RELU: { |     case OperationType::RELU: { | ||||||
|       auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes); |       auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes); | ||||||
|       SelectReLU(creation_context, attr, op_def, gpu_op); |       SelectReLU(creation_context, attr, op_def, gpu_op); | ||||||
|  | |||||||
| @ -29,6 +29,7 @@ limitations under the License. | |||||||
| #include "tensorflow/lite/delegates/gpu/cl/kernels/padding.h" | #include "tensorflow/lite/delegates/gpu/cl/kernels/padding.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/cl/kernels/pooling.h" | #include "tensorflow/lite/delegates/gpu/cl/kernels/pooling.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/cl/kernels/prelu.h" | #include "tensorflow/lite/delegates/gpu/cl/kernels/prelu.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/cl/kernels/relu.h" | #include "tensorflow/lite/delegates/gpu/cl/kernels/relu.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/cl/kernels/reshape.h" | #include "tensorflow/lite/delegates/gpu/cl/kernels/reshape.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h" | #include "tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h" | ||||||
| @ -218,6 +219,17 @@ Status SelectWinograd36To4x4( | |||||||
|   return OkStatus(); |   return OkStatus(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | Status SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr, | ||||||
|  |                                    const CreationContext& creation_context, | ||||||
|  |                                    const OperationDef& op_def, | ||||||
|  |                                    std::unique_ptr<GPUOperation>* ptr) { | ||||||
|  |   QuantizeAndDequantize operation; | ||||||
|  |   RETURN_IF_ERROR( | ||||||
|  |       CreateQuantizeAndDequantize(creation_context, op_def, attr, &operation)); | ||||||
|  |   *ptr = absl::make_unique<QuantizeAndDequantize>(std::move(operation)); | ||||||
|  |   return OkStatus(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| }  // namespace cl
 | }  // namespace cl
 | ||||||
| }  // namespace gpu
 | }  // namespace gpu
 | ||||||
| }  // namespace tflite
 | }  // namespace tflite
 | ||||||
|  | |||||||
| @ -100,6 +100,11 @@ Status SelectWinograd36To4x4( | |||||||
|     const ::tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases, |     const ::tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases, | ||||||
|     std::unique_ptr<GPUOperation>* ptr); |     std::unique_ptr<GPUOperation>* ptr); | ||||||
| 
 | 
 | ||||||
// Creates a QuantizeAndDequantize GPU operation from `attr` and stores it in
// `*ptr`.
|  | Status SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr, | ||||||
|  |                                    const CreationContext& creation_context, | ||||||
|  |                                    const OperationDef& op_def, | ||||||
|  |                                    std::unique_ptr<GPUOperation>* ptr); | ||||||
|  | 
 | ||||||
| }  // namespace cl
 | }  // namespace cl
 | ||||||
| }  // namespace gpu
 | }  // namespace gpu
 | ||||||
| }  // namespace tflite
 | }  // namespace tflite
 | ||||||
|  | |||||||
| @ -118,6 +118,8 @@ std::string ToString(enum OperationType op) { | |||||||
|       return "pow"; |       return "pow"; | ||||||
|     case OperationType::PRELU: |     case OperationType::PRELU: | ||||||
|       return "prelu"; |       return "prelu"; | ||||||
|  |     case OperationType::QUANTIZE_AND_DEQUANTIZE: | ||||||
|  |       return "quantize_and_dequantize"; | ||||||
|     case OperationType::RELU: |     case OperationType::RELU: | ||||||
|       return "relu"; |       return "relu"; | ||||||
|     case OperationType::RESHAPE: |     case OperationType::RESHAPE: | ||||||
| @ -183,6 +185,7 @@ OperationType OperationTypeFromString(const std::string& name) { | |||||||
|           {"pooling_2d", OperationType::POOLING_2D}, |           {"pooling_2d", OperationType::POOLING_2D}, | ||||||
|           {"pow", OperationType::POW}, |           {"pow", OperationType::POW}, | ||||||
|           {"prelu", OperationType::PRELU}, |           {"prelu", OperationType::PRELU}, | ||||||
|  |           {"quantize_and_dequantize", OperationType::QUANTIZE_AND_DEQUANTIZE}, | ||||||
|           {"relu", OperationType::RELU}, |           {"relu", OperationType::RELU}, | ||||||
|           {"resize", OperationType::RESIZE}, |           {"resize", OperationType::RESIZE}, | ||||||
|           {"reshape", OperationType::RESHAPE}, |           {"reshape", OperationType::RESHAPE}, | ||||||
|  | |||||||
| @ -57,6 +57,8 @@ enum class OperationType { | |||||||
|   POOLING_2D, |   POOLING_2D, | ||||||
|   POW, |   POW, | ||||||
|   PRELU, |   PRELU, | ||||||
|  |   // Used to accurately run inference on quantized models.
 | ||||||
|  |   QUANTIZE_AND_DEQUANTIZE, | ||||||
|   RELU, |   RELU, | ||||||
|   RESHAPE, |   RESHAPE, | ||||||
|   RESIZE, |   RESIZE, | ||||||
| @ -478,6 +480,14 @@ struct SpaceToDepthAttributes { | |||||||
|   int block_size; |   int block_size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | // Attributes of the QUANTIZE_AND_DEQUANTIZE op, a combined Quantize &
 | ||||||
|  | // Dequantize that adjusts float values like quantized inference would.
 | ||||||
|  | struct QuantizeAndDequantizeAttributes { | ||||||
|  |   float min = 0;  // Lower bound that values are clamped to.
 | ||||||
|  |   float max = 0;  // Upper bound that values are clamped to.
 | ||||||
|  |   float scale = 0;  // Quantization step; the GL shader divides by it.
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| }  // namespace gpu
 | }  // namespace gpu
 | ||||||
| }  // namespace tflite
 | }  // namespace tflite
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -451,6 +451,38 @@ cc_test( | |||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | cc_library( | ||||||
|  |     name = "quantize_and_dequantize", | ||||||
|  |     srcs = ["quantize_and_dequantize.cc"], | ||||||
|  |     hdrs = ["quantize_and_dequantize.h"], | ||||||
|  |     deps = [ | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:convert", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:data_type", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:operations", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:shape", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:status", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:types", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/gl:node_shader", | ||||||
|  |         "@com_google_absl//absl/memory", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | cc_test( | ||||||
|  |     name = "quantize_and_dequantize_test", | ||||||
|  |     srcs = ["quantize_and_dequantize_test.cc"], | ||||||
|  |     tags = tf_gpu_tests_tags() + [ | ||||||
|  |         "notap", | ||||||
|  |         "tflite_not_portable_ios", | ||||||
|  |     ], | ||||||
|  |     deps = [ | ||||||
|  |         ":quantize_and_dequantize", | ||||||
|  |         ":test_util", | ||||||
|  |         "//tensorflow/lite/delegates/gpu/common:operations", | ||||||
|  |         "//tensorflow/lite/kernels/internal:quantization_util", | ||||||
|  |         "@com_google_googletest//:gtest", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| cc_library( | cc_library( | ||||||
|     name = "relu", |     name = "relu", | ||||||
|     srcs = ["relu.cc"], |     srcs = ["relu.cc"], | ||||||
| @ -699,6 +731,7 @@ TFLITE_GPU_BINARY_RELEASE_OPERATORS = [ | |||||||
|     "pad", |     "pad", | ||||||
|     "pooling", |     "pooling", | ||||||
|     "prelu", |     "prelu", | ||||||
|  |     "quantize_and_dequantize", | ||||||
|     "relu", |     "relu", | ||||||
|     "mean", |     "mean", | ||||||
|     "reshape", |     "reshape", | ||||||
|  | |||||||
| @ -0,0 +1,74 @@ | |||||||
|  | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | ||||||
|  | 
 | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  | 
 | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | 
 | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | ==============================================================================*/ | ||||||
|  | 
 | ||||||
|  | #include "tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h" | ||||||
|  | 
 | ||||||
|  | #include <memory> | ||||||
|  | #include <string> | ||||||
|  | 
 | ||||||
|  | #include "absl/memory/memory.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/data_type.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/shape.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/status.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/types.h" | ||||||
|  | 
 | ||||||
|  | namespace tflite { | ||||||
|  | namespace gpu { | ||||||
|  | namespace gl { | ||||||
|  | namespace { | ||||||
|  | 
 | ||||||
|  | class QuantizeAndDequantize : public NodeShader {  // Emits GLSL that quantizes, then dequantizes, value_0 in place.
 | ||||||
|  |  public: | ||||||
|  |   Status GenerateCode(const GenerationContext& ctx, | ||||||
|  |                       GeneratedCode* generated_code) const final { | ||||||
|  |     std::string code; | ||||||
|  |     // Constants: widen the $quant_*$ parameters (declared below) to vec4.
 | ||||||
|  |     code += "vec4 scale = vec4($quant_scale$);"; | ||||||
|  |     code += "vec4 min_bound = vec4($quant_min$);"; | ||||||
|  |     code += "vec4 max_bound = vec4($quant_max$);"; | ||||||
|  |     // Quantize: clamp to [min, max], rebase to min, divide by scale, then
 | ||||||
|  |     code += "value_0 = clamp(value_0, min_bound, max_bound);"; | ||||||
|  |     code += "value_0 = (value_0 - min_bound) / scale;"; | ||||||
|  |     code += "value_0 = floor(value_0 + vec4(0.5));";  // round half up.
 | ||||||
|  |     // Dequantize: map the rounded step index back onto the float range.
 | ||||||
|  |     code += "value_0 = value_0 * scale + min_bound;"; | ||||||
|  | 
 | ||||||
|  |     auto attr = absl::any_cast<const QuantizeAndDequantizeAttributes&>( | ||||||
|  |         ctx.node->operation.attributes); | ||||||
|  |     *generated_code = { | ||||||
|  |         /*parameters=*/{{"quant_min", attr.min}, | ||||||
|  |                         {"quant_max", attr.max}, | ||||||
|  |                         {"quant_scale", attr.scale}}, | ||||||
|  |         /*objects=*/{}, | ||||||
|  |         /*shared_variables=*/{}, | ||||||
|  |         /*workload=*/uint3(), | ||||||
|  |         /*workgroup=*/uint3(), | ||||||
|  |         /*source_code=*/code, | ||||||
|  |         /*input=*/IOStructure::AUTO, | ||||||
|  |         /*output=*/IOStructure::AUTO, | ||||||
|  |     }; | ||||||
|  |     return OkStatus(); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | }  // namespace
 | ||||||
|  | 
 | ||||||
|  | std::unique_ptr<NodeShader> NewQuantizeAndDequantizeNodeShader() {  // Returns a newly allocated QuantizeAndDequantize shader.
 | ||||||
|  |   return absl::make_unique<QuantizeAndDequantize>(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | }  // namespace gl
 | ||||||
|  | }  // namespace gpu
 | ||||||
|  | }  // namespace tflite
 | ||||||
| @ -0,0 +1,47 @@ | |||||||
|  | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | ||||||
|  | 
 | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  | 
 | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | 
 | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | ==============================================================================*/ | ||||||
|  | 
 | ||||||
|  | #ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_ | ||||||
|  | #define TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_ | ||||||
|  | 
 | ||||||
|  | #include <memory> | ||||||
|  | 
 | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/operations.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/gl/node_shader.h" | ||||||
|  | 
 | ||||||
|  | namespace tflite { | ||||||
|  | namespace gpu { | ||||||
|  | namespace gl { | ||||||
|  | 
 | ||||||
|  | // Performs the operation: {Quantize, Dequantize} on floating-point data.
 | ||||||
|  | // We need this operation to emulate the error introduced by quantization
 | ||||||
|  | // on the GPU, which cannot represent int8 tensors.
 | ||||||
|  | //
 | ||||||
|  | // Implemented as:
 | ||||||
|  | // qvalue = floor((min(qmax, max(qmin, src_val)) - qmin) / qscale + 0.5)
 | ||||||
|  | // dq_value = qvalue * qscale + qmin
 | ||||||
|  | // Here, qmin, qmax & qscale refer to the quantization values as implemented in
 | ||||||
|  | // TensorFlow Lite's 'FakeQuant' kernel. floor(x + 0.5) rounds x to the nearest
 | ||||||
|  | // integer, with halves rounded up.
 | ||||||
|  | //
 | ||||||
|  | // NOTE: We do not need to nudge min/max values in this op, since they would
 | ||||||
|  | // already be adjusted while generating the quantized model.
 | ||||||
|  | std::unique_ptr<NodeShader> NewQuantizeAndDequantizeNodeShader(); | ||||||
|  | 
 | ||||||
|  | }  // namespace gl
 | ||||||
|  | }  // namespace gpu
 | ||||||
|  | }  // namespace tflite
 | ||||||
|  | 
 | ||||||
|  | #endif  // TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
 | ||||||
| @ -0,0 +1,159 @@ | |||||||
|  | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 | ||||||
|  | 
 | ||||||
|  | Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | you may not use this file except in compliance with the License. | ||||||
|  | You may obtain a copy of the License at | ||||||
|  | 
 | ||||||
|  |     http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | 
 | ||||||
|  | Unless required by applicable law or agreed to in writing, software | ||||||
|  | distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | See the License for the specific language governing permissions and | ||||||
|  | limitations under the License. | ||||||
|  | ==============================================================================*/ | ||||||
|  | 
 | ||||||
|  | #include "tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h" | ||||||
|  | 
 | ||||||
|  | #include <gmock/gmock.h> | ||||||
|  | #include <gtest/gtest.h> | ||||||
|  | #include "tensorflow/lite/delegates/gpu/common/operations.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/gl/kernels/test_util.h" | ||||||
|  | #include "tensorflow/lite/kernels/internal/quantization_util.h" | ||||||
|  | 
 | ||||||
|  | using ::testing::FloatNear; | ||||||
|  | using ::testing::Pointwise; | ||||||
|  | 
 | ||||||
|  | namespace tflite { | ||||||
|  | namespace gpu { | ||||||
|  | namespace gl { | ||||||
|  | namespace { | ||||||
|  | 
 | ||||||
|  | TEST(QuantizeAndDequantizeTest, Dim2Bits8) {  // 8-bit levels over the nudged range [0, 1].
 | ||||||
|  |   TensorRef<BHWC> input; | ||||||
|  |   input.type = DataType::FLOAT32; | ||||||
|  |   input.ref = 0; | ||||||
|  |   input.shape = BHWC(1, 3, 2, 1); | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, the op expects min/max to be pre-nudged
 | ||||||
|  |   // (normally done during model conversion), so the test nudges them here.
 | ||||||
|  |   const int num_bits = 8; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/*original_min=*/ 0.0, /*original_max=*/ 1.0, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   TensorRef<BHWC> output; | ||||||
|  |   output.type = DataType::FLOAT32; | ||||||
|  |   output.ref = 1; | ||||||
|  |   output.shape = BHWC(1, 3, 2, 1); | ||||||
|  | 
 | ||||||
|  |   SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr}, | ||||||
|  |                       {input}, {output}); | ||||||
|  |   ASSERT_TRUE( | ||||||
|  |       model.PopulateTensor(0, {0.0, 1.0, 0.25, 0.50, 0.4444444, 0.00001})); | ||||||
|  |   ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader())); | ||||||
|  |   EXPECT_THAT(model.GetOutput(0), | ||||||
|  |               Pointwise(FloatNear(1e-6), | ||||||
|  |                         {0.0f, 1.0f, 0.25098f, 0.498039f, 0.443137f, 0.0f})); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TEST(QuantizeAndDequantizeTest, Dim3Bits8_NegativeRange) {  // 8-bit levels over the nudged range [-0.9, 0.9].
 | ||||||
|  |   TensorRef<BHWC> input; | ||||||
|  |   input.type = DataType::FLOAT32; | ||||||
|  |   input.ref = 0; | ||||||
|  |   input.shape = BHWC(1, 3, 1, 2); | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, the op expects min/max to be pre-nudged
 | ||||||
|  |   // (normally done during model conversion), so the test nudges them here.
 | ||||||
|  |   const int num_bits = 8; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/*original_min=*/ -0.9, /*original_max=*/ 0.9, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   TensorRef<BHWC> output; | ||||||
|  |   output.type = DataType::FLOAT32; | ||||||
|  |   output.ref = 1; | ||||||
|  |   output.shape = BHWC(1, 3, 1, 2); | ||||||
|  | 
 | ||||||
|  |   SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr}, | ||||||
|  |                       {input}, {output}); | ||||||
|  |   ASSERT_TRUE( | ||||||
|  |       model.PopulateTensor(0, {0.0, -0.9, 0.25, 0.50, 0.4444444, -0.00001})); | ||||||
|  |   ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader())); | ||||||
|  |   EXPECT_THAT(model.GetOutput(0), | ||||||
|  |               Pointwise(FloatNear(1e-6), {0.0f, -0.896471f, 0.247059f, | ||||||
|  |                                           0.501176f, 0.444706f, 0.0f})); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TEST(QuantizeAndDequantizeTest, Dim3Bits16) {  // 16-bit levels over the nudged range [0, 1].
 | ||||||
|  |   TensorRef<BHWC> input; | ||||||
|  |   input.type = DataType::FLOAT32; | ||||||
|  |   input.ref = 0; | ||||||
|  |   input.shape = BHWC(1, 3, 1, 2); | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, the op expects min/max to be pre-nudged
 | ||||||
|  |   // (normally done during model conversion), so the test nudges them here.
 | ||||||
|  |   const int num_bits = 16; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/*original_min=*/ 0.0, /*original_max=*/ 1.0, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   TensorRef<BHWC> output; | ||||||
|  |   output.type = DataType::FLOAT32; | ||||||
|  |   output.ref = 1; | ||||||
|  |   output.shape = BHWC(1, 3, 1, 2); | ||||||
|  | 
 | ||||||
|  |   SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr}, | ||||||
|  |                       {input}, {output}); | ||||||
|  |   ASSERT_TRUE( | ||||||
|  |       model.PopulateTensor(0, {0.0, 1.0, 0.25, 0.50, 0.4444444, 0.00001})); | ||||||
|  |   ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader())); | ||||||
|  |   EXPECT_THAT(model.GetOutput(0), | ||||||
|  |               Pointwise(FloatNear(1e-6), {0.0f, 1.0f, 0.250004f, 0.500008f, | ||||||
|  |                                           0.44445f, 1.5259e-05f})); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TEST(QuantizeAndDequantizeTest, Dim2Bits16_NegativeRange) {  // 16-bit levels over the nudged range [-0.9, 0.9].
 | ||||||
|  |   TensorRef<BHWC> input; | ||||||
|  |   input.type = DataType::FLOAT32; | ||||||
|  |   input.ref = 0; | ||||||
|  |   input.shape = BHWC(1, 3, 2, 1); | ||||||
|  | 
 | ||||||
|  |   // Unlike TFLite's FakeQuant kernel, the op expects min/max to be pre-nudged
 | ||||||
|  |   // (normally done during model conversion), so the test nudges them here.
 | ||||||
|  |   const int num_bits = 16; | ||||||
|  |   const int quant_min = 0; | ||||||
|  |   const int quant_max = (1 << num_bits) - 1; | ||||||
|  |   QuantizeAndDequantizeAttributes attr; | ||||||
|  |   NudgeQuantizationRange(/*original_min=*/ -0.9, /*original_max=*/ 0.9, | ||||||
|  |                          quant_min, quant_max, &attr.min, &attr.max, | ||||||
|  |                          &attr.scale); | ||||||
|  | 
 | ||||||
|  |   TensorRef<BHWC> output; | ||||||
|  |   output.type = DataType::FLOAT32; | ||||||
|  |   output.ref = 1; | ||||||
|  |   output.shape = BHWC(1, 3, 2, 1); | ||||||
|  | 
 | ||||||
|  |   SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr}, | ||||||
|  |                       {input}, {output}); | ||||||
|  |   ASSERT_TRUE( | ||||||
|  |       model.PopulateTensor(0, {0.0, -0.9, 0.25, 0.50, 0.4444444, -0.00001})); | ||||||
|  |   ASSERT_OK(model.Invoke(*NewQuantizeAndDequantizeNodeShader())); | ||||||
|  |   EXPECT_THAT(model.GetOutput(0), | ||||||
|  |               Pointwise(FloatNear(1e-6), {0.0f, -0.900014f, 0.249998f, | ||||||
|  |                                           0.499995f, 0.444431f, 0.0f})); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | }  // namespace
 | ||||||
|  | }  // namespace gl
 | ||||||
|  | }  // namespace gpu
 | ||||||
|  | }  // namespace tflite
 | ||||||
| @ -40,6 +40,7 @@ limitations under the License. | |||||||
| #include "tensorflow/lite/delegates/gpu/gl/kernels/pad.h" | #include "tensorflow/lite/delegates/gpu/gl/kernels/pad.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/gl/kernels/pooling.h" | #include "tensorflow/lite/delegates/gpu/gl/kernels/pooling.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/gl/kernels/prelu.h" | #include "tensorflow/lite/delegates/gpu/gl/kernels/prelu.h" | ||||||
|  | #include "tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/gl/kernels/relu.h" | #include "tensorflow/lite/delegates/gpu/gl/kernels/relu.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/gl/kernels/reshape.h" | #include "tensorflow/lite/delegates/gpu/gl/kernels/reshape.h" | ||||||
| #include "tensorflow/lite/delegates/gpu/gl/kernels/resize.h" | #include "tensorflow/lite/delegates/gpu/gl/kernels/resize.h" | ||||||
| @ -85,6 +86,8 @@ class Registry : public NodeShader { | |||||||
|     insert_op(Type::PAD, NewPadNodeShader); |     insert_op(Type::PAD, NewPadNodeShader); | ||||||
|     insert_op(Type::POOLING_2D, NewPoolingNodeShader); |     insert_op(Type::POOLING_2D, NewPoolingNodeShader); | ||||||
|     insert_op(Type::PRELU, NewPReLUNodeShader); |     insert_op(Type::PRELU, NewPReLUNodeShader); | ||||||
|  |     insert_op(Type::QUANTIZE_AND_DEQUANTIZE, | ||||||
|  |               NewQuantizeAndDequantizeNodeShader); | ||||||
|     insert_op(Type::RELU, NewReLUNodeShader); |     insert_op(Type::RELU, NewReLUNodeShader); | ||||||
|     insert_op(Type::RESIZE, NewResizeNodeShader); |     insert_op(Type::RESIZE, NewResizeNodeShader); | ||||||
|     insert_op(Type::RESHAPE, NewReshapeNodeShader); |     insert_op(Type::RESHAPE, NewReshapeNodeShader); | ||||||
|  | |||||||
| @ -305,6 +305,7 @@ Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, | |||||||
|     case OperationType::BATCH_TO_SPACE: |     case OperationType::BATCH_TO_SPACE: | ||||||
|     case OperationType::CONST: |     case OperationType::CONST: | ||||||
|     case OperationType::LSTM: |     case OperationType::LSTM: | ||||||
|  |     case OperationType::QUANTIZE_AND_DEQUANTIZE: | ||||||
|     case OperationType::SPACE_TO_BATCH: |     case OperationType::SPACE_TO_BATCH: | ||||||
|     case OperationType::TRANSPOSE: |     case OperationType::TRANSPOSE: | ||||||
|     case OperationType::UNKNOWN: |     case OperationType::UNKNOWN: | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user