Adds QuantizeAndDequantize kernel to Metal backend
* Fixed comment to match implementation. PiperOrigin-RevId: 314684033 Change-Id: Ice38717811c8a5f6abf4a4fb65ecbdf7b9ed42ca
This commit is contained in:
parent
1421933a1d
commit
282f234242
@ -37,11 +37,10 @@ namespace cl {
|
||||
// on the GPU, which cannot represent int8 tensors.
|
||||
//
|
||||
// Implemented as:
|
||||
// qvalue = round((min(qmax, max(qmin, src_val)) - qmin) * (1/qscale) + 0.5)
|
||||
// qvalue = round((min(qmax, max(qmin, src_val)) - qmin) * (1/qscale))
|
||||
// dq_value = qvalue * qscale + qmin
|
||||
// Here, qmin, qmax & qscale refer to the quantization values as implemented in
|
||||
// TensorFlow Lite's 'FakeQuant' kernel. round(x + 0.5) ensures we round away
|
||||
// from zero.
|
||||
// TensorFlow Lite's 'FakeQuant' kernel.
|
||||
//
|
||||
// NOTE: We do not need to nudge min/max values in this op, since they would
|
||||
// already be adjusted while generating the quantized model.
|
||||
|
@ -30,11 +30,10 @@ namespace gl {
|
||||
// on the GPU, which cannot represent int8 tensors.
|
||||
//
|
||||
// Implemented as:
|
||||
// qvalue = round((min(qmax, max(qmin, src_val)) - qmin) * (1/qscale) + 0.5)
|
||||
// qvalue = round((min(qmax, max(qmin, src_val)) - qmin) * (1/qscale))
|
||||
// dq_value = qvalue * qscale + qmin
|
||||
// Here, qmin, qmax & qscale refer to the quantization values as implemented in
|
||||
// TensorFlow Lite's 'FakeQuant' kernel. round(x + 0.5) ensures we round away
|
||||
// from zero.
|
||||
// TensorFlow Lite's 'FakeQuant' kernel.
|
||||
//
|
||||
// NOTE: We do not need to nudge min/max values in this op, since they would
|
||||
// already be adjusted while generating the quantized model.
|
||||
|
@ -39,6 +39,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/pooling.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/prelu.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/relu.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/reshape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/resize.h"
|
||||
@ -96,6 +97,12 @@ std::vector<ComputeTaskDescriptorPtr> SelectConvolutionTransposed(
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the Metal compute-task descriptors for a QUANTIZE_AND_DEQUANTIZE
// node by forwarding directly to the kernel implementation; no
// attribute-dependent selection is needed for this op.
std::vector<ComputeTaskDescriptorPtr> SelectQuantizeAndDequantize(
    int id, ValueId input_id, ValueId output_id,
    const QuantizeAndDequantizeAttributes& attr) {
  return QuantizeAndDequantize(id, input_id, output_id, attr);
}
|
||||
|
||||
std::vector<ComputeTaskDescriptorPtr> SelectPReLU(
|
||||
const GraphFloat32& graph, int id, ValueId input_id, ValueId output_id,
|
||||
const PReLUAttributes& attr, const metal::RuntimeOptions& options) {
|
||||
@ -351,6 +358,12 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
|
||||
*tasks = ReLU(node_id, inputs[0], outputs[0],
|
||||
absl::any_cast<ReLUAttributes>(node->operation.attributes));
|
||||
break;
|
||||
case OperationType::QUANTIZE_AND_DEQUANTIZE:
|
||||
*tasks = SelectQuantizeAndDequantize(
|
||||
node_id, inputs[0], outputs[0],
|
||||
absl::any_cast<QuantizeAndDequantizeAttributes>(
|
||||
node->operation.attributes));
|
||||
break;
|
||||
case OperationType::RESHAPE:
|
||||
*tasks = SelectReshape(
|
||||
graph, node_id, inputs[0], outputs[0],
|
||||
@ -427,7 +440,6 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
|
||||
case OperationType::BATCH_TO_SPACE:
|
||||
case OperationType::CONST:
|
||||
case OperationType::LSTM:
|
||||
case OperationType::QUANTIZE_AND_DEQUANTIZE:
|
||||
case OperationType::SPACE_TO_BATCH:
|
||||
case OperationType::TRANSPOSE:
|
||||
case OperationType::UNKNOWN:
|
||||
|
@ -31,6 +31,7 @@ cc_library(
|
||||
":padding",
|
||||
":pooling",
|
||||
":prelu",
|
||||
":quantize_and_dequantize",
|
||||
":relu",
|
||||
":reshape",
|
||||
":resize",
|
||||
@ -539,6 +540,53 @@ ios_unit_test(
|
||||
deps = [":prelu_test_lib"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "quantize_and_dequantize",
|
||||
srcs = ["quantize_and_dequantize.cc"],
|
||||
hdrs = ["quantize_and_dequantize.h"],
|
||||
deps = [
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:operations",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
"//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor",
|
||||
"//tensorflow/lite/delegates/gpu/metal:runtime_options",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
objc_library(
|
||||
name = "quantize_and_dequantize_test_lib",
|
||||
testonly = 1,
|
||||
srcs = ["quantize_and_dequantize_test.mm"],
|
||||
sdk_frameworks = ["XCTest"],
|
||||
deps = [
|
||||
":quantize_and_dequantize",
|
||||
":test_util",
|
||||
"//tensorflow/lite/delegates/gpu/common:operations",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:tensor",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
"//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor",
|
||||
"//tensorflow/lite/delegates/gpu/metal:runtime_options",
|
||||
"//tensorflow/lite/kernels/internal:quantization_util",
|
||||
],
|
||||
)
|
||||
|
||||
ios_unit_test(
|
||||
name = "quantize_and_dequantize_test",
|
||||
testonly = 1,
|
||||
minimum_os_version = "11.0",
|
||||
runner = tflite_ios_lab_runner("IOS_LATEST"),
|
||||
tags = tf_gpu_tests_tags() + [
|
||||
"notap",
|
||||
"tflite_not_portable_android",
|
||||
],
|
||||
deps = [":quantize_and_dequantize_test_lib"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "relu",
|
||||
srcs = ["relu.cc"],
|
||||
|
@ -0,0 +1,54 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize.h"
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace metal {
|
||||
std::vector<ComputeTaskDescriptorPtr> QuantizeAndDequantize(
|
||||
int id, ValueId input_id, ValueId output_id,
|
||||
const QuantizeAndDequantizeAttributes& attr) {
|
||||
auto desc = std::make_shared<ComputeTaskDescriptor>();
|
||||
desc->id = id;
|
||||
desc->is_linkable = true;
|
||||
desc->shader_source = R"(
|
||||
FLT4 linkable$0(FLT4 value, int linear_index, uint3 gid, float3 params) {
|
||||
value = clamp(value, FLT4(params.x), FLT4(params.y));
|
||||
value = (value - FLT4(params.x)) / FLT4(params.z);
|
||||
return round(value) * FLT4(params.z) + FLT4(params.x);
|
||||
}
|
||||
)";
|
||||
|
||||
desc->input_buffers = {{input_id}};
|
||||
desc->output_buffer = {output_id};
|
||||
desc->uniform_buffers = {
|
||||
{"constant float3&",
|
||||
[attr](const std::map<ValueId, BHWC>& buffers) {
|
||||
return GetByteBuffer(
|
||||
std::vector<float>{attr.min, attr.max, attr.scale});
|
||||
}},
|
||||
};
|
||||
return {desc};
|
||||
}
|
||||
|
||||
} // namespace metal
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
@ -0,0 +1,50 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace metal {
|
||||
|
||||
// Performs the operation: {Quantize, Dequantize} on floating-point data.
|
||||
// We need this operation to emulate the error introduced by quantization
|
||||
// on the GPU, which cannot represent int8 tensors.
|
||||
//
|
||||
// Implemented as:
|
||||
// qvalue = round((min(qmax, max(qmin, src_val)) - qmin) * (1/qscale))
|
||||
// dq_value = qvalue * qscale + qmin
|
||||
// Here, qmin, qmax & qscale refer to the quantization values as implemented in
|
||||
// TensorFlow Lite's 'FakeQuant' kernel.
|
||||
//
|
||||
// NOTE: We do not need to nudge min/max values in this op, since they would
|
||||
// already be adjusted while generating the quantized model.
|
||||
std::vector<ComputeTaskDescriptorPtr> QuantizeAndDequantize(
|
||||
int id, ValueId input_id, ValueId output_id,
|
||||
const QuantizeAndDequantizeAttributes& attr);
|
||||
|
||||
} // namespace metal
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_QUANTIZE_AND_DEQUANTIZE_H_
|
@ -0,0 +1,167 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#import <XCTest/XCTest.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
|
||||
using ::tflite::NudgeQuantizationRange;
|
||||
using ::tflite::gpu::DataType;
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::OperationType;
|
||||
using ::tflite::gpu::QuantizeAndDequantizeAttributes;
|
||||
using ::tflite::gpu::TensorRef;
|
||||
using ::tflite::gpu::metal::CompareVectors;
|
||||
using ::tflite::gpu::metal::SingleOpModel;
|
||||
|
||||
// TODO: Add per-op test if possible.
|
||||
@interface QuantizeAndDequantizeTest : XCTestCase
|
||||
@end
|
||||
|
||||
@implementation QuantizeAndDequantizeTest
|
||||
- (void)setUp {
|
||||
[super setUp];
|
||||
}
|
||||
|
||||
// 8-bit quantize/dequantize over a (1,3,2,1) tensor with range [0, 1].
- (void)testDim2Bits8 {
  TensorRef<BHWC> input;
  input.type = DataType::FLOAT32;
  input.ref = 0;
  input.shape = BHWC(1, 3, 2, 1);

  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
  // pre-nudged, since this should be done during model conversion.
  const int num_bits = 8;
  const int quant_min = 0;
  const int quant_max = (1 << num_bits) - 1;
  QuantizeAndDequantizeAttributes attr;
  NudgeQuantizationRange(/*original_min=*/0.0, /*original_max=*/1.0, quant_min, quant_max,
                         &attr.min, &attr.max, &attr.scale);

  TensorRef<BHWC> output;
  output.type = DataType::FLOAT32;
  output.ref = 1;
  output.shape = BHWC(1, 3, 2, 1);

  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), attr}, {input}, {output});
  XCTAssertTrue(model.PopulateTensor(0, {0.0, 1.0, 0.25, 0.50, 0.4444444, 0.00001}));
  auto status = model.Invoke();
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
  // Fix: the expected vector was declared but unused, with the literal
  // duplicated inline in the CompareVectors call. Use the named vector.
  std::vector<float> expected_output = {0.0f, 1.0f, 0.25098f, 0.498039f, 0.443137f, 0.0f};
  status = CompareVectors(expected_output, model.GetOutput(0), 1e-6f);
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
}
||||
|
||||
// 8-bit quantize/dequantize over a (1,3,1,2) tensor with a range that
// straddles zero, [-0.9, 0.9].
- (void)testDim3Bits8_NegativeRange {
  TensorRef<BHWC> input_tensor;
  input_tensor.type = DataType::FLOAT32;
  input_tensor.ref = 0;
  input_tensor.shape = BHWC(1, 3, 1, 2);

  TensorRef<BHWC> output_tensor;
  output_tensor.type = DataType::FLOAT32;
  output_tensor.ref = 1;
  output_tensor.shape = BHWC(1, 3, 1, 2);

  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
  // pre-nudged, since this should be done during model conversion.
  const int num_bits = 8;
  const int quant_min = 0;
  const int quant_max = (1 << num_bits) - 1;
  QuantizeAndDequantizeAttributes q_attr;
  NudgeQuantizationRange(/*original_min=*/-0.9, /*original_max=*/0.9, quant_min, quant_max,
                         &q_attr.min, &q_attr.max, &q_attr.scale);

  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), q_attr}, {input_tensor},
                      {output_tensor});
  XCTAssertTrue(model.PopulateTensor(0, {0.0, -0.9, 0.25, 0.50, 0.4444444, -0.00001}));
  auto call_status = model.Invoke();
  XCTAssertTrue(call_status.ok(), @"%s", std::string(call_status.message()).c_str());
  call_status = CompareVectors({0.0f, -0.896471f, 0.247059f, 0.501176f, 0.444706f, 0.0f},
                               model.GetOutput(0), 1e-6f);
  XCTAssertTrue(call_status.ok(), @"%s", std::string(call_status.message()).c_str());
}
|
||||
|
||||
// 16-bit quantize/dequantize over a (1,3,1,2) tensor with range [0, 1];
// the finer grid keeps outputs much closer to the inputs than 8-bit.
- (void)testDim3Bits16 {
  TensorRef<BHWC> input_tensor;
  input_tensor.type = DataType::FLOAT32;
  input_tensor.ref = 0;
  input_tensor.shape = BHWC(1, 3, 1, 2);

  TensorRef<BHWC> output_tensor;
  output_tensor.type = DataType::FLOAT32;
  output_tensor.ref = 1;
  output_tensor.shape = BHWC(1, 3, 1, 2);

  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
  // pre-nudged, since this should be done during model conversion.
  const int num_bits = 16;
  const int quant_min = 0;
  const int quant_max = (1 << num_bits) - 1;
  QuantizeAndDequantizeAttributes q_attr;
  NudgeQuantizationRange(/*original_min=*/0.0, /*original_max=*/1.0, quant_min, quant_max,
                         &q_attr.min, &q_attr.max, &q_attr.scale);

  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), q_attr}, {input_tensor},
                      {output_tensor});
  XCTAssertTrue(model.PopulateTensor(0, {0.0, 1.0, 0.25, 0.50, 0.4444444, 0.00001}));
  auto call_status = model.Invoke();
  XCTAssertTrue(call_status.ok(), @"%s", std::string(call_status.message()).c_str());
  call_status = CompareVectors({0.0f, 1.0f, 0.250004f, 0.500008f, 0.44445f, 1.5259e-05f},
                               model.GetOutput(0), 1e-6f);
  XCTAssertTrue(call_status.ok(), @"%s", std::string(call_status.message()).c_str());
}
|
||||
|
||||
// 16-bit quantize/dequantize over a (1,3,2,1) tensor with a range that
// straddles zero, [-0.9, 0.9].
- (void)testDim2Bits16_NegativeRange {
  TensorRef<BHWC> input_tensor;
  input_tensor.type = DataType::FLOAT32;
  input_tensor.ref = 0;
  input_tensor.shape = BHWC(1, 3, 2, 1);

  TensorRef<BHWC> output_tensor;
  output_tensor.type = DataType::FLOAT32;
  output_tensor.ref = 1;
  output_tensor.shape = BHWC(1, 3, 2, 1);

  // Unlike TFLite's FakeQuant kernel, we assume that the incoming values are
  // pre-nudged, since this should be done during model conversion.
  const int num_bits = 16;
  const int quant_min = 0;
  const int quant_max = (1 << num_bits) - 1;
  QuantizeAndDequantizeAttributes q_attr;
  NudgeQuantizationRange(/*original_min=*/-0.9, /*original_max=*/0.9, quant_min, quant_max,
                         &q_attr.min, &q_attr.max, &q_attr.scale);

  SingleOpModel model({ToString(OperationType::QUANTIZE_AND_DEQUANTIZE), q_attr}, {input_tensor},
                      {output_tensor});
  XCTAssertTrue(model.PopulateTensor(0, {0.0, -0.9, 0.25, 0.50, 0.4444444, -0.00001}));
  auto call_status = model.Invoke();
  XCTAssertTrue(call_status.ok(), @"%s", std::string(call_status.message()).c_str());
  call_status = CompareVectors({0.0f, -0.900014f, 0.249998f, 0.499995f, 0.444431f, 0.0f},
                               model.GetOutput(0), 1e-6f);
  XCTAssertTrue(call_status.ok(), @"%s", std::string(call_status.message()).c_str());
}
|
||||
|
||||
@end
|
Loading…
Reference in New Issue
Block a user