From d46aa971be6e7d5a2ec2b9029f38c24bdfb8c277 Mon Sep 17 00:00:00 2001
From: Sachin Joglekar
Date: Fri, 20 Mar 2020 09:07:06 -0700
Subject: [PATCH] Adds GraphTransformation to add QuantizeAndDequantize nodes
 in GPU graph

PiperOrigin-RevId: 302038856
Change-Id: I009684ea5b611a3bfc05c88b4fd8a40c570cfd86
---
 tensorflow/lite/delegates/gpu/common/BUILD    |   1 +
 tensorflow/lite/delegates/gpu/common/model.h  |  10 ++
 .../gpu/common/transformations/BUILD          |  31 ++++
 .../transformations/add_quant_adjustments.cc  | 110 ++++++++++++
 .../transformations/add_quant_adjustments.h   |  45 +++++
 .../add_quant_adjustments_test.cc             | 166 ++++++++++++++++++
 6 files changed, 363 insertions(+)
 create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc
 create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h
 create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc

diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD
index 08945c70d0b..08612e37b3e 100644
--- a/tensorflow/lite/delegates/gpu/common/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/BUILD
@@ -92,6 +92,7 @@ cc_library(
         ":tensor",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:any",
+        "@com_google_absl//absl/types:optional",
     ],
 )
 
diff --git a/tensorflow/lite/delegates/gpu/common/model.h b/tensorflow/lite/delegates/gpu/common/model.h
index f5aad207168..6989584a24c 100644
--- a/tensorflow/lite/delegates/gpu/common/model.h
+++ b/tensorflow/lite/delegates/gpu/common/model.h
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include "absl/memory/memory.h"
 #include "absl/types/any.h"
+#include "absl/types/optional.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -39,6 +40,13 @@ using ValueId = uint32_t;
 
 using NodeId = uint32_t;
 
+// Used to emulate quantized behavior.
+struct QuantizationParams {
+  float min = 0;
+  float max = 0;
+  float scale = 0;
+};
+
 // Connects tensor's producer and operation that depends on this tensor.
 template <typename TensorT>
 struct Value {
@@ -47,6 +55,8 @@ struct Value {
   const ValueId id;
 
   TensorType tensor;
+
+  absl::optional<QuantizationParams> quant_params;
 };
 
 struct Operation {
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/BUILD b/tensorflow/lite/delegates/gpu/common/transformations/BUILD
index d0411473fae..3fe22f540ad 100644
--- a/tensorflow/lite/delegates/gpu/common/transformations/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/transformations/BUILD
@@ -19,6 +19,37 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "add_quant_adjustments",
+    srcs = ["add_quant_adjustments.cc"],
+    hdrs = ["add_quant_adjustments.h"],
+    deps = [
+        "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:model_transformer",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:any",
+    ],
+)
+
+cc_test(
+    name = "add_quant_adjustments_test",
+    srcs = ["add_quant_adjustments_test.cc"],
+    deps = [
+        ":add_quant_adjustments",
+        "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:model_transformer",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:shape",
+        "@com_google_absl//absl/types:any",
+        "@com_google_absl//absl/types:optional",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
     name = "fuse_add_to_conv",
     srcs = ["fuse_add_to_conv.cc"],
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc
new file mode 100644
index 00000000000..872c4bcd903
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc
@@ -0,0 +1,110 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h"
+
+#include <memory>
+
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/types/any.h"
+#include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+
+namespace tflite {
+namespace gpu {
+
+class AddQuantAdjustments : public NodeTransformation {
+ public:
+  TransformResult ApplyToNode(Node* node, GraphFloat32* graph) final {
+    if (node->operation.type ==
+        ToString(OperationType::QUANTIZE_AND_DEQUANTIZE)) {
+      return {TransformStatus::SKIPPED, ""};
+    }
+
+    bool transform_applied = false;
+    auto node_outputs = graph->FindOutputs(node->id);
+    for (auto output_value : node_outputs) {
+      // Skip if quantization doesn't apply.
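+      // (quant_params is only populated for values that were quantized in the
+      // source model; purely-float values are left untouched.)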
+      if (!output_value->quant_params) continue;
+      auto consumers = graph->FindConsumers(output_value->id);
+      // No need to do anything if this isn't consumed by another node.
+      if (consumers.empty()) {
+        continue;
+      }
+
+      // Add a new QuantizeAndDequantize node.
+      auto* quant_and_dequant_node = graph->NewNode();
+      quant_and_dequant_node->operation.type =
+          ToString(OperationType::QUANTIZE_AND_DEQUANTIZE);
+      QuantizeAndDequantizeAttributes attr;
+      attr.min = output_value->quant_params.value().min;
+      attr.max = output_value->quant_params.value().max;
+      attr.scale = output_value->quant_params.value().scale;
+      quant_and_dequant_node->operation.attributes = attr;
+
+      // Add one output Value for the new node.
+      // The tensor information should remain the same.
+      Value<TensorRef<BHWC>>* adjusted_value = graph->NewValue();
+      adjusted_value->tensor = output_value->tensor;
+      Status status =
+          graph->SetProducer(quant_and_dequant_node->id, adjusted_value->id);
+      if (!status.ok()) {
+        return {TransformStatus::INVALID,
+                "Could not create QuantizeAndDequantize node."};
+      }
+
+      // Replace output_value with adjusted_value on all consumers.
+      for (auto& consumer : consumers) {
+        status = graph->ReplaceInput(consumer->id, output_value->id,
+                                     adjusted_value->id);
+        if (!status.ok()) {
+          return {TransformStatus::INVALID,
+                  absl::StrCat(
+                      "Failed to associate quant-adjusted value for consumer: ",
+                      status.message())};
+        }
+      }
+
+      // Add the QuantizeAndDequantize node as a consumer of output_value.
+      status = graph->AddConsumer(quant_and_dequant_node->id, output_value->id);
+      if (!status.ok()) {
+        return {TransformStatus::INVALID,
+                absl::StrCat(
+                    "Could not associate output to QuantizeAndDequantize: ",
+                    status.message())};
+      }
+
+      // Remove quant params on output_value, to make the transformation
+      // idempotent.
+      output_value->quant_params.reset();
+      transform_applied = true;
+    }
+
+    if (transform_applied) {
+      return {TransformStatus::APPLIED, ""};
+    }
+    return {TransformStatus::SKIPPED, ""};
+  }
+};
+
+std::unique_ptr<NodeTransformation> NewAddQuantAdjustments() {
+  return absl::make_unique<AddQuantAdjustments>();
+}
+
+}  // namespace gpu
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h
new file mode 100644
index 00000000000..6eb4aaaf029
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h
@@ -0,0 +1,45 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_ADD_QUANT_ADJUSTMENTS_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_ADD_QUANT_ADJUSTMENTS_H_
+
+#include <memory>
+
+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
+
+namespace tflite {
+namespace gpu {
+
+// This pass is used to support inference on quantized models with the GPU
+// delegate.
+//
+// When delegating quantized models, we still run floating-point inference on
+// the GPU under the hood. This is done by dequantizing inputs (at runtime) &
+// constants (during delegation).
+// However, intermediate tensors can still deviate from the original quantized
+// inference, since float activations are not constrained to the ranges implied
+// by the original quantization parameters.
+// To prevent this, we add "QuantizeAndDequantize" nodes after each node-output
+// that was originally fixed-point:
+//   op1 -> op2
+// becomes
+//   op1 -> QuantizeAndDequantize -> op2
std::unique_ptr<NodeTransformation> NewAddQuantAdjustments();
+
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_ADD_QUANT_ADJUSTMENTS_H_
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc
new file mode 100644
index 00000000000..fc0913d2494
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc
@@ -0,0 +1,166 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "absl/types/any.h"
+#include "absl/types/optional.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+
+namespace tflite {
+namespace gpu {
+namespace {
+
+void AddQuantParams(absl::optional<QuantizationParams>* params, float min,
+                    float max, float scale) {
+  params->emplace();
+  params->value().min = min;
+  params->value().max = max;
+  params->value().scale = scale;
+}
+
+// Scenario:
+//   -> Add ->
+//
+// Since there is only one node output with no consumers, no new node should
+// be added.
+TEST(AddQuantAdjustments, OneNode) {
+  GraphFloat32 graph;
+  auto input = graph.NewValue();
+  input->tensor.shape = BHWC(1, 4, 4, 8);
+  AddQuantParams(&input->quant_params, /*min=*/0.0, /*max=*/1.0,
+                 /*scale=*/0.004);
+
+  Tensor<Linear, DataType::FLOAT32> add_tensor;
+  add_tensor.shape = Linear(8);
+  add_tensor.data.resize(8);
+  AddAttributes add_attr;
+  add_attr.param = add_tensor;
+  auto add_node = graph.NewNode();
+  add_node->operation.type = ToString(OperationType::ADD);
+  add_node->operation.attributes = add_attr;
+
+  ASSERT_TRUE(graph.AddConsumer(add_node->id, input->id).ok());
+
+  Value<TensorRef<BHWC>>* output;
+  ASSERT_TRUE(AddOutput(&graph, add_node, &output).ok());
+  output->tensor.shape = BHWC(1, 4, 4, 8);
+  AddQuantParams(&output->quant_params, /*min=*/0.0, /*max=*/2.0,
+                 /*scale=*/0.008);
+
+  ASSERT_EQ(1, graph.nodes().size());
+  ASSERT_EQ(2, graph.values().size());
+
+  auto transformation = NewAddQuantAdjustments();
+  ModelTransformer transformer(&graph, nullptr);
+  transformer.Apply("add_quant_adjustments", transformation.get());
+
+  EXPECT_EQ(1, graph.nodes().size());
+  EXPECT_EQ(2, graph.values().size());
+}
+
+// Scenario:
+//   -> Add -> QuantizeAndDequantize -> Add ->
+//        |                             ^
+//        |                             |
+//        -------------------------------
+//
+// A new QuantizeAndDequantize should only be added after the left/first 'Add'
+// op, and it should connect to both its consumers.
+TEST(AddQuantAdjustments, GeneralCase) {
+  GraphFloat32 graph;
+  auto input = graph.NewValue();
+  input->tensor.shape = BHWC(1, 4, 4, 8);
+  AddQuantParams(&input->quant_params, /*min=*/0.0, /*max=*/1.0,
+                 /*scale=*/0.004);
+
+  // First Add.
+  Tensor<Linear, DataType::FLOAT32> add_tensor;
+  add_tensor.shape = Linear(8);
+  add_tensor.data.resize(8);
+  AddAttributes add_attr;
+  add_attr.param = add_tensor;
+  auto add1_node = graph.NewNode();
+  add1_node->operation.type = ToString(OperationType::ADD);
+  add1_node->operation.attributes = add_attr;
+  // QuantizeAndDequantize.
+  QuantizeAndDequantizeAttributes quant_attr;
+  quant_attr.min = -1.0;
+  quant_attr.max = 1.0;
+  quant_attr.scale = 0.008;
+  auto quant_node = graph.NewNode();
+  quant_node->operation.type =
+      ToString(OperationType::QUANTIZE_AND_DEQUANTIZE);
+  quant_node->operation.attributes = quant_attr;
+  // Second Add.
+  auto add2_node = graph.NewNode();
+  add2_node->operation.type = ToString(OperationType::ADD);
+
+  // Connections.
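+  // Wiring: input -> add1 -> link1 -> quant -> link2 -> add2 -> output,
+  // with link1 also consumed directly by add2 (hence two consumers).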
+  ASSERT_TRUE(graph.AddConsumer(add1_node->id, input->id).ok());
+  Value<TensorRef<BHWC>>* link1;
+  ASSERT_TRUE(ConnectTwoNodes(&graph, add1_node, quant_node, &link1).ok());
+  AddQuantParams(&link1->quant_params, /*min=*/0.0, /*max=*/2.0,
+                 /*scale=*/0.008);
+  link1->tensor.shape = BHWC(1, 4, 4, 8);
+  ASSERT_TRUE(graph.AddConsumer(add2_node->id, link1->id).ok());
+  Value<TensorRef<BHWC>>* link2;
+  ASSERT_TRUE(ConnectTwoNodes(&graph, quant_node, add2_node, &link2).ok());
+  AddQuantParams(&link2->quant_params, /*min=*/-1.0, /*max=*/1.0,
+                 /*scale=*/0.008);
+  link2->tensor.shape = BHWC(1, 4, 4, 8);
+  Value<TensorRef<BHWC>>* output;
+  ASSERT_TRUE(AddOutput(&graph, add2_node, &output).ok());
+  AddQuantParams(&output->quant_params, /*min=*/-1.0, /*max=*/1.0,
+                 /*scale=*/0.008);
+  output->tensor.shape = BHWC(1, 4, 4, 8);
+
+  ASSERT_EQ(3, graph.nodes().size());
+  ASSERT_EQ(4, graph.values().size());
+
+  auto transformation = NewAddQuantAdjustments();
+  ModelTransformer transformer(&graph, nullptr);
+  transformer.Apply("add_quant_adjustments", transformation.get());
+
+  EXPECT_EQ(4, graph.nodes().size());
+  EXPECT_EQ(5, graph.values().size());
+  EXPECT_EQ(ToString(OperationType::ADD), graph.nodes()[0]->operation.type);
+  EXPECT_EQ(ToString(OperationType::QUANTIZE_AND_DEQUANTIZE),
+            graph.nodes()[1]->operation.type);
+  EXPECT_EQ(ToString(OperationType::ADD), graph.nodes()[2]->operation.type);
+  EXPECT_EQ(ToString(OperationType::QUANTIZE_AND_DEQUANTIZE),
+            graph.nodes()[3]->operation.type);
+  auto new_quant_attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
+      graph.nodes()[3]->operation.attributes);
+  EXPECT_EQ(0.0, new_quant_attr.min);
+  EXPECT_EQ(2.0, new_quant_attr.max);
+  const auto& new_quant_consumers = graph.FindConsumers(graph.values()[4]->id);
+  EXPECT_EQ(2, new_quant_consumers.size());
+  EXPECT_EQ(quant_node, new_quant_consumers[0]);
+  EXPECT_EQ(add2_node, new_quant_consumers[1]);
+
+  // Transformation should be idempotent.
+  transformer.Apply("add_quant_adjustments", transformation.get());
+  EXPECT_EQ(4, graph.nodes().size());
+  EXPECT_EQ(5, graph.values().size());
+}
+
+}  // namespace
+}  // namespace gpu
+}  // namespace tflite
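
Note: for context, below is a minimal sketch of how a caller might run this
transformation on a built graph, mirroring the usage in the tests above. The
helper name `ApplyQuantAdjustments` is illustrative only and not part of this
patch; the patch itself only exposes `NewAddQuantAdjustments()`.

    // Sketch only: assumes a GraphFloat32 already populated by the delegate.
    #include "tensorflow/lite/delegates/gpu/common/model.h"
    #include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
    #include "tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h"

    namespace tflite {
    namespace gpu {

    void ApplyQuantAdjustments(GraphFloat32* graph) {
      auto transformation = NewAddQuantAdjustments();
      // No transformation reporter, as in the tests above.
      ModelTransformer transformer(graph, /*reporter=*/nullptr);
      // Visits every node; each node-output Value with quant_params set gets a
      // QuantizeAndDequantize node inserted between producer and consumers.
      transformer.Apply("add_quant_adjustments", transformation.get());
    }

    }  // namespace gpu
    }  // namespace tflite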