From d46aa971be6e7d5a2ec2b9029f38c24bdfb8c277 Mon Sep 17 00:00:00 2001
From: Sachin Joglekar
Date: Fri, 20 Mar 2020 09:07:06 -0700
Subject: [PATCH] Adds GraphTransformation to add QuantizeAndDequantize nodes
 in GPU graph

PiperOrigin-RevId: 302038856
Change-Id: I009684ea5b611a3bfc05c88b4fd8a40c570cfd86
---
 tensorflow/lite/delegates/gpu/common/BUILD    |   1 +
 tensorflow/lite/delegates/gpu/common/model.h  |  10 ++
 .../gpu/common/transformations/BUILD          |  31 ++++
 .../transformations/add_quant_adjustments.cc  | 110 ++++++++++++
 .../transformations/add_quant_adjustments.h   |  45 +++++
 .../add_quant_adjustments_test.cc             | 166 ++++++++++++++++++
 6 files changed, 363 insertions(+)
 create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc
 create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h
 create mode 100644 tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc

diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD
index 08945c70d0b..08612e37b3e 100644
--- a/tensorflow/lite/delegates/gpu/common/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/BUILD
@@ -92,6 +92,7 @@ cc_library(
         ":tensor",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:any",
+        "@com_google_absl//absl/types:optional",
     ],
 )
 
diff --git a/tensorflow/lite/delegates/gpu/common/model.h b/tensorflow/lite/delegates/gpu/common/model.h
index f5aad207168..6989584a24c 100644
--- a/tensorflow/lite/delegates/gpu/common/model.h
+++ b/tensorflow/lite/delegates/gpu/common/model.h
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include "absl/memory/memory.h"
 #include "absl/types/any.h"
+#include "absl/types/optional.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -39,6 +40,13 @@ using ValueId = uint32_t;
 
 using NodeId = uint32_t;
 
+// Used to emulate quantized behavior.
+struct QuantizationParams {
+  float min = 0;
+  float max = 0;
+  float scale = 0;
+};
+
 // Connects tensor's producer and operation that depends on this tensor.
 template <typename TensorT>
 struct Value {
@@ -47,6 +55,8 @@ struct Value {
   const ValueId id;
 
   TensorType tensor;
+
+  absl::optional<QuantizationParams> quant_params;
 };
 
 struct Operation {
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/BUILD b/tensorflow/lite/delegates/gpu/common/transformations/BUILD
index d0411473fae..3fe22f540ad 100644
--- a/tensorflow/lite/delegates/gpu/common/transformations/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/transformations/BUILD
@@ -19,6 +19,37 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "add_quant_adjustments",
+    srcs = ["add_quant_adjustments.cc"],
+    hdrs = ["add_quant_adjustments.h"],
+    deps = [
+        "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:model_transformer",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:any",
+    ],
+)
+
+cc_test(
+    name = "add_quant_adjustments_test",
+    srcs = ["add_quant_adjustments_test.cc"],
+    deps = [
+        ":add_quant_adjustments",
+        "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:model_transformer",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:shape",
+        "@com_google_absl//absl/types:any",
+        "@com_google_absl//absl/types:optional",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
     name = "fuse_add_to_conv",
     srcs = ["fuse_add_to_conv.cc"],
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc
new file mode 100644
index 00000000000..872c4bcd903
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.cc
@@ -0,0 +1,110 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h"
+
+#include <memory>
+
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/types/any.h"
+#include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+
+namespace tflite {
+namespace gpu {
+
+class AddQuantAdjustments : public NodeTransformation {
+ public:
+  TransformResult ApplyToNode(Node* node, GraphFloat32* graph) final {
+    if (node->operation.type ==
+        ToString(OperationType::QUANTIZE_AND_DEQUANTIZE)) {
+      return {TransformStatus::SKIPPED, ""};
+    }
+
+    bool transform_applied = false;
+    auto node_outputs = graph->FindOutputs(node->id);
+    for (auto output_value : node_outputs) {
+      // Skip if quantization doesn't apply.
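+      // (quant_params is only populated for values that were quantized in the
+      // source model; purely-float values are left untouched.)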
+      if (!output_value->quant_params) continue;
+      auto consumers = graph->FindConsumers(output_value->id);
+      // No need to do anything if this isn't consumed by another node.
+      if (consumers.empty()) {
+        continue;
+      }
+
+      // Add a new QuantizeAndDequantize node.
+      auto* quant_and_dequant_node = graph->NewNode();
+      quant_and_dequant_node->operation.type =
+          ToString(OperationType::QUANTIZE_AND_DEQUANTIZE);
+      QuantizeAndDequantizeAttributes attr;
+      attr.min = output_value->quant_params.value().min;
+      attr.max = output_value->quant_params.value().max;
+      attr.scale = output_value->quant_params.value().scale;
+      quant_and_dequant_node->operation.attributes = attr;
+
+      // Add one output Value for the new node.
+      // The tensor information should remain the same.
+      Value<TensorRef<BHWC>>* adjusted_value = graph->NewValue();
+      adjusted_value->tensor = output_value->tensor;
+      Status status =
+          graph->SetProducer(quant_and_dequant_node->id, adjusted_value->id);
+      if (!status.ok()) {
+        return {TransformStatus::INVALID,
+                "Could not create QuantizeAndDequantize node."};
+      }
+
+      // Replace output_value with adjusted_value on all consumers.
+      for (auto& consumer : consumers) {
+        status = graph->ReplaceInput(consumer->id, output_value->id,
+                                     adjusted_value->id);
+        if (!status.ok()) {
+          return {TransformStatus::INVALID,
+                  absl::StrCat(
+                      "Failed to associate quant-adjusted value for consumer: ",
+                      status.message())};
+        }
+      }
+
+      // Add the QuantizeAndDequantize node as a consumer of output_value.
+      status = graph->AddConsumer(quant_and_dequant_node->id, output_value->id);
+      if (!status.ok()) {
+        return {TransformStatus::INVALID,
+                absl::StrCat(
+                    "Could not associate output to QuantizeAndDequantize: ",
+                    status.message())};
+      }
+
+      // Remove quant params on output_value, to make the transformation
+      // idempotent.
+      output_value->quant_params.reset();
+      transform_applied = true;
+    }
+
+    if (transform_applied) {
+      return {TransformStatus::APPLIED, ""};
+    }
+    return {TransformStatus::SKIPPED, ""};
+  }
+};
+
+std::unique_ptr<NodeTransformation> NewAddQuantAdjustments() {
+  return absl::make_unique<AddQuantAdjustments>();
+}
+
+}  // namespace gpu
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h
new file mode 100644
index 00000000000..6eb4aaaf029
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h
@@ -0,0 +1,45 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_ADD_QUANT_ADJUSTMENTS_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_ADD_QUANT_ADJUSTMENTS_H_
+
+#include <memory>
+
+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
+
+namespace tflite {
+namespace gpu {
+
+// This pass is used to support inference on quantized models with the GPU
+// delegate.
+//
+// When delegating quantized models, we still run floating-point inference on
+// the GPU under the hood. This is done by dequantizing inputs (at runtime) &
+// constants (during delegation).
+// However, intermediate tensors can still deviate from the original quantized
+// inference, since float activations are not constrained to the ranges implied
+// by the original quantization parameters.
+// To prevent this, we add "QuantizeAndDequantize" nodes after each node-output
+// that was originally fixed-point:
+//   op1 -> op2
+// becomes
+//   op1 -> QuantizeAndDequantize -> op2
std::unique_ptr<NodeTransformation> NewAddQuantAdjustments();
+
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_ADD_QUANT_ADJUSTMENTS_H_
diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc
new file mode 100644
index 00000000000..fc0913d2494
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc
@@ -0,0 +1,166 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "absl/types/any.h"
+#include "absl/types/optional.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+
+namespace tflite {
+namespace gpu {
+namespace {
+
+void AddQuantParams(absl::optional<QuantizationParams>* params, float min,
+                    float max, float scale) {
+  params->emplace();
+  params->value().min = min;
+  params->value().max = max;
+  params->value().scale = scale;
+}
+
+// Scenario:
+//   -> Add ->
+//
+// Since there is only one node output with no consumers, no new node should
+// be added.
+TEST(AddQuantAdjustments, OneNode) {
+  GraphFloat32 graph;
+  auto input = graph.NewValue();
+  input->tensor.shape = BHWC(1, 4, 4, 8);
+  AddQuantParams(&input->quant_params, /*min=*/0.0, /*max=*/1.0,
+                 /*scale=*/0.004);
+
+  Tensor<Linear, DataType::FLOAT32> add_tensor;
+  add_tensor.shape = Linear(8);
+  add_tensor.data.resize(8);
+  AddAttributes add_attr;
+  add_attr.param = add_tensor;
+  auto add_node = graph.NewNode();
+  add_node->operation.type = ToString(OperationType::ADD);
+  add_node->operation.attributes = add_attr;
+
+  ASSERT_TRUE(graph.AddConsumer(add_node->id, input->id).ok());
+
+  Value<TensorRef<BHWC>>* output;
+  ASSERT_TRUE(AddOutput(&graph, add_node, &output).ok());
+  output->tensor.shape = BHWC(1, 4, 4, 8);
+  AddQuantParams(&output->quant_params, /*min=*/0.0, /*max=*/2.0,
+                 /*scale=*/0.008);
+
+  ASSERT_EQ(1, graph.nodes().size());
+  ASSERT_EQ(2, graph.values().size());
+
+  auto transformation = NewAddQuantAdjustments();
+  ModelTransformer transformer(&graph, nullptr);
+  transformer.Apply("add_quant_adjustments", transformation.get());
+
+  EXPECT_EQ(1, graph.nodes().size());
+  EXPECT_EQ(2, graph.values().size());
+}
+
+// Scenario:
+//   -> Add -> QuantizeAndDequantize -> Add ->
+//        |                             ^
+//        |                             |
+//        -------------------------------
+//
+// A new QuantizeAndDequantize should only be added after the left/first 'Add'
+// op, and it should connect to both its consumers.
+TEST(AddQuantAdjustments, GeneralCase) {
+  GraphFloat32 graph;
+  auto input = graph.NewValue();
+  input->tensor.shape = BHWC(1, 4, 4, 8);
+  AddQuantParams(&input->quant_params, /*min=*/0.0, /*max=*/1.0,
+                 /*scale=*/0.004);
+
+  // First Add.
+  Tensor<Linear, DataType::FLOAT32> add_tensor;
+  add_tensor.shape = Linear(8);
+  add_tensor.data.resize(8);
+  AddAttributes add_attr;
+  add_attr.param = add_tensor;
+  auto add1_node = graph.NewNode();
+  add1_node->operation.type = ToString(OperationType::ADD);
+  add1_node->operation.attributes = add_attr;
+  // QuantizeAndDequantize.
+  QuantizeAndDequantizeAttributes quant_attr;
+  quant_attr.min = -1.0;
+  quant_attr.max = 1.0;
+  quant_attr.scale = 0.008;
+  auto quant_node = graph.NewNode();
+  quant_node->operation.type =
+      ToString(OperationType::QUANTIZE_AND_DEQUANTIZE);
+  quant_node->operation.attributes = quant_attr;
+  // Second Add.
+  auto add2_node = graph.NewNode();
+  add2_node->operation.type = ToString(OperationType::ADD);
+
+  // Connections.
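+  // Wiring: input -> add1 -> link1 -> quant -> link2 -> add2 -> output,
+  // with link1 also consumed directly by add2 (hence two consumers).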
+  ASSERT_TRUE(graph.AddConsumer(add1_node->id, input->id).ok());
+  Value<TensorRef<BHWC>>* link1;
+  ASSERT_TRUE(ConnectTwoNodes(&graph, add1_node, quant_node, &link1).ok());
+  AddQuantParams(&link1->quant_params, /*min=*/0.0, /*max=*/2.0,
+                 /*scale=*/0.008);
+  link1->tensor.shape = BHWC(1, 4, 4, 8);
+  ASSERT_TRUE(graph.AddConsumer(add2_node->id, link1->id).ok());
+  Value<TensorRef<BHWC>>* link2;
+  ASSERT_TRUE(ConnectTwoNodes(&graph, quant_node, add2_node, &link2).ok());
+  AddQuantParams(&link2->quant_params, /*min=*/-1.0, /*max=*/1.0,
+                 /*scale=*/0.008);
+  link2->tensor.shape = BHWC(1, 4, 4, 8);
+  Value<TensorRef<BHWC>>* output;
+  ASSERT_TRUE(AddOutput(&graph, add2_node, &output).ok());
+  AddQuantParams(&output->quant_params, /*min=*/-1.0, /*max=*/1.0,
+                 /*scale=*/0.008);
+  output->tensor.shape = BHWC(1, 4, 4, 8);
+
+  ASSERT_EQ(3, graph.nodes().size());
+  ASSERT_EQ(4, graph.values().size());
+
+  auto transformation = NewAddQuantAdjustments();
+  ModelTransformer transformer(&graph, nullptr);
+  transformer.Apply("add_quant_adjustments", transformation.get());
+
+  EXPECT_EQ(4, graph.nodes().size());
+  EXPECT_EQ(5, graph.values().size());
+  EXPECT_EQ(ToString(OperationType::ADD), graph.nodes()[0]->operation.type);
+  EXPECT_EQ(ToString(OperationType::QUANTIZE_AND_DEQUANTIZE),
+            graph.nodes()[1]->operation.type);
+  EXPECT_EQ(ToString(OperationType::ADD), graph.nodes()[2]->operation.type);
+  EXPECT_EQ(ToString(OperationType::QUANTIZE_AND_DEQUANTIZE),
+            graph.nodes()[3]->operation.type);
+  auto new_quant_attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
+      graph.nodes()[3]->operation.attributes);
+  EXPECT_EQ(0.0, new_quant_attr.min);
+  EXPECT_EQ(2.0, new_quant_attr.max);
+  const auto& new_quant_consumers = graph.FindConsumers(graph.values()[4]->id);
+  EXPECT_EQ(2, new_quant_consumers.size());
+  EXPECT_EQ(quant_node, new_quant_consumers[0]);
+  EXPECT_EQ(add2_node, new_quant_consumers[1]);
+
+  // Transformation should be idempotent.
+  transformer.Apply("add_quant_adjustments", transformation.get());
+  EXPECT_EQ(4, graph.nodes().size());
+  EXPECT_EQ(5, graph.values().size());
+}
+
+}  // namespace
+}  // namespace gpu
+}  // namespace tflite
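
Note: for context, below is a minimal sketch of how a caller might run this
transformation on a built graph, mirroring the usage in the tests above. The
helper name `ApplyQuantAdjustments` is illustrative only and not part of this
patch; the patch itself only exposes `NewAddQuantAdjustments()`.

    // Sketch only: assumes a GraphFloat32 already populated by the delegate.
    #include "tensorflow/lite/delegates/gpu/common/model.h"
    #include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
    #include "tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments.h"

    namespace tflite {
    namespace gpu {

    void ApplyQuantAdjustments(GraphFloat32* graph) {
      auto transformation = NewAddQuantAdjustments();
      // No transformation reporter, as in the tests above.
      ModelTransformer transformer(graph, /*reporter=*/nullptr);
      // Visits every node; each node-output Value with quant_params set gets a
      // QuantizeAndDequantize node inserted between producer and consumers.
      transformer.Apply("add_quant_adjustments", transformation.get());
    }

    }  // namespace gpu
    }  // namespace tflite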