From feac6d2f33d26878eaf346b920a0cd680e9d69ed Mon Sep 17 00:00:00 2001
From: Yunlu Li <yunluli@google.com>
Date: Mon, 2 Dec 2019 13:26:33 -0800
Subject: [PATCH] Better quantized op tests.

PiperOrigin-RevId: 283404748
Change-Id: Ief89d65cf9632783b3e602c8550bf4381d3d4705
---
 .../testing/generated_examples_zip_test.cc    | 25 ++++--
 tensorflow/lite/testing/tflite_driver.cc      | 85 +++++++++++++++++--
 tensorflow/lite/testing/tflite_driver.h       |  2 +
 tensorflow/lite/testing/tflite_driver_test.cc |  2 +-
 tensorflow/lite/testing/toco_convert.py       |  2 +
 5 files changed, 104 insertions(+), 12 deletions(-)
diff --git a/tensorflow/lite/testing/generated_examples_zip_test.cc b/tensorflow/lite/testing/generated_examples_zip_test.cc
index 16b4675bb0d..d1b3d267eba 100644
--- a/tensorflow/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/lite/testing/generated_examples_zip_test.cc
@@ -156,14 +156,22 @@ const std::map<string, string>& GetKnownBrokenNnapiTests() {
 const std::map<string, string>& GetKnownQuantizeBrokenTests() {
   static const std::map<string, string>* const kQuantizeBrokenTests =
       new std::map<string, string>({
-          {R"(^\/conv.*fully_quantize=True)", "134594898"},
-          {R"(^\/depthwiseconv.*fully_quantize=True)", "134594898"},
           {R"(^\/sum.*fully_quantize=True)", "134594898"},
           {R"(^\/l2norm.*fully_quantize=True)", "134594898"},
       });
   return *kQuantizeBrokenTests;
 }
 
+// Per-test quantization error multipliers, keyed by a test-name regex.
+const std::map<string, int>& GetQuantizeTestsError() {
+  static const auto* const kQuantizeTestsError = new std::map<string, int>({
+      {R"(^\/conv_relu1.*fully_quantize=True)", 18},
+      {R"(^\/conv_relu6.*fully_quantize=True)", 8},
+      {R"(^\/maximum.*fully_quantize=True)", 8},
+  });
+  return *kQuantizeTestsError;
+}
+
 // Allows test data to be unarchived into a temporary directory and makes
 // sure those temporary directories are removed later.
 class ArchiveEnvironment : public ::testing::Environment {
@@ -299,10 +307,16 @@ TEST_P(OpsTest, RunZipTests) {
   tflite::testing::TfLiteDriver test_driver(
       FLAGS_use_nnapi ? TfLiteDriver::DelegateType::kNnapi
                       : TfLiteDriver::DelegateType::kNone);
+
+  auto quantized_tests_error = GetQuantizeTestsError();
   bool fully_quantize = false;
   if (test_path.find("fully_quantize=True") != std::string::npos) {
-    // TODO(b/134594898): Tighten this constraint.
-    test_driver.SetThreshold(0.2, 0.1);
+    for (const auto& p : quantized_tests_error) {
+      if (RE2::PartialMatch(test_name, p.first)) {
+        test_driver.SetQuantizationErrorMultiplier(p.second);
+        break;
+      }
+    }
     fully_quantize = true;
   }
 
@@ -313,7 +327,6 @@ TEST_P(OpsTest, RunZipTests) {
     auto kBrokenNnapiTests = GetKnownBrokenNnapiTests();
     broken_tests.insert(kBrokenNnapiTests.begin(), kBrokenNnapiTests.end());
   }
-  auto quantize_broken_tests = GetKnownQuantizeBrokenTests();
 
   bool result = tflite::testing::ParseAndRunTests(&tflite_stream, &test_driver);
   string message = test_driver.GetErrorMessage();
@@ -346,7 +359,7 @@ TEST_P(OpsTest, RunZipTests) {
     if (!result) {
       string bug_number;
       // See if the tests are potential quantize failures.
-      for (const auto& p : quantize_broken_tests) {
+      for (const auto& p : GetKnownQuantizeBrokenTests()) {
         if (RE2::PartialMatch(test_name, p.first)) {
           bug_number = p.second;
           break;
diff --git a/tensorflow/lite/testing/tflite_driver.cc b/tensorflow/lite/testing/tflite_driver.cc
index 795fb1fee99..47293016ab6 100644
--- a/tensorflow/lite/testing/tflite_driver.cc
+++ b/tensorflow/lite/testing/tflite_driver.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <complex>
 #include <memory>
 #include <vector>
-
 #include "absl/strings/escaping.h"
 #include "tensorflow/lite/builtin_op_data.h"
 #include "tensorflow/lite/delegates/flex/delegate.h"
@@ -37,6 +36,22 @@ namespace {
 const double kRelativeThreshold = 1e-2f;
 const double kAbsoluteThreshold = 1e-4f;
 
+// For quantized tests, we use a different error measurement from float ones.
+// Assumes the baseline is always a float TF model.
+// Error of a quantized model compared to the baseline comes from two sources:
+//   1. the math done with quantized inputs, and
+//   2. quantization of the output.
+// Assuming there is no error introduced by source 1, the theoretical maximum
+// error allowed for the output is 0.5 * scale, because scale is equal to the
+// size of the quantization bucket.
+//
+// As a result, we use `scale` as a unit for measuring the quantization error.
+// To add the error introduced by source 1 as well, we need to relax the
+// multiplier from 0.5 to a larger number, which is model/op dependent.
+// The number below is large enough to cover both sources of error, so that
+// most quantized op tests pass.
+const int kQuantizationErrorMultiplier = 4;
+
 // Returns the value in the given position in a tensor.
 template <typename T>
 T Value(void* data, int index) {
@@ -58,15 +73,31 @@ unique_void_ptr make_type_erased_array(size_t size) {
                          [](void* data) { delete[] static_cast<T*>(data); });
 }
 
+// Returns true iff `tensor` is int8 with per-tensor affine quantization,
+// i.e. its params hold exactly one scale and one zero point.
+bool IsQuantized(const TfLiteTensor& tensor) {
+  if (tensor.type != kTfLiteInt8) return false;
+  // `params` may only be read as TfLiteAffineQuantization when `type` says so.
+  if (tensor.quantization.type != kTfLiteAffineQuantization ||
+      tensor.quantization.params == nullptr) {
+    return false;
+  }
+  const auto* affine = reinterpret_cast<const TfLiteAffineQuantization*>(
+      tensor.quantization.params);
+  return affine->scale != nullptr && affine->scale->size == 1 &&
+         affine->zero_point != nullptr && affine->zero_point->size == 1;
+}
 }  // namespace
 
 class TfLiteDriver::DataExpectation {
  public:
-  DataExpectation(double relative_threshold, double absolute_threshold)
+  DataExpectation(double relative_threshold, double absolute_threshold,
+                  int quantization_error_multiplier)
       : data_(nullptr, nullptr),
         num_elements_(0),
         relative_threshold_(relative_threshold),
-        absolute_threshold_(absolute_threshold) {}
+        absolute_threshold_(absolute_threshold),
+        quantization_error_multiplier_(quantization_error_multiplier) {}
 
   template <typename T>
   void SetData(const string& csv_values) {
@@ -128,11 +159,13 @@ class TfLiteDriver::DataExpectation {
   }
 
   bool TypedCheckString(bool verbose, const TfLiteTensor& tensor);
+  bool QuantizedCheck(bool verbose, const TfLiteTensor& tensor);
 
   unique_void_ptr data_;
   size_t num_elements_;
   double relative_threshold_;
   double absolute_threshold_;
+  int quantization_error_multiplier_;
 };
 
 class TfLiteDriver::ShapeExpectation {
@@ -218,8 +251,37 @@ bool TfLiteDriver::DataExpectation::TypedCheckString(
   return true;
 }
 
+bool TfLiteDriver::DataExpectation::QuantizedCheck(bool verbose,
+                                                   const TfLiteTensor& tensor) {
+  if (tensor.bytes != num_elements_) {  // int8: one byte per element.
+    std::cerr << "Expected a tensor with " << num_elements_
+              << " elements, got " << tensor.bytes << std::endl;
+    return false;  // Comparing anyway would index past the shorter buffer.
+  }
+  auto* quantization =
+      reinterpret_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
+  const float scale = quantization->scale->data[0];
+  const int32 zero_point = quantization->zero_point->data[0];
+  const float max_error = quantization_error_multiplier_ * scale;
+  bool good_result = true;  // Keep scanning after a miss to report them all.
+  for (int i = 0; i < static_cast<int>(num_elements_); ++i) {
+    const float dequantized = scale * (tensor.data.int8[i] - zero_point);
+    const float reference = Value<float>(data_.get(), i);
+    if (!(std::abs(dequantized - reference) > max_error)) continue;  // OK.
+    good_result = false;
+    if (!verbose) continue;
+    std::cerr << "  index " << i << ": got " << dequantized
+              << ", but expected " << reference << std::endl;
+  }
+  return good_result;
+}
+
 bool TfLiteDriver::DataExpectation::Check(bool verbose,
                                           const TfLiteTensor& tensor) {
+  if (IsQuantized(tensor)) {
+    return QuantizedCheck(verbose, tensor);
+  }
+
   switch (tensor.type) {
     case kTfLiteFloat32:
       return TypedCheck<float, float>(verbose, tensor);
@@ -247,7 +309,8 @@ bool TfLiteDriver::DataExpectation::Check(bool verbose,
 TfLiteDriver::TfLiteDriver(DelegateType delegate_type, bool reference_kernel)
     : delegate_(nullptr, nullptr),
       relative_threshold_(kRelativeThreshold),
-      absolute_threshold_(kAbsoluteThreshold) {
+      absolute_threshold_(kAbsoluteThreshold),
+      quantization_error_multiplier_(kQuantizationErrorMultiplier) {
   if (reference_kernel) {
     resolver_.reset(new ops::builtin::BuiltinRefOpResolver);
   } else {
@@ -395,6 +458,11 @@ void TfLiteDriver::SetThreshold(double relative_threshold,
   absolute_threshold_ = absolute_threshold;
 }
 
+void TfLiteDriver::SetQuantizationErrorMultiplier(
+    int quantization_error_multiplier) {  // Max error, in multiples of scale.
+  quantization_error_multiplier_ = quantization_error_multiplier;
+}
+
 void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
   if (!IsValid()) return;
   auto* tensor = interpreter_->tensor(id);
@@ -402,7 +470,14 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
     Invalidate(absl::StrCat("Overridden expectation for tensor '", id, "'"));
   }
   expected_output_[id].reset(
-      new DataExpectation(relative_threshold_, absolute_threshold_));
+      new DataExpectation(relative_threshold_, absolute_threshold_,
+                          quantization_error_multiplier_));
+
+  if (IsQuantized(*tensor)) {
+    expected_output_[id]->SetData<float>(csv_values);
+    return;
+  }
+
   switch (tensor->type) {
     case kTfLiteFloat32:
       expected_output_[id]->SetData<float>(csv_values);
diff --git a/tensorflow/lite/testing/tflite_driver.h b/tensorflow/lite/testing/tflite_driver.h
index ae843d1cba7..258902606a5 100644
--- a/tensorflow/lite/testing/tflite_driver.h
+++ b/tensorflow/lite/testing/tflite_driver.h
@@ -64,6 +64,7 @@ class TfLiteDriver : public TestRunner {
   bool CheckResults() override;
   string ReadOutput(int id) override;
   void SetThreshold(double relative_threshold, double absolute_threshold);
+  void SetQuantizationErrorMultiplier(int quantization_error_multiplier);
 
  protected:
   Interpreter::TfLiteDelegatePtr delegate_;
@@ -95,6 +96,7 @@ class TfLiteDriver : public TestRunner {
   std::map<int, TfLiteTensor*> tensors_to_deallocate_;
   double relative_threshold_;
   double absolute_threshold_;
+  int quantization_error_multiplier_;
 };
 
 }  // namespace testing
diff --git a/tensorflow/lite/testing/tflite_driver_test.cc b/tensorflow/lite/testing/tflite_driver_test.cc
index 99efd2d66d1..6dac9565dde 100644
--- a/tensorflow/lite/testing/tflite_driver_test.cc
+++ b/tensorflow/lite/testing/tflite_driver_test.cc
@@ -112,7 +112,7 @@ TEST(TfliteDriverTest, AddQuantizedInt8Test) {
 
   runner->SetInput(1, "1,1,1,1");
 
-  runner->SetExpectation(2, "3,3,3,3");
+  runner->SetExpectation(2, "0.0117,0.0117,0.0117,0.0117");
 
   runner->Invoke();
   ASSERT_TRUE(runner->IsValid());
diff --git a/tensorflow/lite/testing/toco_convert.py b/tensorflow/lite/testing/toco_convert.py
index 3e8a489c5f8..e8d1e8eec12 100644
--- a/tensorflow/lite/testing/toco_convert.py
+++ b/tensorflow/lite/testing/toco_convert.py
@@ -146,6 +146,8 @@ def toco_convert(options, graph_def, input_tensors, output_tensors, **kwargs):
       if extra_toco_options.inference_output_type:
         converter.inference_output_type = (
             extra_toco_options.inference_output_type)
+      else:
+        converter.inference_output_type = tf.int8
 
       try:
         tflite_model = converter.convert()