Better quantized op tests.

PiperOrigin-RevId: 283404748
Change-Id: Ief89d65cf9632783b3e602c8550bf4381d3d4705
This commit is contained in:
Yunlu Li 2019-12-02 13:26:33 -08:00 committed by TensorFlower Gardener
parent c77c0f8176
commit feac6d2f33
5 changed files with 104 additions and 12 deletions

View File

@ -156,14 +156,22 @@ const std::map<string, string>& GetKnownBrokenNnapiTests() {
// Returns a map from a test-name regex to the bug number tracking the known
// failure of fully-quantized tests matching that pattern. Intentionally
// leaked (never destroyed) to avoid static-destruction-order issues.
const std::map<string, string>& GetKnownQuantizeBrokenTests() {
  static const auto* const broken_tests = new std::map<string, string>{
      {R"(^\/conv.*fully_quantize=True)", "134594898"},
      {R"(^\/depthwiseconv.*fully_quantize=True)", "134594898"},
      {R"(^\/sum.*fully_quantize=True)", "134594898"},
      {R"(^\/l2norm.*fully_quantize=True)", "134594898"},
  };
  return *broken_tests;
}
// Returns a map from a test-name regex to the quantization error multiplier
// to apply for fully-quantized tests matching that pattern (overriding the
// driver's default multiplier). Intentionally leaked (never destroyed) to
// avoid static-destruction-order issues.
const std::map<string, int>& GetQuantizeTestsError() {
  // NOTE: renamed from kQuantizeBrokenTests (copy-paste from
  // GetKnownQuantizeBrokenTests) — these tests are not broken, they just
  // need a looser error bound.
  static const std::map<string, int>* const kQuantizeTestsError =
      new std::map<string, int>({
          {R"(^\/conv_relu1.*fully_quantize=True)", 18},
          {R"(^\/conv_relu6.*fully_quantize=True)", 8},
          {R"(^\/maximum.*fully_quantize=True)", 8},
      });
  return *kQuantizeTestsError;
}
// Allows test data to be unarchived into a temporary directory and makes
// sure those temporary directories are removed later.
class ArchiveEnvironment : public ::testing::Environment {
@ -299,10 +307,16 @@ TEST_P(OpsTest, RunZipTests) {
tflite::testing::TfLiteDriver test_driver(
FLAGS_use_nnapi ? TfLiteDriver::DelegateType::kNnapi
: TfLiteDriver::DelegateType::kNone);
auto quantized_tests_error = GetQuantizeTestsError();
bool fully_quantize = false;
if (test_path.find("fully_quantize=True") != std::string::npos) {
// TODO(b/134594898): Tighten this constraint.
test_driver.SetThreshold(0.2, 0.1);
for (const auto& p : quantized_tests_error) {
if (RE2::PartialMatch(test_name, p.first)) {
test_driver.SetQuantizationErrorMultiplier(p.second);
break;
}
}
fully_quantize = true;
}
@ -313,7 +327,6 @@ TEST_P(OpsTest, RunZipTests) {
auto kBrokenNnapiTests = GetKnownBrokenNnapiTests();
broken_tests.insert(kBrokenNnapiTests.begin(), kBrokenNnapiTests.end());
}
auto quantize_broken_tests = GetKnownQuantizeBrokenTests();
bool result = tflite::testing::ParseAndRunTests(&tflite_stream, &test_driver);
string message = test_driver.GetErrorMessage();
@ -346,7 +359,7 @@ TEST_P(OpsTest, RunZipTests) {
if (!result) {
string bug_number;
// See if the tests are potential quantize failures.
for (const auto& p : quantize_broken_tests) {
for (const auto& p : GetKnownQuantizeBrokenTests()) {
if (RE2::PartialMatch(test_name, p.first)) {
bug_number = p.second;
break;

View File

@ -18,7 +18,6 @@ limitations under the License.
#include <complex>
#include <memory>
#include <vector>
#include "absl/strings/escaping.h"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/delegates/flex/delegate.h"
@ -37,6 +36,22 @@ namespace {
const double kRelativeThreshold = 1e-2f;
const double kAbsoluteThreshold = 1e-4f;
// For quantized tests, we use a different error measurement from float ones.
// Assumes the baseline is always a float TF model.
// Error of a quantized model compared to the baseline comes from two sources:
// 1. the math done with quantized inputs, and
// 2. quantization of the output.
// Assuming there is no error introduced by source 1, the theoretical maximum
// error allowed for the output is 0.5 * scale, because scale is equal to the
// size of the quantization bucket.
//
// As a result, we use `scale` as a unit for measuring the quantization error.
// To add the error introduced by source 1 as well, we need to relax the
// multiplier from 0.5 to a larger number, which is model/op dependent.
// The number below is large enough to account for both sources of error
// for most quantized op tests to pass.
const int kQuantizationErrorMultiplier = 4;
// Returns the value in the given position in a tensor.
template <typename T>
T Value(void* data, int index) {
@ -58,15 +73,31 @@ unique_void_ptr make_type_erased_array(size_t size) {
[](void* data) { delete[] static_cast<T*>(data); });
}
bool IsQuantized(const TfLiteTensor& tensor) {
if (tensor.type != kTfLiteInt8) return false;
if (tensor.quantization.params != nullptr) {
auto* quantization =
reinterpret_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
if (quantization->scale != nullptr && quantization->scale->size == 1 &&
quantization->zero_point != nullptr &&
quantization->zero_point->size == 1) {
return true;
}
}
return false;
}
} // namespace
class TfLiteDriver::DataExpectation {
public:
DataExpectation(double relative_threshold, double absolute_threshold)
DataExpectation(double relative_threshold, double absolute_threshold,
int quantization_error_multiplier)
: data_(nullptr, nullptr),
num_elements_(0),
relative_threshold_(relative_threshold),
absolute_threshold_(absolute_threshold) {}
absolute_threshold_(absolute_threshold),
quantization_error_multiplier_(quantization_error_multiplier) {}
template <typename T>
void SetData(const string& csv_values) {
@ -128,11 +159,13 @@ class TfLiteDriver::DataExpectation {
}
bool TypedCheckString(bool verbose, const TfLiteTensor& tensor);
bool QuantizedCheck(bool verbose, const TfLiteTensor& tensor);
unique_void_ptr data_;
size_t num_elements_;
double relative_threshold_;
double absolute_threshold_;
int quantization_error_multiplier_;
};
class TfLiteDriver::ShapeExpectation {
@ -218,8 +251,37 @@ bool TfLiteDriver::DataExpectation::TypedCheckString(
return true;
}
bool TfLiteDriver::DataExpectation::QuantizedCheck(bool verbose,
const TfLiteTensor& tensor) {
auto* quantization =
reinterpret_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
const float scale = quantization->scale->data[0];
const int32 zero_point = quantization->zero_point->data[0];
bool good_result = true;
for (int i = 0; i < tensor.bytes; i++) {
const int32 computed = tensor.data.int8[i];
const float dequantized =
static_cast<float>(scale * (computed - zero_point));
const float reference = Value<float>(data_.get(), i);
if (std::abs(dequantized - reference) >
quantization_error_multiplier_ * scale) {
if (verbose) {
std::cerr << " index " << i << ": got " << dequantized
<< ", but expected " << reference << std::endl;
}
good_result = false;
}
}
return good_result;
}
bool TfLiteDriver::DataExpectation::Check(bool verbose,
const TfLiteTensor& tensor) {
if (IsQuantized(tensor)) {
return QuantizedCheck(verbose, tensor);
}
switch (tensor.type) {
case kTfLiteFloat32:
return TypedCheck<float, float>(verbose, tensor);
@ -247,7 +309,8 @@ bool TfLiteDriver::DataExpectation::Check(bool verbose,
TfLiteDriver::TfLiteDriver(DelegateType delegate_type, bool reference_kernel)
: delegate_(nullptr, nullptr),
relative_threshold_(kRelativeThreshold),
absolute_threshold_(kAbsoluteThreshold) {
absolute_threshold_(kAbsoluteThreshold),
quantization_error_multiplier_(kQuantizationErrorMultiplier) {
if (reference_kernel) {
resolver_.reset(new ops::builtin::BuiltinRefOpResolver);
} else {
@ -395,6 +458,11 @@ void TfLiteDriver::SetThreshold(double relative_threshold,
absolute_threshold_ = absolute_threshold;
}
// Overrides the default quantization error multiplier used when checking
// quantized outputs: a dequantized element may differ from its float
// reference by up to quantization_error_multiplier * scale.
void TfLiteDriver::SetQuantizationErrorMultiplier(
int quantization_error_multiplier) {
quantization_error_multiplier_ = quantization_error_multiplier;
}
void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
if (!IsValid()) return;
auto* tensor = interpreter_->tensor(id);
@ -402,7 +470,14 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
Invalidate(absl::StrCat("Overridden expectation for tensor '", id, "'"));
}
expected_output_[id].reset(
new DataExpectation(relative_threshold_, absolute_threshold_));
new DataExpectation(relative_threshold_, absolute_threshold_,
quantization_error_multiplier_));
if (IsQuantized(*tensor)) {
expected_output_[id]->SetData<float>(csv_values);
return;
}
switch (tensor->type) {
case kTfLiteFloat32:
expected_output_[id]->SetData<float>(csv_values);

View File

@ -64,6 +64,7 @@ class TfLiteDriver : public TestRunner {
bool CheckResults() override;
string ReadOutput(int id) override;
void SetThreshold(double relative_threshold, double absolute_threshold);
void SetQuantizationErrorMultiplier(int quantization_error_multiplier);
protected:
Interpreter::TfLiteDelegatePtr delegate_;
@ -95,6 +96,7 @@ class TfLiteDriver : public TestRunner {
std::map<int, TfLiteTensor*> tensors_to_deallocate_;
double relative_threshold_;
double absolute_threshold_;
int quantization_error_multiplier_;
};
} // namespace testing

View File

@ -112,7 +112,7 @@ TEST(TfliteDriverTest, AddQuantizedInt8Test) {
runner->SetInput(1, "1,1,1,1");
runner->SetExpectation(2, "3,3,3,3");
runner->SetExpectation(2, "0.0117,0.0117,0.0117,0.0117");
runner->Invoke();
ASSERT_TRUE(runner->IsValid());

View File

@ -146,6 +146,8 @@ def toco_convert(options, graph_def, input_tensors, output_tensors, **kwargs):
if extra_toco_options.inference_output_type:
converter.inference_output_type = (
extra_toco_options.inference_output_type)
else:
converter.inference_output_type = tf.int8
try:
tflite_model = converter.convert()