Better quantized op tests.
PiperOrigin-RevId: 283404748
Change-Id: Ief89d65cf9632783b3e602c8550bf4381d3d4705
@@ -156,14 +156,22 @@ const std::map<string, string>& GetKnownBrokenNnapiTests() {
const std::map<string, string>& GetKnownQuantizeBrokenTests() {
  static const std::map<string, string>* const kQuantizeBrokenTests =
      new std::map<string, string>({
          {R"(^\/conv.*fully_quantize=True)", "134594898"},
          {R"(^\/depthwiseconv.*fully_quantize=True)", "134594898"},
          {R"(^\/sum.*fully_quantize=True)", "134594898"},
          {R"(^\/l2norm.*fully_quantize=True)", "134594898"},
      });
  return *kQuantizeBrokenTests;
}

const std::map<string, int>& GetQuantizeTestsError() {
  static const std::map<string, int>* const kQuantizeBrokenTests =
      new std::map<string, int>({
          {R"(^\/conv_relu1.*fully_quantize=True)", 18},
          {R"(^\/conv_relu6.*fully_quantize=True)", 8},
          {R"(^\/maximum.*fully_quantize=True)", 8},
      });
  return *kQuantizeBrokenTests;
}

// Allows test data to be unarchived into a temporary directory and makes
// sure those temporary directories are removed later.
class ArchiveEnvironment : public ::testing::Environment {
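The two tables above are keyed by RE2 patterns over generated test names, and the RunZipTests hunk below walks them with RE2::PartialMatch, taking the first entry that matches. As a minimal standalone sketch of that lookup (the helper name, test name, and default multiplier here are hypothetical, purely for illustration):

#include <iostream>
#include <map>
#include <string>

#include "re2/re2.h"

// Hypothetical helper mirroring the lookup RunZipTests performs: return the
// multiplier of the first pattern that partially matches the test name, or a
// default when nothing matches.
int ErrorMultiplierFor(const std::string& test_name,
                       const std::map<std::string, int>& table,
                       int default_multiplier) {
  for (const auto& entry : table) {
    if (RE2::PartialMatch(test_name, entry.first)) return entry.second;
  }
  return default_multiplier;
}

int main() {
  const std::map<std::string, int> table = {
      {R"(^\/conv_relu1.*fully_quantize=True)", 18},
      {R"(^\/maximum.*fully_quantize=True)", 8},
  };
  // Illustrative test name; real names come from the generated zip tests.
  std::cout << ErrorMultiplierFor("/conv_relu1_abc,fully_quantize=True", table,
                                  /*default_multiplier=*/4)
            << std::endl;  // prints 18
  return 0;
}
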
@@ -299,10 +307,16 @@ TEST_P(OpsTest, RunZipTests) {
  tflite::testing::TfLiteDriver test_driver(
      FLAGS_use_nnapi ? TfLiteDriver::DelegateType::kNnapi
                      : TfLiteDriver::DelegateType::kNone);

  auto quantized_tests_error = GetQuantizeTestsError();
  bool fully_quantize = false;
  if (test_path.find("fully_quantize=True") != std::string::npos) {
    // TODO(b/134594898): Tighten this constraint.
    test_driver.SetThreshold(0.2, 0.1);
    for (const auto& p : quantized_tests_error) {
      if (RE2::PartialMatch(test_name, p.first)) {
        test_driver.SetQuantizationErrorMultiplier(p.second);
        break;
      }
    }
    fully_quantize = true;
  }

@@ -313,7 +327,6 @@ TEST_P(OpsTest, RunZipTests) {
    auto kBrokenNnapiTests = GetKnownBrokenNnapiTests();
    broken_tests.insert(kBrokenNnapiTests.begin(), kBrokenNnapiTests.end());
  }
  auto quantize_broken_tests = GetKnownQuantizeBrokenTests();

  bool result = tflite::testing::ParseAndRunTests(&tflite_stream, &test_driver);
  string message = test_driver.GetErrorMessage();
@@ -346,7 +359,7 @@ TEST_P(OpsTest, RunZipTests) {
  if (!result) {
    string bug_number;
    // See if the test is a potential quantize failure.
    for (const auto& p : quantize_broken_tests) {
    for (const auto& p : GetKnownQuantizeBrokenTests()) {
      if (RE2::PartialMatch(test_name, p.first)) {
        bug_number = p.second;
        break;

@@ -18,7 +18,6 @@ limitations under the License.
#include <complex>
#include <memory>
#include <vector>

#include "absl/strings/escaping.h"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/delegates/flex/delegate.h"
@@ -37,6 +36,22 @@ namespace {
const double kRelativeThreshold = 1e-2f;
const double kAbsoluteThreshold = 1e-4f;

// For quantized tests, we use a different error measurement from float ones.
// Assumes the baseline is always a float TF model.
// Error of a quantized model compared to the baseline comes from two sources:
// 1. the math done with quantized inputs, and
// 2. quantization of the output.
// Assuming there is no error introduced by source 1, the theoretical maximum
// error allowed for the output is 0.5 * scale, because scale is equal to the
// size of the quantization bucket.
//
// As a result, we use `scale` as a unit for measuring the quantization error.
// To account for the error introduced by source 1 as well, we need to relax
// the multiplier from 0.5 to a larger number, which is model/op dependent.
// The number below is large enough to cover both sources of error for most
// quantized op tests to pass.
const int kQuantizationErrorMultiplier = 4;

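To make the comment above concrete, here is a small worked example with a purely hypothetical scale; the enforced bound, multiplier * scale, is what QuantizedCheck further down compares against:

// Hypothetical int8 output tensor whose quantization scale is 1/255.
const float scale = 1.0f / 255.0f;               // ~0.0039, the size of one bucket
const float rounding_only_bound = 0.5f * scale;  // ~0.0020, output quantization alone
const float relaxed_bound =
    kQuantizationErrorMultiplier * scale;        // ~0.0157, also absorbs quantized-math error
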
// Returns the value in the given position in a tensor.
template <typename T>
T Value(void* data, int index) {
@@ -58,15 +73,31 @@ unique_void_ptr make_type_erased_array(size_t size) {
                         [](void* data) { delete[] static_cast<T*>(data); });
}

bool IsQuantized(const TfLiteTensor& tensor) {
  if (tensor.type != kTfLiteInt8) return false;

  if (tensor.quantization.params != nullptr) {
    auto* quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
    if (quantization->scale != nullptr && quantization->scale->size == 1 &&
        quantization->zero_point != nullptr &&
        quantization->zero_point->size == 1) {
      return true;
    }
  }
  return false;
}
}  // namespace

class TfLiteDriver::DataExpectation {
 public:
  DataExpectation(double relative_threshold, double absolute_threshold)
  DataExpectation(double relative_threshold, double absolute_threshold,
                  int quantization_error_multiplier)
      : data_(nullptr, nullptr),
        num_elements_(0),
        relative_threshold_(relative_threshold),
        absolute_threshold_(absolute_threshold) {}
        absolute_threshold_(absolute_threshold),
        quantization_error_multiplier_(quantization_error_multiplier) {}

  template <typename T>
  void SetData(const string& csv_values) {
@@ -128,11 +159,13 @@ class TfLiteDriver::DataExpectation {
  }

  bool TypedCheckString(bool verbose, const TfLiteTensor& tensor);
  bool QuantizedCheck(bool verbose, const TfLiteTensor& tensor);

  unique_void_ptr data_;
  size_t num_elements_;
  double relative_threshold_;
  double absolute_threshold_;
  int quantization_error_multiplier_;
};

class TfLiteDriver::ShapeExpectation {
@@ -218,8 +251,37 @@ bool TfLiteDriver::DataExpectation::TypedCheckString(
  return true;
}

bool TfLiteDriver::DataExpectation::QuantizedCheck(bool verbose,
                                                   const TfLiteTensor& tensor) {
  auto* quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
  const float scale = quantization->scale->data[0];
  const int32 zero_point = quantization->zero_point->data[0];

  bool good_result = true;
  for (int i = 0; i < tensor.bytes; i++) {
    const int32 computed = tensor.data.int8[i];
    const float dequantized =
        static_cast<float>(scale * (computed - zero_point));
    const float reference = Value<float>(data_.get(), i);
    if (std::abs(dequantized - reference) >
        quantization_error_multiplier_ * scale) {
      if (verbose) {
        std::cerr << " index " << i << ": got " << dequantized
                  << ", but expected " << reference << std::endl;
      }
      good_result = false;
    }
  }
  return good_result;
}
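Plugging made-up quantization parameters into the check above shows how a raw int8 value turns into the dequantized number that gets compared against the float reference:

// Worked example; the parameters are illustrative, not from any test here.
const float scale = 0.004f;
const int32_t zero_point = -128;
const int32_t computed = -125;                              // raw int8 reading
const float dequantized = scale * (computed - zero_point);  // 0.004 * 3 = 0.012
// Passes when |dequantized - reference| <= quantization_error_multiplier_ * scale,
// i.e. within 0.016 of the reference for the default multiplier of 4.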

bool TfLiteDriver::DataExpectation::Check(bool verbose,
                                          const TfLiteTensor& tensor) {
  if (IsQuantized(tensor)) {
    return QuantizedCheck(verbose, tensor);
  }

  switch (tensor.type) {
    case kTfLiteFloat32:
      return TypedCheck<float, float>(verbose, tensor);
@@ -247,7 +309,8 @@ bool TfLiteDriver::DataExpectation::Check(bool verbose,
TfLiteDriver::TfLiteDriver(DelegateType delegate_type, bool reference_kernel)
    : delegate_(nullptr, nullptr),
      relative_threshold_(kRelativeThreshold),
      absolute_threshold_(kAbsoluteThreshold) {
      absolute_threshold_(kAbsoluteThreshold),
      quantization_error_multiplier_(kQuantizationErrorMultiplier) {
  if (reference_kernel) {
    resolver_.reset(new ops::builtin::BuiltinRefOpResolver);
  } else {
@@ -395,6 +458,11 @@ void TfLiteDriver::SetThreshold(double relative_threshold,
  absolute_threshold_ = absolute_threshold;
}

void TfLiteDriver::SetQuantizationErrorMultiplier(
    int quantization_error_multiplier) {
  quantization_error_multiplier_ = quantization_error_multiplier;
}

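Below is a rough sketch of how a caller might configure the driver for a fully quantized test, mirroring the RunZipTests changes earlier in this commit; the particular threshold and multiplier values are illustrative, not prescribed by the commit:

tflite::testing::TfLiteDriver driver(
    tflite::testing::TfLiteDriver::DelegateType::kNone);
// Loosen the float thresholds, then relax the quantization error bound for an
// op that is known to accumulate more quantized-math error.
driver.SetThreshold(/*relative_threshold=*/0.2, /*absolute_threshold=*/0.1);
driver.SetQuantizationErrorMultiplier(8);
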
void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
  if (!IsValid()) return;
  auto* tensor = interpreter_->tensor(id);
@@ -402,7 +470,14 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
    Invalidate(absl::StrCat("Overridden expectation for tensor '", id, "'"));
  }
  expected_output_[id].reset(
      new DataExpectation(relative_threshold_, absolute_threshold_));
      new DataExpectation(relative_threshold_, absolute_threshold_,
                          quantization_error_multiplier_));

  if (IsQuantized(*tensor)) {
    expected_output_[id]->SetData<float>(csv_values);
    return;
  }

  switch (tensor->type) {
    case kTfLiteFloat32:
      expected_output_[id]->SetData<float>(csv_values);

@@ -64,6 +64,7 @@ class TfLiteDriver : public TestRunner {
  bool CheckResults() override;
  string ReadOutput(int id) override;
  void SetThreshold(double relative_threshold, double absolute_threshold);
  void SetQuantizationErrorMultiplier(int quantization_error_multiplier);

 protected:
  Interpreter::TfLiteDelegatePtr delegate_;
@@ -95,6 +96,7 @@ class TfLiteDriver : public TestRunner {
  std::map<int, TfLiteTensor*> tensors_to_deallocate_;
  double relative_threshold_;
  double absolute_threshold_;
  int quantization_error_multiplier_;
};

}  // namespace testing

@@ -112,7 +112,7 @@ TEST(TfliteDriverTest, AddQuantizedInt8Test) {

  runner->SetInput(1, "1,1,1,1");

  runner->SetExpectation(2, "3,3,3,3");
  runner->SetExpectation(2, "0.0117,0.0117,0.0117,0.0117");

  runner->Invoke();
  ASSERT_TRUE(runner->IsValid());

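The expectation for the quantized output tensor is now written in dequantized float units instead of raw int8 values. Assuming, purely for illustration, an output scale of roughly 1/256 with a zero point of 0, the old raw expectation of 3 corresponds to about 3 * (1/256) ≈ 0.0117, which QuantizedCheck then compares against within multiplier * scale of the actual dequantized output.
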
@@ -146,6 +146,8 @@ def toco_convert(options, graph_def, input_tensors, output_tensors, **kwargs):
    if extra_toco_options.inference_output_type:
      converter.inference_output_type = (
          extra_toco_options.inference_output_type)
    else:
      converter.inference_output_type = tf.int8

    try:
      tflite_model = converter.convert()