Add debug_mode in NumericVerify custom op.
This CL adds debug_mode in NumericVerify op for the purpose of debugging. In details, the current behavior of NumericVerify op is to throw errors if discrepancy between the values of float and quantized layers. However, for debugging, we need another behavior - collecting all debugging information about the discrepancies without any errors. This debug_mode outputs the differences between float & quantized values. Later, python debugger API can deal with this information to calculate statistics such as MSE, Min/Max, Mean/Stddev, etc. PiperOrigin-RevId: 346217688 Change-Id: I26f8e1f4adfce2d26cd534f4b706bb24a020f1e2
This commit is contained in:
parent
d36db2955c
commit
c28356d6e0
@ -1141,14 +1141,17 @@ cc_test(
|
||||
srcs = ["numeric_verify_test.cc"],
|
||||
tags = ["tflite_nnapi"],
|
||||
deps = [
|
||||
":kernel_util",
|
||||
":test_main",
|
||||
":test_util",
|
||||
"//tensorflow/lite:framework",
|
||||
"//tensorflow/lite/kernels/internal:reference",
|
||||
"//tensorflow/lite/kernels/internal:types",
|
||||
"//tensorflow/lite/schema:schema_fbs",
|
||||
"//third_party/eigen3",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_googletest//:gtest",
|
||||
"@flatbuffers",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include "flatbuffers/flexbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/dequantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
|
||||
@ -36,13 +37,19 @@ namespace ops {
|
||||
namespace custom {
|
||||
namespace numeric_verify {
|
||||
|
||||
static constexpr const char kToleranceStr[] = "tolerance";
|
||||
static constexpr const char kDebugModeStr[] = "debug_mode";
|
||||
static constexpr const int kTemporaryDequantizedTensor = 0;
|
||||
|
||||
struct OpContext {
|
||||
OpContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input = GetInput(context, node, 0);
|
||||
ref = GetInput(context, node, 1);
|
||||
output = GetOutput(context, node, 0);
|
||||
}
|
||||
const TfLiteTensor* input;
|
||||
const TfLiteTensor* ref;
|
||||
TfLiteTensor* output;
|
||||
};
|
||||
|
||||
const int kTensorNotAllocated = -1;
|
||||
@ -50,21 +57,23 @@ const int kTensorNotAllocated = -1;
|
||||
struct OpData {
|
||||
// The percentage of the tensor value range. Must be a number less than 1.0.
|
||||
float tolerance;
|
||||
// The abstract value allowed for the floating-point value difference.
|
||||
float max_diff;
|
||||
// This boolean value is only used when the input tensor is constant.
|
||||
bool float_input_initialized;
|
||||
int cache_tensor_id = kTensorNotAllocated;
|
||||
// This boolean value is for controlling the behavior of numeric verify op.
|
||||
bool debug_mode;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
auto* op_data = new OpData();
|
||||
op_data->float_input_initialized = false;
|
||||
|
||||
// Get the tolerance parameter from the buffer. Use flexbuffers asMap if there
|
||||
// multiple custom options.
|
||||
const float* buffer_t = reinterpret_cast<const float*>(buffer);
|
||||
op_data->tolerance = *buffer_t;
|
||||
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
|
||||
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
|
||||
const float tolerance = m[kToleranceStr].AsFloat();
|
||||
const bool debug_mode = m[kDebugModeStr].AsBool();
|
||||
op_data->tolerance = tolerance;
|
||||
op_data->debug_mode = debug_mode;
|
||||
|
||||
return op_data;
|
||||
}
|
||||
@ -75,30 +84,19 @@ void Free(TfLiteContext* context, void* buffer) {
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0);
|
||||
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
OpContext op_context(context, node);
|
||||
|
||||
const int num_output = (op_data->debug_mode) ? 1 : 0;
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), num_output);
|
||||
|
||||
TF_LITE_ENSURE(context, op_context.input->type == kTfLiteUInt8 ||
|
||||
op_context.input->type == kTfLiteInt8 ||
|
||||
op_context.input->type == kTfLiteInt16 ||
|
||||
op_context.input->type == kTfLiteFloat16);
|
||||
TF_LITE_ENSURE(context, op_context.ref->type == kTfLiteFloat32);
|
||||
|
||||
op_data->max_diff = op_data->tolerance * op_context.input->params.scale;
|
||||
switch (op_context.input->type) {
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
op_data->max_diff *= (1 << 8);
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
op_data->max_diff *= (1 << 16);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// Allocate tensor to store the dequantized inputs.
|
||||
if (op_data->cache_tensor_id == kTensorNotAllocated) {
|
||||
TF_LITE_ENSURE_OK(
|
||||
@ -111,7 +109,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
TfLiteTensor* dequantized;
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
GetTemporarySafe(context, node, /*index=*/0, &dequantized));
|
||||
GetTemporarySafe(context, node, kTemporaryDequantizedTensor,
|
||||
&dequantized));
|
||||
dequantized->type = op_context.ref->type;
|
||||
dequantized->allocation_type = kTfLiteDynamic;
|
||||
|
||||
@ -119,6 +118,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
context, dequantized,
|
||||
TfLiteIntArrayCopy(op_context.input->dims)));
|
||||
|
||||
if (op_data->debug_mode) {
|
||||
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, num_output - 1,
|
||||
&op_context.output));
|
||||
op_context.output->type = kTfLiteFloat32;
|
||||
op_context.output->allocation_type = kTfLiteArenaRwPersistent;
|
||||
return context->ResizeTensor(context, op_context.output,
|
||||
TfLiteIntArrayCopy(op_context.input->dims));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@ -146,7 +153,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Dequantize the input
|
||||
TfLiteTensor* dequantized;
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
GetTemporarySafe(context, node, /*index=*/0, &dequantized));
|
||||
GetTemporarySafe(context, node, kTemporaryDequantizedTensor,
|
||||
&dequantized));
|
||||
auto status = builtin::dequantize::DequantizeImpl<kernel_type>(
|
||||
context, node, op_context.input, dequantized);
|
||||
if (status != kTfLiteOk) {
|
||||
@ -157,15 +165,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
op_data->float_input_initialized = true;
|
||||
}
|
||||
|
||||
// If the tolerance is very small, we only display the stats of the diff.
|
||||
if (op_data->tolerance < 0.1) {
|
||||
// If the debug_mode is on, we don't throw any errors.
|
||||
// We just calculate difference between float and quantized values, letting
|
||||
// python debugger deal with the information.
|
||||
if (op_data->debug_mode || op_data->tolerance < 0.1) {
|
||||
const int num_output = (op_data->debug_mode) ? 1 : 0;
|
||||
const int n = NumElements(dequantized);
|
||||
if (op_data->debug_mode) {
|
||||
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, num_output - 1,
|
||||
&op_context.output));
|
||||
auto output_data = GetTensorData<float>(op_context.output);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
float dequant = GetTensorData<float>(dequantized)[i];
|
||||
float reference = GetTensorData<float>(op_context.ref)[i];
|
||||
output_data[i] = dequant - reference;
|
||||
}
|
||||
}
|
||||
// These statistics logging was added to identify some errors in practice.
|
||||
std::vector<double> diffs, temp;
|
||||
diffs.reserve(NumElements(dequantized));
|
||||
temp.reserve(NumElements(dequantized));
|
||||
for (int i = 0; i < NumElements(op_context.ref); ++i) {
|
||||
diffs.reserve(n);
|
||||
temp.reserve(n);
|
||||
diffs.resize(n);
|
||||
temp.resize(n);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
float dequant = GetTensorData<float>(dequantized)[i];
|
||||
float reference = GetTensorData<float>(op_context.ref)[i];
|
||||
diffs.push_back(dequant - reference);
|
||||
diffs[i] = static_cast<double>(dequant - reference);
|
||||
}
|
||||
double mean =
|
||||
std::accumulate(diffs.begin(), diffs.end(), 0.0) / diffs.size();
|
||||
@ -184,24 +209,24 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
mean, max_diff, op_context.input->params.scale,
|
||||
op_context.input->params.zero_point);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Verify the dequantized output.
|
||||
auto max_diff = op_data->tolerance * op_context.input->params.scale;
|
||||
for (int i = 0; i < NumElements(op_context.ref); ++i) {
|
||||
int32_t value = GetQuantizedValue(op_context, i);
|
||||
float dequant = GetTensorData<float>(dequantized)[i];
|
||||
float reference = GetTensorData<float>(op_context.ref)[i];
|
||||
float diff = std::abs(reference - dequant);
|
||||
if (diff > max_diff) {
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context,
|
||||
"Mismatch: %f is quantized to %d with (%f, %d). "
|
||||
"abs(%f - %f) = %f > %f (tolerance) range percentage %f.\n",
|
||||
reference, value, op_context.input->params.scale,
|
||||
op_context.input->params.zero_point, reference, dequant, diff,
|
||||
max_diff, op_data->tolerance);
|
||||
return kTfLiteError;
|
||||
} else {
|
||||
// Verify the dequantized output.
|
||||
auto max_diff = op_data->tolerance * op_context.input->params.scale;
|
||||
for (int i = 0; i < NumElements(op_context.ref); ++i) {
|
||||
int32_t value = GetQuantizedValue(op_context, i);
|
||||
float dequant = GetTensorData<float>(dequantized)[i];
|
||||
float reference = GetTensorData<float>(op_context.ref)[i];
|
||||
float diff = std::abs(reference - dequant);
|
||||
if (diff > max_diff) {
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context,
|
||||
"Mismatch: %f is quantized to %d with (%f, %d). "
|
||||
"abs(%f - %f) = %f > %f (tolerance) range percentage %f.\n",
|
||||
reference, value, op_context.input->params.scale,
|
||||
op_context.input->params.zero_point, reference, dequant, diff,
|
||||
max_diff, op_data->tolerance);
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
|
@ -21,8 +21,11 @@ limitations under the License.
|
||||
#include <gtest/gtest.h>
|
||||
#include "absl/memory/memory.h"
|
||||
#include "third_party/eigen3/Eigen/Core"
|
||||
#include "flatbuffers/flexbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/interpreter.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/test_util.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
@ -42,15 +45,25 @@ class NumericVerifyOpModel : public SingleOpModel {
|
||||
public:
|
||||
NumericVerifyOpModel(TensorType type, std::initializer_list<int> shape,
|
||||
float scale, int32_t zero_point, int version,
|
||||
float tolerance = 5.0) {
|
||||
float tolerance = 5.0, bool debug_mode = false) {
|
||||
const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point};
|
||||
input_ = AddInput(input_tensor_data);
|
||||
ref_ = AddInput({TensorType_FLOAT32, shape});
|
||||
if (debug_mode) {
|
||||
// The output tensor has the same shape with that of the input tensor.
|
||||
output_ = AddOutput({TensorType_FLOAT32, shape});
|
||||
}
|
||||
|
||||
std::vector<uint8_t> custom_options(sizeof(float));
|
||||
memcpy(custom_options.data(), &tolerance, sizeof(float));
|
||||
|
||||
SetCustomOp("NUMERIC_VERIFY", custom_options,
|
||||
flexbuffers::Builder fbb;
|
||||
fbb.Map([&]() {
|
||||
fbb.Float("tolerance", tolerance);
|
||||
fbb.Bool("debug_mode", debug_mode);
|
||||
});
|
||||
fbb.Finish();
|
||||
|
||||
SetCustomOp("NUMERIC_VERIFY", fbb.GetBuffer(),
|
||||
ops::custom::Register_NUMERIC_VERIFY);
|
||||
|
||||
BuildInterpreter({GetShape(input_), GetShape(ref_)});
|
||||
@ -63,9 +76,12 @@ class NumericVerifyOpModel : public SingleOpModel {
|
||||
PopulateTensor(ref_, ref_data);
|
||||
}
|
||||
|
||||
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
|
||||
|
||||
private:
|
||||
int input_;
|
||||
int ref_;
|
||||
int output_;
|
||||
};
|
||||
|
||||
TEST(NumericVerifyOpTest, Uint8) {
|
||||
@ -117,5 +133,18 @@ TEST(NumericVerifyOpFailedTest, Int8) {
|
||||
EXPECT_EQ(m.InvokeUnchecked(), kTfLiteError);
|
||||
}
|
||||
|
||||
TEST(NumericVerifyOpDebugModeTest, Int8) {
|
||||
// [-63.5, 64] -> scale=0.5, zero_point=1 for INT8
|
||||
NumericVerifyOpModel m(TensorType_INT8, {2, 5}, 0.5, -1, 2, 5.0, true);
|
||||
|
||||
// The 5th element is set to 0.
|
||||
m.SetInputs<int8_t>({-128, -127, -126, -125, -124, 0, 124, 125, 126, 127},
|
||||
{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
|
||||
EXPECT_EQ(m.InvokeUnchecked(), kTfLiteOk);
|
||||
// The 5th element has discrepancy -61.5 (=dequantized - reference=0-(61.5)).
|
||||
EXPECT_THAT(
|
||||
m.GetOutput(),
|
||||
ElementsAreArray(ArrayFloatNear({0, 0, 0, 0, 0, -61.5, 0, 0, 0, 0})));
|
||||
}
|
||||
} // namespace
|
||||
} // namespace tflite
|
||||
|
Loading…
Reference in New Issue
Block a user