Make quantization properties per-tensor.
PiperOrigin-RevId: 251876877
This commit is contained in:
parent
651201e51a
commit
a372bb0e9d
@ -21,18 +21,18 @@ OperatorProperty GetOperatorProperty(const BuiltinOperator& op) {
|
||||
OperatorProperty property;
|
||||
switch (op) {
|
||||
case BuiltinOperator_ADD:
|
||||
property.input_indexes = {0, 1};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}, {1, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_ARG_MAX:
|
||||
property.input_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
// ArgMax has no quantizable output.
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_AVERAGE_POOL_2D:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
@ -40,175 +40,196 @@ OperatorProperty GetOperatorProperty(const BuiltinOperator& op) {
|
||||
case BuiltinOperator_SPACE_TO_BATCH_ND:
|
||||
case BuiltinOperator_SPACE_TO_DEPTH:
|
||||
// We skip inputs 1 and 2 since they aren't real valued (they are shapes).
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_CONCATENATION:
|
||||
property.arbitrary_inputs = true;
|
||||
property.input_indexes = {};
|
||||
property.output_indexes = {0};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_CONV_2D:
|
||||
property.per_axis = true;
|
||||
property.per_axis_index = 0;
|
||||
property.input_indexes = {0, 1};
|
||||
property.output_indexes = {0};
|
||||
case BuiltinOperator_CONV_2D: {
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.per_axis = true;
|
||||
tensor_property.per_axis_index = 0;
|
||||
tensor_property.symmetric = true;
|
||||
property.inputs = {{0, {}}, {1, tensor_property}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.biases = {2};
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_DEPTHWISE_CONV_2D:
|
||||
property.per_axis = true;
|
||||
property.per_axis_index = 3;
|
||||
property.input_indexes = {0, 1};
|
||||
property.output_indexes = {0};
|
||||
}
|
||||
case BuiltinOperator_DEPTHWISE_CONV_2D: {
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.per_axis = true;
|
||||
tensor_property.per_axis_index = 3;
|
||||
tensor_property.symmetric = true;
|
||||
property.inputs = {
|
||||
{0, {}},
|
||||
{1, tensor_property},
|
||||
};
|
||||
property.outputs = {{0, {}}};
|
||||
property.biases = {2};
|
||||
property.version = 3;
|
||||
break;
|
||||
}
|
||||
case BuiltinOperator_EQUAL:
|
||||
case BuiltinOperator_NOT_EQUAL:
|
||||
case BuiltinOperator_GREATER:
|
||||
case BuiltinOperator_GREATER_EQUAL:
|
||||
case BuiltinOperator_LESS:
|
||||
case BuiltinOperator_LESS_EQUAL:
|
||||
property.input_indexes = {0, 1};
|
||||
property.inputs = {{0, {}}, {1, {}}};
|
||||
// Comparisons have no quantizable outputs.
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_FULLY_CONNECTED:
|
||||
property.input_indexes = {0, 1};
|
||||
property.output_indexes = {0};
|
||||
case BuiltinOperator_FULLY_CONNECTED: {
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.symmetric = true;
|
||||
property.inputs = {{0, {}}, {1, tensor_property}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.biases = {2};
|
||||
property.version = 4;
|
||||
break;
|
||||
}
|
||||
case BuiltinOperator_GATHER:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_LOG_SOFTMAX:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
case BuiltinOperator_LOG_SOFTMAX: {
|
||||
property.inputs = {{0, {}}};
|
||||
// LogSoftmax requires output with 16/256 as scale and 127 as zero point.
|
||||
property.restriction_on_output = true;
|
||||
property.restricted_value_on_output = {16.0 / 256.0, 127};
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.restriction = true;
|
||||
tensor_property.restricted_value = {16.0 / 256.0, 127};
|
||||
property.outputs = {{0, tensor_property}};
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_LOGISTIC:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
}
|
||||
case BuiltinOperator_LOGISTIC: {
|
||||
property.inputs = {{0, {}}};
|
||||
// Logistic requires output with 1/256 as scale and -128 as zero point.
|
||||
property.restriction_on_output = true;
|
||||
property.restricted_value_on_output = {1 / 256.0, -128};
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.restriction = true;
|
||||
tensor_property.restricted_value = {1 / 256.0, -128};
|
||||
property.outputs = {{0, tensor_property}};
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_L2_NORMALIZATION:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
}
|
||||
case BuiltinOperator_L2_NORMALIZATION: {
|
||||
property.inputs = {{0, {}}};
|
||||
// L2 Norm requires output with 1/128 as scale and 0 as zero point.
|
||||
property.restriction_on_output = true;
|
||||
property.restricted_value_on_output = {1 / 128.0, 0};
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.restriction = true;
|
||||
tensor_property.restricted_value = {1 / 128.0, 0};
|
||||
property.outputs = {{0, tensor_property}};
|
||||
property.version = 2;
|
||||
break;
|
||||
}
|
||||
case BuiltinOperator_MAX_POOL_2D:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_MAXIMUM:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_MEAN:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_MINIMUM:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_MUL:
|
||||
property.input_indexes = {0, 1};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_PAD:
|
||||
case BuiltinOperator_PADV2:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_QUANTIZE:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.version = 1;
|
||||
break;
|
||||
case BuiltinOperator_RESHAPE:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 1;
|
||||
break;
|
||||
case BuiltinOperator_RESIZE_BILINEAR:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_SHAPE:
|
||||
property.input_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
// Shape has no quantizable output.
|
||||
property.version = 1;
|
||||
break;
|
||||
case BuiltinOperator_SLICE:
|
||||
// We skip inputs 1 and 2 since they aren't real valued (they are the
|
||||
// index and size).
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_SQUEEZE:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 1;
|
||||
break;
|
||||
case BuiltinOperator_SOFTMAX:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
case BuiltinOperator_SOFTMAX: {
|
||||
property.inputs = {{0, {}}};
|
||||
// Softmax requires output with 1/256 as scale and -128 as zero point.
|
||||
property.restriction_on_output = true;
|
||||
property.restricted_value_on_output = {1 / 256.0, -128};
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.restriction = true;
|
||||
tensor_property.restricted_value = {1 / 256.0, -128};
|
||||
property.outputs = {{0, tensor_property}};
|
||||
property.version = 2;
|
||||
break;
|
||||
}
|
||||
case BuiltinOperator_SUB:
|
||||
property.input_indexes = {0, 1};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}, {1, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.version = 2;
|
||||
break;
|
||||
case BuiltinOperator_TANH:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
case BuiltinOperator_TANH: {
|
||||
property.inputs = {{0, {}}};
|
||||
// Tanh requires output with 1/128 as scale and 0 as zero point.
|
||||
property.restriction_on_output = true;
|
||||
property.restricted_value_on_output = {1 / 128.0, 0};
|
||||
TensorProperty tensor_property;
|
||||
tensor_property.restriction = true;
|
||||
tensor_property.restricted_value = {1 / 128.0, 0};
|
||||
property.outputs = {{0, tensor_property}};
|
||||
property.version = 2;
|
||||
break;
|
||||
}
|
||||
case BuiltinOperator_TRANSPOSE:
|
||||
property.input_indexes = {0};
|
||||
property.output_indexes = {0};
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.restrict_same_input_output_scale = true;
|
||||
property.version = 2;
|
||||
break;
|
||||
|
@ -22,30 +22,34 @@ namespace tflite {
|
||||
namespace optimize {
|
||||
namespace operator_property {
|
||||
|
||||
struct OperatorProperty {
|
||||
// Whether quantized operations are currently supported.
|
||||
bool quantizable = true;
|
||||
// Per axis.
|
||||
struct TensorProperty {
|
||||
// per_axis also implies symmetric currently.
|
||||
bool per_axis = false;
|
||||
// TODO(jianlijianli): remove dimension index and read it from tensor instead.
|
||||
int per_axis_index = 0;
|
||||
bool symmetric = false;
|
||||
|
||||
// Constraints.
|
||||
bool restriction = false;
|
||||
// scale/zero_point hardcoded.
|
||||
std::pair<float, int> restricted_value = {0.0, 0};
|
||||
};
|
||||
|
||||
struct OperatorProperty {
|
||||
// Whether quantized operations are currently supported.
|
||||
bool quantizable = true;
|
||||
|
||||
// Op has arbitrary number of inputs, such as concat.
|
||||
bool arbitrary_inputs = false;
|
||||
// Input and weight indexes. Unable to separate the two because of ops such as
|
||||
// ADD.
|
||||
std::vector<int> input_indexes = {};
|
||||
|
||||
// Output indexes
|
||||
std::vector<int> output_indexes = {};
|
||||
|
||||
// Input indexes -> input tensor property.
|
||||
std::vector<std::pair<int, TensorProperty>> inputs = {};
|
||||
// Output indexes -> output tensor property.
|
||||
std::vector<std::pair<int, TensorProperty>> outputs = {};
|
||||
// Bias indexes.
|
||||
std::vector<int> biases = {};
|
||||
|
||||
// Constraints.
|
||||
bool restrict_same_input_output_scale = false;
|
||||
bool restriction_on_output = false;
|
||||
std::pair<float, float> restricted_value_on_output = {0.0, 0.0};
|
||||
|
||||
// Op version.
|
||||
int version = 1;
|
||||
|
@ -357,6 +357,9 @@ TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
|
||||
|
||||
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
|
||||
int per_axis_index) {
|
||||
// TODO(suharshs): Currently we conflate quantizing weights and constants. It's
|
||||
// possible that the right thing to do is asymmetric quantize the weight. Add
|
||||
// support for this.
|
||||
if (per_channel) {
|
||||
return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index);
|
||||
} else {
|
||||
|
@ -276,12 +276,12 @@ TfLiteStatus ApplyConstraints(ModelT* model, ErrorReporter* error_reporter) {
|
||||
}
|
||||
// Basically only Concat passes this check.
|
||||
if (!property.restrict_same_input_output_scale ||
|
||||
(property.input_indexes.size() == 1 &&
|
||||
property.output_indexes.size() == 1 && property.biases.empty())) {
|
||||
(property.inputs.size() == 1 && property.outputs.size() == 1 &&
|
||||
property.biases.empty())) {
|
||||
continue;
|
||||
}
|
||||
// If ApplyConstraintsnd requant is needed, use the min of min and max of
|
||||
// max, which means using the scale and zero point of output.
|
||||
// If ApplyConstraints and requant is needed, use the min of min and max
|
||||
// of max, which means using the scale and zero point of output.
|
||||
TensorT* output_tensor = subgraph->tensors[op->outputs[0]].get();
|
||||
if (!utils::QuantizationParametersExist(output_tensor)) {
|
||||
error_reporter->Report(
|
||||
@ -332,24 +332,23 @@ TfLiteStatus ApplyConstraints(ModelT* model, ErrorReporter* error_reporter) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
std::vector<int> GetInputIndexes(const OperatorT* op,
|
||||
operator_property::OperatorProperty property) {
|
||||
std::vector<int> input_indexes;
|
||||
std::vector<std::pair<int, operator_property::TensorProperty>> GetInputs(
|
||||
const OperatorT* op, operator_property::OperatorProperty property) {
|
||||
std::vector<std::pair<int, operator_property::TensorProperty>> inputs;
|
||||
if (property.arbitrary_inputs || !property.quantizable) {
|
||||
for (int i = 0; i < op->inputs.size(); ++i) {
|
||||
input_indexes.push_back(i);
|
||||
inputs.push_back({i, {}});
|
||||
}
|
||||
} else {
|
||||
input_indexes = property.input_indexes;
|
||||
inputs = property.inputs;
|
||||
}
|
||||
return input_indexes;
|
||||
return inputs;
|
||||
}
|
||||
|
||||
bool ShouldRestrictSameInputOutputScale(
|
||||
operator_property::OperatorProperty property) {
|
||||
return (property.input_indexes.size() == 1 &&
|
||||
property.output_indexes.size() == 1 && property.biases.empty() &&
|
||||
property.restrict_same_input_output_scale);
|
||||
return (property.inputs.size() == 1 && property.outputs.size() == 1 &&
|
||||
property.biases.empty() && property.restrict_same_input_output_scale);
|
||||
}
|
||||
|
||||
bool IsSubgraphInput(SubGraphT* subgraph, int32_t index) {
|
||||
@ -362,10 +361,13 @@ bool IsSubgraphInput(SubGraphT* subgraph, int32_t index) {
|
||||
}
|
||||
|
||||
// Quantize the op input. Will increment op_idx if ops are added.
|
||||
TfLiteStatus QuantizeOpInput(ModelT* model, int32_t subgraph_idx,
|
||||
size_t* op_idx,
|
||||
TfLiteStatus QuantizeOpInput(
|
||||
ModelT* model, int32_t subgraph_idx, size_t* op_idx,
|
||||
operator_property::OperatorProperty property,
|
||||
int32_t input_idx, ErrorReporter* error_reporter) {
|
||||
const std::pair<int32_t, operator_property::TensorProperty>& input,
|
||||
ErrorReporter* error_reporter) {
|
||||
int32_t input_idx = input.first;
|
||||
operator_property::TensorProperty tensor_property = input.second;
|
||||
SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
|
||||
OperatorT* op = subgraph->operators[*op_idx].get();
|
||||
const BuiltinOperator op_code =
|
||||
@ -384,8 +386,11 @@ TfLiteStatus QuantizeOpInput(ModelT* model, int32_t subgraph_idx,
|
||||
if (property.quantizable && !is_input_quantized) {
|
||||
// The operation is quantizable, but the input isn't yet quantized.
|
||||
if (utils::HasBuffer(model, subgraph, tensor_idx)) {
|
||||
if (utils::QuantizeWeight(model, tensor, property.per_axis,
|
||||
property.per_axis_index) == kTfLiteError) {
|
||||
// TODO(suharshs): Look at consumers, throw error if one consumer is
|
||||
// per-channel and one per-layer.
|
||||
if (utils::QuantizeWeight(model, tensor, tensor_property.per_axis,
|
||||
tensor_property.per_axis_index) ==
|
||||
kTfLiteError) {
|
||||
error_reporter->Report(
|
||||
"Unable to quantize buffer or min/max value for input %d "
|
||||
"in op %s in subgraph %d, node: %d",
|
||||
@ -393,6 +398,7 @@ TfLiteStatus QuantizeOpInput(ModelT* model, int32_t subgraph_idx,
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (utils::HasMinMax(tensor)) {
|
||||
// TODO(suharshs): Handle per-channel dynamic tensor.
|
||||
if (IsSubgraphInput(subgraph, tensor_idx)) {
|
||||
utils::QuantizeActivation(tensor);
|
||||
} else {
|
||||
@ -442,11 +448,13 @@ TfLiteStatus QuantizeOpInput(ModelT* model, int32_t subgraph_idx,
|
||||
}
|
||||
|
||||
// Quantize the op output.
|
||||
TfLiteStatus QuantizeOpOutput(ModelT* model, int32_t subgraph_idx,
|
||||
int32_t op_idx,
|
||||
TfLiteStatus QuantizeOpOutput(
|
||||
ModelT* model, int32_t subgraph_idx, int32_t op_idx,
|
||||
operator_property::OperatorProperty property,
|
||||
int32_t output_idx,
|
||||
const std::pair<int32_t, operator_property::TensorProperty>& output,
|
||||
ErrorReporter* error_reporter) {
|
||||
int32_t output_idx = output.first;
|
||||
operator_property::TensorProperty tensor_property = output.second;
|
||||
// If the operator is not quantizable, we don't need to do anything for the
|
||||
// output.
|
||||
if (!property.quantizable) {
|
||||
@ -470,16 +478,16 @@ TfLiteStatus QuantizeOpOutput(ModelT* model, int32_t subgraph_idx,
|
||||
// Copy quantization parameter. For average pool, max pool, etc
|
||||
// min/max can be different but we want them to be the same.
|
||||
// Get scale and zero point of input.
|
||||
if (property.input_indexes[0] >= op->inputs.size()) {
|
||||
if (property.inputs[0].first >= op->inputs.size()) {
|
||||
error_reporter->Report(
|
||||
"Required input index %d is larger than the input length of "
|
||||
"op %s at index %d in subgraph %d",
|
||||
property.input_indexes[0], op->inputs.size(),
|
||||
property.inputs[0].first, op->inputs.size(),
|
||||
EnumNameBuiltinOperator(op_code), op_idx, subgraph_idx);
|
||||
return kTfLiteError;
|
||||
}
|
||||
const int input_index = op->inputs[property.input_indexes[0]];
|
||||
TensorT* input_tensor = subgraph->tensors[input_index].get();
|
||||
const int input_tensor_idx = op->inputs[property.inputs[0].first];
|
||||
TensorT* input_tensor = subgraph->tensors[input_tensor_idx].get();
|
||||
if (input_tensor->quantization->scale.size() != 1 ||
|
||||
input_tensor->quantization->zero_point.size() != 1 ||
|
||||
input_tensor->quantization->min.size() != 1 ||
|
||||
@ -514,8 +522,8 @@ TfLiteStatus QuantizeOpOutput(ModelT* model, int32_t subgraph_idx,
|
||||
output_tensor->quantization->min.push_back(min);
|
||||
output_tensor->quantization->max.push_back(max);
|
||||
output_tensor->type = TensorType_INT8;
|
||||
} else if (property.restriction_on_output) {
|
||||
const auto scale_and_zp = property.restricted_value_on_output;
|
||||
} else if (tensor_property.restriction) {
|
||||
const auto scale_and_zp = tensor_property.restricted_value;
|
||||
// Apply to output.
|
||||
output_tensor->quantization = absl::make_unique<QuantizationParametersT>();
|
||||
output_tensor->quantization->scale.push_back(scale_and_zp.first);
|
||||
@ -557,15 +565,17 @@ TfLiteStatus QuantizeWeightsInputOutput(ModelT* model, bool allow_float,
|
||||
}
|
||||
|
||||
// Quantize operator inputs/weights.
|
||||
for (const int input_idx : GetInputIndexes(op, property)) {
|
||||
TF_LITE_ENSURE_STATUS(QuantizeOpInput(
|
||||
model, subgraph_idx, &op_idx, property, input_idx, error_reporter));
|
||||
for (const std::pair<int, operator_property::TensorProperty>& input :
|
||||
GetInputs(op, property)) {
|
||||
TF_LITE_ENSURE_STATUS(QuantizeOpInput(model, subgraph_idx, &op_idx,
|
||||
property, input, error_reporter));
|
||||
}
|
||||
|
||||
// Quantize operator outputs.
|
||||
for (const int output_idx : property.output_indexes) {
|
||||
for (const std::pair<int, operator_property::TensorProperty>& output :
|
||||
property.outputs) {
|
||||
TF_LITE_ENSURE_STATUS(QuantizeOpOutput(
|
||||
model, subgraph_idx, op_idx, property, output_idx, error_reporter));
|
||||
model, subgraph_idx, op_idx, property, output, error_reporter));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -601,7 +611,7 @@ TfLiteStatus QuantizeBiases(ModelT* model, ErrorReporter* error_reporter) {
|
||||
if (utils::HasBuffer(model, subgraph, op->inputs[bias_idx])) {
|
||||
TensorT* bias_tensor =
|
||||
subgraph->tensors[op->inputs[bias_idx]].get();
|
||||
if (property.input_indexes.size() != 2) {
|
||||
if (property.inputs.size() != 2) {
|
||||
error_reporter->Report(
|
||||
"Expect the input length of "
|
||||
"op %s at index %d in subgraph %d to be 2",
|
||||
@ -610,12 +620,15 @@ TfLiteStatus QuantizeBiases(ModelT* model, ErrorReporter* error_reporter) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
TensorT* input_tensor =
|
||||
subgraph->tensors[op->inputs[property.input_indexes[0]]].get();
|
||||
subgraph->tensors[op->inputs[property.inputs[0].first]].get();
|
||||
TensorT* weight_tensor =
|
||||
subgraph->tensors[op->inputs[property.input_indexes[1]]].get();
|
||||
TF_LITE_ENSURE_STATUS(QuantizeBias(
|
||||
model, input_tensor, weight_tensor, bias_tensor,
|
||||
property.per_axis, property.per_axis_index, error_reporter));
|
||||
subgraph->tensors[op->inputs[property.inputs[1].first]].get();
|
||||
operator_property::TensorProperty weight_property =
|
||||
property.inputs[1].second;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
QuantizeBias(model, input_tensor, weight_tensor, bias_tensor,
|
||||
weight_property.per_axis,
|
||||
weight_property.per_axis_index, error_reporter));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user