Added versioning to ADD/SUB + some rework of the existing code.
This commit is contained in:
parent
4180f945a7
commit
dd0d9e8f03
tensorflow/lite
kernels
toco/tflite
tools/versioning
@ -100,19 +100,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
|||||||
|
|
||||||
// 8bit -> 8bit general quantized path, with general rescalings
|
// 8bit -> 8bit general quantized path, with general rescalings
|
||||||
// as well as, 16bit -> 16bit with general rescalings
|
// as well as, 16bit -> 16bit with general rescalings
|
||||||
bool pot_scale_16bit = false;
|
bool pot_scale_16bit = true;
|
||||||
|
|
||||||
bool input1_scale_is_pot = false;
|
bool input1_scale_is_pot = false;
|
||||||
bool input2_scale_is_pot = false;
|
bool input2_scale_is_pot = false;
|
||||||
bool output_scale_is_pot = false;
|
bool output_scale_is_pot = false;
|
||||||
|
|
||||||
int input1_scale_log2_rounded;
|
int input1_scale_log2_rounded{0};
|
||||||
int input2_scale_log2_rounded;
|
int input2_scale_log2_rounded{0};
|
||||||
int output_scale_log2_rounded;
|
int output_scale_log2_rounded{0};
|
||||||
|
|
||||||
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
|
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
|
||||||
output->type == kTfLiteInt16) {
|
output->type == kTfLiteInt16) {
|
||||||
// Check that param scale is POT
|
// In case of 16-bit, there are two implementations:
|
||||||
|
// the scale parameter is a general number
|
||||||
|
// the scale parameter is POT and
|
||||||
|
// zero_point is zero for inputs/output.
|
||||||
|
pot_scale_16bit = (input1->params.zero_point == 0) &&
|
||||||
|
(input2->params.zero_point == 0) &&
|
||||||
|
(output->params.zero_point == 0);
|
||||||
|
|
||||||
input1_scale_is_pot =
|
input1_scale_is_pot =
|
||||||
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
|
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
|
||||||
|
|
||||||
@ -122,14 +129,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
|||||||
output_scale_is_pot =
|
output_scale_is_pot =
|
||||||
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
|
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
|
||||||
|
|
||||||
pot_scale_16bit = input1_scale_log2_rounded && input2_scale_log2_rounded &&
|
pot_scale_16bit &=
|
||||||
output_scale_log2_rounded;
|
input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
|
||||||
}
|
}
|
||||||
|
|
||||||
data->pot_scale_16bit = pot_scale_16bit;
|
data->pot_scale_16bit = pot_scale_16bit;
|
||||||
|
|
||||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
||||||
pot_scale_16bit) {
|
!pot_scale_16bit) {
|
||||||
// 8bit -> 8bit general quantized path, with general rescalings
|
// 8bit -> 8bit general quantized path, with general rescalings
|
||||||
// as well as, 16bit -> 16bit with general rescalings
|
// as well as, 16bit -> 16bit with general rescalings
|
||||||
data->input1_offset = -input1->params.zero_point;
|
data->input1_offset = -input1->params.zero_point;
|
||||||
@ -139,7 +146,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
|||||||
// The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly.
|
// The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly.
|
||||||
// In case of 16-bit we have 65535 << 15 which is less than 1 << 31,
|
// In case of 16-bit we have 65535 << 15 which is less than 1 << 31,
|
||||||
// therefore the addition will still fit in a 32 bit accumulator.
|
// therefore the addition will still fit in a 32 bit accumulator.
|
||||||
data->left_shift = pot_scale_16bit ? 15 : 20;
|
data->left_shift = !pot_scale_16bit ? 15 : 20;
|
||||||
const double twice_max_input_scale =
|
const double twice_max_input_scale =
|
||||||
2 * std::max(input1->params.scale, input2->params.scale);
|
2 * std::max(input1->params.scale, input2->params.scale);
|
||||||
const double real_input1_multiplier =
|
const double real_input1_multiplier =
|
||||||
@ -252,7 +259,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
|||||||
const TfLiteTensor* input2,
|
const TfLiteTensor* input2,
|
||||||
TfLiteTensor* output) {
|
TfLiteTensor* output) {
|
||||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
||||||
data->pot_scale_16bit) {
|
!data->pot_scale_16bit) {
|
||||||
tflite::ArithmeticParams op_params;
|
tflite::ArithmeticParams op_params;
|
||||||
op_params.left_shift = data->left_shift;
|
op_params.left_shift = data->left_shift;
|
||||||
op_params.input1_offset = data->input1_offset;
|
op_params.input1_offset = data->input1_offset;
|
||||||
|
@ -88,7 +88,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
|
|||||||
/* max_version */ 2);
|
/* max_version */ 2);
|
||||||
AddBuiltin(BuiltinOperator_ADD, Register_ADD(),
|
AddBuiltin(BuiltinOperator_ADD, Register_ADD(),
|
||||||
/* min_version */ 1,
|
/* min_version */ 1,
|
||||||
/* max_version */ 2);
|
/* max_version */ 4);
|
||||||
AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(),
|
AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(),
|
||||||
/* min_version */ 1,
|
/* min_version */ 1,
|
||||||
/* max_version */ 3);
|
/* max_version */ 3);
|
||||||
@ -139,7 +139,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
|
|||||||
AddBuiltin(BuiltinOperator_DIV, Register_DIV());
|
AddBuiltin(BuiltinOperator_DIV, Register_DIV());
|
||||||
AddBuiltin(BuiltinOperator_SUB, Register_SUB(),
|
AddBuiltin(BuiltinOperator_SUB, Register_SUB(),
|
||||||
/* min_version */ 1,
|
/* min_version */ 1,
|
||||||
/* max_version */ 3);
|
/* max_version */ 5);
|
||||||
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), /* min_version */ 1,
|
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), /* min_version */ 1,
|
||||||
/* max_version */ 3);
|
/* max_version */ 3);
|
||||||
AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V(),
|
AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V(),
|
||||||
|
@ -225,19 +225,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
|||||||
|
|
||||||
// 8bit -> 8bit general quantized path, with general rescalings
|
// 8bit -> 8bit general quantized path, with general rescalings
|
||||||
// as well as, 16bit -> 16bit with general rescalings
|
// as well as, 16bit -> 16bit with general rescalings
|
||||||
bool pot_scale_16bit = false;
|
bool pot_scale_16bit = true;
|
||||||
|
|
||||||
bool input1_scale_is_pot = false;
|
bool input1_scale_is_pot = false;
|
||||||
bool input2_scale_is_pot = false;
|
bool input2_scale_is_pot = false;
|
||||||
bool output_scale_is_pot = false;
|
bool output_scale_is_pot = false;
|
||||||
|
|
||||||
int input1_scale_log2_rounded;
|
int input1_scale_log2_rounded{0};
|
||||||
int input2_scale_log2_rounded;
|
int input2_scale_log2_rounded{0};
|
||||||
int output_scale_log2_rounded;
|
int output_scale_log2_rounded{0};
|
||||||
|
|
||||||
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
|
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
|
||||||
output->type == kTfLiteInt16) {
|
output->type == kTfLiteInt16) {
|
||||||
// Check that param scale is POT
|
// In case of 16-bit, there are two implementations:
|
||||||
|
// the scale parameter is a general number
|
||||||
|
// the scale parameter is POT and
|
||||||
|
// zero_point is zero for inputs/output.
|
||||||
|
pot_scale_16bit = (input1->params.zero_point == 0) &&
|
||||||
|
(input2->params.zero_point == 0) &&
|
||||||
|
(output->params.zero_point == 0);
|
||||||
|
|
||||||
input1_scale_is_pot =
|
input1_scale_is_pot =
|
||||||
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
|
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
|
||||||
|
|
||||||
@ -247,14 +254,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
|||||||
output_scale_is_pot =
|
output_scale_is_pot =
|
||||||
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
|
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
|
||||||
|
|
||||||
pot_scale_16bit = input1_scale_log2_rounded && input2_scale_log2_rounded &&
|
pot_scale_16bit &=
|
||||||
output_scale_log2_rounded;
|
input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
|
||||||
}
|
}
|
||||||
|
|
||||||
data->pot_scale_16bit = pot_scale_16bit;
|
data->pot_scale_16bit = pot_scale_16bit;
|
||||||
|
|
||||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
||||||
pot_scale_16bit) {
|
!pot_scale_16bit) {
|
||||||
TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2,
|
TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2,
|
||||||
output, params, data, -1));
|
output, params, data, -1));
|
||||||
} else if (output->type == kTfLiteInt16) {
|
} else if (output->type == kTfLiteInt16) {
|
||||||
@ -348,7 +355,7 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
|||||||
} else {
|
} else {
|
||||||
TF_LITE_SUB(reference_integer_ops, Add, int8_t);
|
TF_LITE_SUB(reference_integer_ops, Add, int8_t);
|
||||||
}
|
}
|
||||||
} else if (data->pot_scale_16bit) {
|
} else if (!data->pot_scale_16bit) {
|
||||||
if (need_broadcast) {
|
if (need_broadcast) {
|
||||||
TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t);
|
TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t);
|
||||||
} else {
|
} else {
|
||||||
|
@ -49,11 +49,16 @@ string GetMinimumRuntimeVersionForModel(const Model& model) {
|
|||||||
{{OperatorType::kDepthwiseConv, 3}, "1.14.0"},
|
{{OperatorType::kDepthwiseConv, 3}, "1.14.0"},
|
||||||
{{OperatorType::kAdd, 1}, "1.5.0"},
|
{{OperatorType::kAdd, 1}, "1.5.0"},
|
||||||
{{OperatorType::kAdd, 2}, "1.14.0"},
|
{{OperatorType::kAdd, 2}, "1.14.0"},
|
||||||
|
{{OperatorType::kAdd, 3}, "1.15.0"},
|
||||||
|
{{OperatorType::kAdd, 4}, kPendingReleaseOpVersion},
|
||||||
{{OperatorType::kAddN, 1}, "1.14.0"},
|
{{OperatorType::kAddN, 1}, "1.14.0"},
|
||||||
{{OperatorType::kSpaceToBatchND, 1}, "1.6.0"},
|
{{OperatorType::kSpaceToBatchND, 1}, "1.6.0"},
|
||||||
{{OperatorType::kSpaceToBatchND, 2}, "1.14.0"},
|
{{OperatorType::kSpaceToBatchND, 2}, "1.14.0"},
|
||||||
{{OperatorType::kSub, 1}, "1.6.0"},
|
{{OperatorType::kSub, 1}, "1.6.0"},
|
||||||
{{OperatorType::kSub, 2}, "1.14.0"},
|
{{OperatorType::kSub, 2}, "1.14.0"},
|
||||||
|
{{OperatorType::kSub, 3}, "1.15.0"},
|
||||||
|
{{OperatorType::kSub, 4}, "1.15.0"},
|
||||||
|
{{OperatorType::kSub, 5}, kPendingReleaseOpVersion},
|
||||||
{{OperatorType::kDiv, 1}, "1.6.0"},
|
{{OperatorType::kDiv, 1}, "1.6.0"},
|
||||||
{{OperatorType::kBatchToSpaceND, 1}, "1.6.0"},
|
{{OperatorType::kBatchToSpaceND, 1}, "1.6.0"},
|
||||||
{{OperatorType::kBatchToSpaceND, 2}, "1.14.0"},
|
{{OperatorType::kBatchToSpaceND, 2}, "1.14.0"},
|
||||||
|
@ -22,6 +22,7 @@ cc_library(
|
|||||||
"//tensorflow/core:tflite_portable_logging",
|
"//tensorflow/core:tflite_portable_logging",
|
||||||
"//tensorflow/lite:minimal_logging",
|
"//tensorflow/lite:minimal_logging",
|
||||||
"//tensorflow/lite/kernels/internal:compatibility",
|
"//tensorflow/lite/kernels/internal:compatibility",
|
||||||
|
"//tensorflow/lite/kernels/internal:quantization_util",
|
||||||
"//tensorflow/lite/schema:schema_fbs",
|
"//tensorflow/lite/schema:schema_fbs",
|
||||||
"//tensorflow/lite/schema:schema_fbs_with_mutable",
|
"//tensorflow/lite/schema:schema_fbs_with_mutable",
|
||||||
"@com_google_absl//absl/memory",
|
"@com_google_absl//absl/memory",
|
||||||
|
@ -24,6 +24,7 @@ limitations under the License.
|
|||||||
#include "absl/strings/str_split.h"
|
#include "absl/strings/str_split.h"
|
||||||
#include "tensorflow/core/platform/logging.h"
|
#include "tensorflow/core/platform/logging.h"
|
||||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||||
|
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||||
|
|
||||||
namespace tflite {
|
namespace tflite {
|
||||||
namespace {
|
namespace {
|
||||||
@ -359,7 +360,29 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
|
|||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
case BuiltinOperator_ADD:
|
||||||
|
if (op_sig.input_types.at(0) == TensorType_INT16 &&
|
||||||
|
op_sig.output_types.at(0) == TensorType_INT16) {
|
||||||
|
if (op_sig.options.addsub.pot_scale_int16) {
|
||||||
|
return 4;
|
||||||
|
} else {
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (op_sig.input_types.at(0) == TensorType_INT8) {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
|
||||||
case BuiltinOperator_SUB:
|
case BuiltinOperator_SUB:
|
||||||
|
if (op_sig.input_types.at(0) == TensorType_INT16 &&
|
||||||
|
op_sig.output_types.at(0) == TensorType_INT16) {
|
||||||
|
if (op_sig.options.addsub.pot_scale_int16) {
|
||||||
|
return 5;
|
||||||
|
} else {
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (op_sig.options.broadcast.need_broadcast &&
|
if (op_sig.options.broadcast.need_broadcast &&
|
||||||
op_sig.options.broadcast.num_dims > 4) {
|
op_sig.options.broadcast.num_dims > 4) {
|
||||||
return 3;
|
return 3;
|
||||||
@ -370,7 +393,6 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
|
|||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
case BuiltinOperator_AVERAGE_POOL_2D:
|
case BuiltinOperator_AVERAGE_POOL_2D:
|
||||||
case BuiltinOperator_ADD:
|
|
||||||
case BuiltinOperator_CONCATENATION:
|
case BuiltinOperator_CONCATENATION:
|
||||||
case BuiltinOperator_MAX_POOL_2D:
|
case BuiltinOperator_MAX_POOL_2D:
|
||||||
case BuiltinOperator_PAD:
|
case BuiltinOperator_PAD:
|
||||||
@ -487,6 +509,53 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op,
|
|||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
|
case BuiltinOperator_ADD:
|
||||||
|
case BuiltinOperator_SUB: {
|
||||||
|
op_sig.options.addsub.pot_scale_int16 = false;
|
||||||
|
const Tensor* input1_tensor =
|
||||||
|
subgraph->tensors()->Get(op->inputs()->Get(0));
|
||||||
|
const Tensor* input2_tensor =
|
||||||
|
subgraph->tensors()->Get(op->inputs()->Get(1));
|
||||||
|
const Tensor* output_tensor =
|
||||||
|
subgraph->tensors()->Get(op->outputs()->Get(0));
|
||||||
|
const QuantizationParameters* input1_quant =
|
||||||
|
input1_tensor->quantization();
|
||||||
|
const QuantizationParameters* input2_quant =
|
||||||
|
input2_tensor->quantization();
|
||||||
|
const QuantizationParameters* output_quant =
|
||||||
|
output_tensor->quantization();
|
||||||
|
if (input1_quant && input1_quant->scale() &&
|
||||||
|
input1_quant->scale()->Length() && input2_quant &&
|
||||||
|
input2_quant->scale() && input2_quant->scale()->Length() &&
|
||||||
|
output_quant && output_quant->scale() &&
|
||||||
|
output_quant->scale()->Length()) {
|
||||||
|
float input1_scale = input1_quant->scale()->Get(0);
|
||||||
|
float input2_scale = input2_quant->scale()->Get(0);
|
||||||
|
float output_scale = output_quant->scale()->Get(0);
|
||||||
|
|
||||||
|
int scale_log2_rounded = 0;
|
||||||
|
bool input1_scale_is_pot =
|
||||||
|
CheckedLog2(input1_scale, &scale_log2_rounded);
|
||||||
|
|
||||||
|
bool input2_scale_is_pot =
|
||||||
|
CheckedLog2(input2_scale, &scale_log2_rounded);
|
||||||
|
|
||||||
|
bool output_scale_is_pot =
|
||||||
|
CheckedLog2(output_scale, &scale_log2_rounded);
|
||||||
|
|
||||||
|
op_sig.options.addsub.pot_scale_int16 =
|
||||||
|
input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (op_code->builtin_code() == BuiltinOperator_SUB) {
|
||||||
|
op_sig.options.broadcast.need_broadcast =
|
||||||
|
!HaveSameShapes(subgraph, op, 0, 1);
|
||||||
|
op_sig.options.broadcast.num_dims =
|
||||||
|
std::max(GetNumDims(subgraph, op, 0), GetNumDims(subgraph, op, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
} break;
|
||||||
|
|
||||||
case BuiltinOperator_LSTM: {
|
case BuiltinOperator_LSTM: {
|
||||||
auto lstm_option = op->builtin_options_as_LSTMOptions();
|
auto lstm_option = op->builtin_options_as_LSTMOptions();
|
||||||
if (lstm_option) {
|
if (lstm_option) {
|
||||||
@ -512,7 +581,6 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op,
|
|||||||
op_sig.options.space_batch.num_dims = GetNumDims(subgraph, op, 0);
|
op_sig.options.space_batch.num_dims = GetNumDims(subgraph, op, 0);
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case BuiltinOperator_SUB:
|
|
||||||
case BuiltinOperator_MAXIMUM:
|
case BuiltinOperator_MAXIMUM:
|
||||||
case BuiltinOperator_MINIMUM: {
|
case BuiltinOperator_MINIMUM: {
|
||||||
op_sig.options.broadcast.need_broadcast =
|
op_sig.options.broadcast.need_broadcast =
|
||||||
|
@ -59,6 +59,9 @@ typedef struct {
|
|||||||
int32_t num_dims;
|
int32_t num_dims;
|
||||||
bool need_broadcast;
|
bool need_broadcast;
|
||||||
} broadcast;
|
} broadcast;
|
||||||
|
struct {
|
||||||
|
bool pot_scale_int16;
|
||||||
|
} addsub;
|
||||||
} options;
|
} options;
|
||||||
} OpSignature;
|
} OpSignature;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user