// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

package toco;

import "tensorflow/lite/toco/types.proto";

// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  // Zero value; no format has been specified.
  FILE_FORMAT_UNKNOWN = 0;

  // GraphDef, third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // TensorFlow's mobile inference model.
  // third_party/tensorflow/lite/schema/schema.fbs
  TFLITE = 2;

  // GraphViz.
  // Export-only.
  GRAPHVIZ_DOT = 3;
}

// TocoFlags encodes extra parameters that drive tooling operations, that
// are not normally encoded in model files and in general may not be thought
// of as properties of models, instead describing how models are to be
// processed in the context of the present tooling job.
//
// NOTE(review): field numbers 3, 9 and 21-23 are not assigned in this
// message. If they belonged to removed fields, they should be listed in a
// `reserved` statement to prevent accidental reuse -- confirm against the
// file's history.
//
// Next ID to use: 33.
message TocoFlags {
  // Input file format.
  optional FileFormat input_format = 1;

  // Output file format.
  optional FileFormat output_format = 2;

  // Similar to inference_type, but allows specific control of the
  // quantization of input arrays, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap
  // as input, typically with uint8 channels, yet still requiring floating-point
  // inference. For such image models, the uint8 input is quantized, i.e.
  // the uint8 values are interpreted as real numbers, and the quantization
  // parameters used for such input arrays are their mean_value, std_value
  // parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-numbers representation (quantization)
  // from what the input file used. For any other types of arrays, changing
  // the data type would not make sense.
  //
  // Specifically:
  //    - If FLOAT, then real-numbers arrays will be of type float in
  //      the output file. If they were quantized in the input file, then
  //      they get dequantized.
  //    - If QUANTIZED_UINT8, then real-numbers arrays will be quantized
  //      as uint8 in the output file. If they were float in the input file,
  //      then they get quantized.
  //    - If not set, then all real-numbers arrays retain the same type in the
  //      output file as they have in the input file.
  optional IODataType inference_type = 4;

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activations array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing for quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;
  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;

  // Ignore and discard FakeQuant nodes. For instance, that can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations, that prevent graph
  // transformations that are necessary in order to generate inference
  // code for these graphs. Such graphs should be fixed, but as a
  // temporary work-around, setting this reorder_across_fake_quant flag
  // allows toco to perform necessary graph transformations on them,
  // at the cost of no longer faithfully matching inference and training
  // arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported TensorFlow ops.
  optional bool allow_custom_ops = 10;

  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //    - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //    - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly annotated
  // with FakeQuant* ops on at least the edges and may contain additional ops on
  // the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging them to 1 to allow proceeding, with moderate
  // inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size, in bytes, of constant arrays to deduplicate; smaller arrays
  // will not be deduplicated.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but reduces model size (at the cost of
  // accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead.
  optional bool quantize_weights = 20 [default = false, deprecated = true];

  // Full path of the folder in which to dump the graphs, at various stages of
  // processing, as GraphViz .dot files. Preferred over
  // --output_format=GRAPHVIZ_DOT in order to keep the requirements of the
  // output file.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted float
  // model. Model size will be reduced and there will be latency improvements
  // (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow ops.
  // TODO(ycling): Consider renaming the following 2 flags so that they are not
  // called "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change.
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change.
  optional bool force_select_tf_ops = 28 [default = false];

  // Boolean indicating whether to convert float32 constant buffers to
  // float16. This is typically done to reduce model size. Delegates may also
  // wish to implement kernels on reduced precision floats for performance
  // gains.
  optional bool quantize_to_float16 = 29 [default = false];

  // Boolean flag indicating whether the converter should allow models with
  // dynamic Tensor shape. When set to False, the converter will generate
  // runtime memory offsets for activation Tensors (with 128-bit alignment)
  // and error out on models with undetermined Tensor shape. (Default: True)
  optional bool allow_dynamic_tensors = 30 [default = true];

  // Full path of the folder in which to dump conversion logs. This includes a
  // global view of the conversion process, and the user can choose to submit
  // those logs.
  optional string conversion_summary_dir = 31;

  // Strings representing the custom ops OpDefs that are included in the
  // GraphDef.
  repeated string custom_opdefs = 32;
}