Merge pull request #44415 from Molkree:typos_in_lite_dir
PiperOrigin-RevId: 341738495 Change-Id: Id7b3b774f1ff0d3c9f34b7a224a83f9e51bfae6f
commit 229a8af422
Changed paths under tensorflow/lite:
- c
- core
- delegates
- experimental/writer
- g3doc (guide, inference_with_metadata/task_library, performance, tutorials)
- java
- kernels
- micro (examples, kernels, tools/make)
- profiling
- python/optimize
- tools
@@ -80,7 +80,7 @@ struct TfLiteRegistration;
 
 // An external context is a collection of information unrelated to the TF Lite
 // framework, but useful to a subset of the ops. TF Lite knows very little
-// about about the actual contexts, but it keeps a list of them, and is able to
+// about the actual contexts, but it keeps a list of them, and is able to
 // refresh them if configurations like the number of recommended threads
 // change.
 typedef struct TfLiteExternalContext {
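For context (not part of the diff): a minimal sketch of how an op kernel can look up one of these external contexts, assuming the `GetExternalContext` member that `TfLiteContext` exposes and the `kTfLiteCpuBackendContext` enum value.

```
// Sketch only: fetch a registered external context from inside an op kernel.
// Returns nullptr if no context of this type has been registered.
TfLiteExternalContext* GetCpuBackendContext(TfLiteContext* context) {
  return context->GetExternalContext(context, kTfLiteCpuBackendContext);
}
```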
@@ -219,7 +219,7 @@ Subgraph::Subgraph(ErrorReporter* error_reporter,
 // Reserve some space for the tensors to avoid excessive resizing.
 tensors_.reserve(kTensorsReservedCapacity);
 nodes_and_registration().reserve(kTensorsReservedCapacity);
-// Invalid to call these these except from TfLiteDelegate
+// Invalid to call these except from TfLiteDelegate
 SwitchToKernelContext();
 }
 
@@ -311,7 +311,7 @@ TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate,
 // Use `char*` for conveniently step through the allocated space by bytes.
 char* allocation = static_cast<char*>(malloc(allocation_size));
 
-// Step 3: Fill all data structures structures.
+// Step 3: Fill all data structures.
 TfLiteDelegateParams* params =
 reinterpret_cast<TfLiteDelegateParams*>(allocation);
 params->delegate = delegate;
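As an aside, the hunk above relies on a single-allocation pattern: one malloc is sized for the params struct plus its trailing arrays, and a `char*` cursor is stepped through it byte by byte. A self-contained sketch with made-up types (not the real `TfLiteDelegateParams` layout):

```
#include <cstdlib>

// Illustrative only: carve one allocation into a header struct plus a
// trailing int array, mirroring the byte-stepping idiom above.
struct Header {
  int* values;
  int count;
};

Header* AllocateHeaderWithArray(int count) {
  size_t size = sizeof(Header) + count * sizeof(int);
  char* allocation = static_cast<char*>(std::malloc(size));
  Header* header = reinterpret_cast<Header*>(allocation);
  allocation += sizeof(Header);  // step past the header by bytes
  header->values = reinterpret_cast<int*>(allocation);
  header->count = count;
  return header;  // released with a single free() on the returned pointer
}
```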
@@ -196,7 +196,7 @@ class TestDelegate : public ::testing::Test {
 kTfLiteOk);
 
 if (simple->min_ops_per_subset() > 0) {
-// Build a new vector of ops from subsets with atleast the minimum
+// Build a new vector of ops from subsets with at least the minimum
 // size.
 std::vector<int> allowed_ops;
 for (int idx = 0; idx < num_partitions; ++idx) {

@@ -1304,7 +1304,7 @@ TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagNotSet) {
 // Input: 0, Output:12.
 // All constants are 2, so the function is: (x + 2 + 2) * 2 + 2 = 2x + 10
 //
-// Delegate only supports ADD, so can have upto two delegated partitions.
+// Delegate only supports ADD, so can have up to two delegated partitions.
 // TODO(b/156707497): Add more cases here once we have landed CPU kernels
 // supporting FP16.
 class TestFP16Delegation : public ::testing::TestWithParam<int> {

@@ -364,7 +364,7 @@ struct InferenceOptions {
 };
 
 // Returns a position number for the priority. If priority is missing,
-// then it it would return 'max num priorities + 1'.
+// then it would return 'max num priorities + 1'.
 int GetPosition(const InferenceOptions& options, InferencePriority p);
 
 // Return true if options are valid.

@@ -242,7 +242,7 @@ bool IsConvConstantsSupported(const GpuInfo& gpu_info,
 const Convolution2DAttributes& attr) {
 if (gpu_info.IsAMD() && definition.precision != CalculationsPrecision::F32 &&
 definition.src_tensors[0].storage_type != TensorStorageType::BUFFER) {
-// BUG, some AMD gpus crashe without it
+// BUG, some AMD GPUs crash without it
 return false;
 }
 

@@ -44,10 +44,10 @@ enum class IOStructure {
 ONLY_DEFINITIONS,
 
 // For inputs:
-// Source code runs computations using 'vec4 value_N' declared by
-// the compiler, where where N is an index of the input. Each value comes
-// from inputs using coordinates set by GlobalInvocationID and a dispatch
-// method, therefore, source code should not explicitly read values.
+// Source code runs computations using 'vec4 value_N' declared by the
+// compiler, where N is an index of the input. Each value comes from inputs
+// using coordinates set by GlobalInvocationID and a dispatch method,
+// therefore, source code should not explicitly read values.
 //
 // For outputs:
 // Source code runs computations and leaves results in 'vec4 value_N'

@@ -1390,7 +1390,7 @@ class NNAPIOpBuilder {
 if (tensor->allocation_type == kTfLiteMmapRo) {
 if (IsQuantized(tensor_type) && need_int8_conversion &&
 nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
-// We need to to add a tensor and convert the weights into uint8.
+// We need to add a tensor and convert the weights into uint8.
 // Currently this is only needed for fully_connected. The new_tensor is
 // needed for lifetime management for the converted weights.
 int new_tensor_index = -1;

@@ -2526,7 +2526,7 @@ bool NNAPIDelegateKernel::Validate(
 context->tensors[node->inputs->data[1]].dims;
 Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
 NNAPIValidationFailureType::kUnsupportedOperandValue,
-"Condition and inputs tensors shuld have the same shape",
+"Condition and inputs tensors should have the same shape",
 &val_ctx);
 } break;
 case kTfLiteBuiltinGather: {

@@ -21,7 +21,7 @@ the ideas above. For more sophisticated examples, refer to [Flex delegate](https
 ## Testing & Tooling
 
 There are currently **two options** to plug in a newly created TFLite delegate
-to reuse existing TFLite kernel tests and and tooling:
+to reuse existing TFLite kernel tests and tooling:
 
 - Utilize the **[delegate registrar](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates)**
 mechanism

@@ -120,7 +120,7 @@ class OpOptionData {
 const std::unordered_map<std::string, std::string>& op_to_option() {
 return op_to_option_;
 }
-// Maps from option to to C struct i.e. 'AddOptions' -> 'TfLiteAddOptions'
+// Maps from option to C struct i.e. 'AddOptions' -> 'TfLiteAddOptions'
 const std::unordered_map<std::string, std::string>& option_to_struct() {
 return option_to_struct_;
 }

@@ -126,7 +126,7 @@ page for the detail.
 bazel build --config=elinux_aarch64 -c opt //tensorflow/lite:libtensorflowlite.so
 ```
 
-You can find a shared library library in:
+You can find a shared library in:
 `bazel-bin/tensorflow/lite/libtensorflowlite.so`.
 
 Currently, there is no straightforward way to extract all header files needed,
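Not from this diff: once `libtensorflowlite.so` is built, a minimal C++ consumer looks roughly like the sketch below (the `model.tflite` path is a placeholder). Link the binary against the shared library under `bazel-bin/tensorflow/lite/`.

```
#include <memory>

#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

int main() {
  // Placeholder model path; substitute your own .tflite file.
  auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
  if (!model) return 1;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  if (!interpreter || interpreter->AllocateTensors() != kTfLiteOk) return 1;
  // Fill input tensors here, then run inference.
  return interpreter->Invoke() == kTfLiteOk ? 0 : 1;
}
```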
@@ -119,7 +119,7 @@ cd tensorflow_src && ./tensorflow/lite/tools/make/download_dependencies.sh
 
 You can use
 [ARM GCC toolchains](https://github.com/tensorflow/tensorflow/tree/master/third_party/toolchains/embedded/arm-linux)
-with Bazel to build an armhf shared library which is compatibile with Raspberry
+with Bazel to build an armhf shared library which is compatible with Raspberry
 Pi 2, 3 and 4.
 
 Note: The generated shared library requires glibc 2.28 or higher to run.

@@ -165,7 +165,7 @@ page for the detail.
 bazel build --config=elinux_armhf -c opt //tensorflow/lite:libtensorflowlite.so
 ```
 
-You can find a shared library library in:
+You can find a shared library in:
 `bazel-bin/tensorflow/lite/libtensorflowlite.so`.
 
 Currently, there is no straightforward way to extract all header files needed,

@@ -222,7 +222,7 @@ pip package version since 2.3 for Linux and 2.4 for other environments.
 ### Performance
 
 When using a mixture of both builtin and select TensorFlow ops, all of the same
-TensorFlow Lite optimizations and optimized builtin ops will be be available and
+TensorFlow Lite optimizations and optimized builtin ops will be available and
 usable with the converted model.
 
 The following table describes the average time taken to run inference on

@@ -110,7 +110,7 @@ To build an API object,you must provide the following information by extending
 std::vector<QaAnswer>, // OutputType
 const std::string&, const std::string& // InputTypes
 > {
-// Convert API input into into tensors
+// Convert API input into tensors
 absl::Status BertQuestionAnswerer::Preprocess(
 const std::vector<TfLiteTensor*>& input_tensors, // input tensors of the model
 const std::string& context, const std::string& query // InputType of the API

@@ -230,7 +230,7 @@ following information by extending
 [`BaseTaskApi`](https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/java/src/java/org/tensorflow/lite/task/core/BaseTaskApi.java),
 which provides JNI handlings for all Java Task APIs.
 
-* __Determine the API I/O__ - This usually mirriors the native interfaces. e.g
+* __Determine the API I/O__ - This usually mirrors the native interfaces. e.g
 `BertQuestionAnswerer` takes `(String context, String question)` as input
 and outputs `List<QaAnswer>`. The implementation calls a private native
 function with similar signature, except it has an additional parameter `long

@@ -114,7 +114,7 @@ OR
 pod 'TensorFlowLiteSwift', '~> 0.0.1-nightly', :subspecs => ['Metal']
 ```
 
-You can do similiarly for `TensorFlowLiteC` if you want to use the C API.
+You can do similarly for `TensorFlowLiteC` if you want to use the C API.
 
 #### Step 3. Enable the GPU delegate
 

@@ -154,9 +154,9 @@ Lastly make sure to select Release-only builds on 64-bit architecture. Under
 
 ### Android
 
-Note: The TensorFlow Lite Interpreter must be created on the same thread as when
-is is run. Otherwise, `TfLiteGpuDelegate Invoke: GpuDelegate must run on the
-same thread where it was initialized.` may occur.
+Note: The TensorFlow Lite Interpreter must be created on the same thread as
+where it is run. Otherwise, `TfLiteGpuDelegate Invoke: GpuDelegate must run on
+the same thread where it was initialized.` may occur.
 
 Look at the demo to see how to add the delegate. In your application, add the
 AAR as above, import `org.tensorflow.lite.gpu.GpuDelegate` module, and use
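For reference (not in the diff): the C++ counterpart of the delegate setup described above, using the GPU delegate's C API; as the note says, create and invoke the interpreter on the same thread.

```
#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/interpreter.h"

// Assumes `interpreter` was already built; the returned delegate must outlive
// the interpreter and be released with TfLiteGpuDelegateV2Delete().
TfLiteDelegate* ApplyGpuDelegate(tflite::Interpreter* interpreter) {
  TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
  TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);
  if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
    TfLiteGpuDelegateV2Delete(delegate);
    return nullptr;
  }
  return delegate;
}
```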
@@ -84,7 +84,7 @@
 "source": [
 "## Prerequisites\n",
 "\n",
-"To run this example, we first need to install serveral required packages, including Model Maker package that in github [repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)."
+"To run this example, we first need to install several required packages, including Model Maker package that in GitHub [repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)."
 ]
 },
 {

@@ -346,7 +346,7 @@
 "id": "NNRNv_mloS89"
 },
 "source": [
-"If you prefer not to upload your images to the cloud, you could try to run the library locally following the [guide](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker) in github."
+"If you prefer not to upload your images to the cloud, you could try to run the library locally following the [guide](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker) in GitHub."
 ]
 },
 {

@@ -783,7 +783,7 @@
 "id": "-4jQaxyT5_KV"
 },
 "source": [
-"You can also evalute the tflite model with the `evaluate_tflite` method."
+"You can also evaluate the tflite model with the `evaluate_tflite` method."
 ]
 },
 {

@@ -805,7 +805,7 @@
 "source": [
 "## Advanced Usage\n",
 "\n",
-"The `create` function is the critical part of this library. It uses transfer learning with a pretrained model similiar to the [tutorial](https://www.tensorflow.org/tutorials/images/transfer_learning).\n",
+"The `create` function is the critical part of this library. It uses transfer learning with a pretrained model similar to the [tutorial](https://www.tensorflow.org/tutorials/images/transfer_learning).\n",
 "\n",
 "The `create`function contains the following steps:\n",
 "\n",

@@ -843,7 +843,7 @@
 "id": "iyIo0d5TCzE2"
 },
 "source": [
-"Model Maker supports multiple post-training quantization options. Let's take full integer quantization as an instance. First, define the quantization config to enforce enforce full integer quantization for all ops including the input and output. The input type and output type are `uint8` by default. You may also change them to other types like `int8` by setting `inference_input_type` and `inference_output_type` in config."
+"Model Maker supports multiple post-training quantization options. Let's take full integer quantization as an instance. First, define the quantization config to enforce full integer quantization for all ops including the input and output. The input type and output type are `uint8` by default. You may also change them to other types like `int8` by setting `inference_input_type` and `inference_output_type` in config."
 ]
 },
 {

@@ -1018,7 +1018,7 @@
 " `use_hub_library` is True. None by default.\n",
 "* `shuffle`: Boolean, whether the data should be shuffled. False by default.\n",
 "* `use_augmentation`: Boolean, use data augmentation for preprocessing. False by default.\n",
-"* `use_hub_library`: Boolean, use `make_image_classifier_lib` from tensorflow hub to retrain the model. This training pipline could achieve better performance for complicated dataset with many categories. True by default. \n",
+"* `use_hub_library`: Boolean, use `make_image_classifier_lib` from tensorflow hub to retrain the model. This training pipeline could achieve better performance for complicated dataset with many categories. True by default. \n",
 "* `warmup_steps`: Number of warmup steps for warmup schedule on learning rate. If None, the default warmup_steps is used which is the total training steps in two epochs. Only used when `use_hub_library` is False. None by default.\n",
 "* `model_dir`: Optional, the location of the model checkpoint files. Only used when `use_hub_library` is False. None by default.\n",
 "\n",

@@ -665,7 +665,7 @@
 "id": "HZKYthlVrTos"
 },
 "source": [
-"You can also evalute the tflite model with the `evaluate_tflite` method. This step is expected to take a long time."
+"You can also evaluate the tflite model with the `evaluate_tflite` method. This step is expected to take a long time."
 ]
 },
 {

@@ -747,7 +747,7 @@
 "id": "HZKYthlVrTos"
 },
 "source": [
-"You can evalute the tflite model with `evaluate_tflite` method to get its accuracy."
+"You can evaluate the tflite model with `evaluate_tflite` method to get its accuracy."
 ]
 },
 {
@@ -91,9 +91,9 @@ tflite_flex_android_library(
 visibility = ["//visibility:public"],
 )
 
-# EXPERIMENTAL: Android target target for GPU acceleration. Note that this
-# library contains *only* the GPU delegate and its Java wrapper; clients must
-# also include the core `tensorflowlite` runtime.
+# EXPERIMENTAL: Android target for GPU acceleration. Note that this library
+# contains *only* the GPU delegate and its Java wrapper; clients must also
+# include the core `tensorflowlite` runtime.
 android_library(
 name = "tensorflowlite_gpu",
 srcs = ["//tensorflow/lite/delegates/gpu/java/src/main/java/org/tensorflow/lite/gpu:gpu_delegate"],

@@ -22,10 +22,10 @@ limitations under the License.
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"
-// NOLINTNEXTLINE - This header file should't go to the top.
+// NOLINTNEXTLINE - This header file shouldn't go to the top.
 #include "tensorflow/lite/kernels/internal/optimized/integer_ops/transpose_conv.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
-// NOLINTNEXTLINE - This header file should't go to the top.
+// NOLINTNEXTLINE - This header file shouldn't go to the top.
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h"
 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
 #include "tensorflow/lite/kernels/internal/tensor.h"

@@ -204,7 +204,7 @@ TfLiteStatus ResizeAndTransposeWeights(TfLiteContext* context,
 TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, transposed_weights,
 transposed_weights_shape_array));
 
-// Transpose the weights from from OHWI order to HWOI order.
+// Transpose the weights from OHWI order to HWOI order.
 TransposeParams transpose_params;
 transpose_params.perm_count = 4;
 transpose_params.perm[0] = 1;

@@ -44,7 +44,7 @@ class VariableOpsTest : public ::testing::Test {
 }
 
 void ConstructGraph() {
-// Construct a graph like ths:
+// Construct a graph like this:
 // Input: %0, %1, %2
 // Output: %3
 // variable_assign(%0, %2)

@@ -84,8 +84,8 @@ $ python train.py --model CNN --person true
 
 #### Model type
 
-In the `--model` argument, you can can provide `CNN` or `LSTM`. The CNN
-model has a smaller size and lower latency.
+In the `--model` argument, you can provide `CNN` or `LSTM`. The CNN model has a
+smaller size and lower latency.
 
 ## Collecting new data
 

@@ -223,7 +223,7 @@ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=esp generate_micro_spee
 
 ### Building the example
 
-Go the the example project directory
+Go to the example project directory
 ```
 cd tensorflow/lite/micro/tools/make/gen/esp_xtensa-esp32/prj/micro_speech/esp-idf
 ```

@@ -577,7 +577,7 @@ using [ARM Mbed](https://github.com/ARMmbed/mbed-cli).
 
 The following instructions will help you build and deploy this example to
 [HIMAX WE1 EVB](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_board_brief)
-board. To undstand more about using this board, please check
+board. To understand more about using this board, please check
 [HIMAX WE1 EVB user guide](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide).
 
 ### Initial Setup

@@ -291,7 +291,7 @@ void rb_abort(ringbuf_t *rb) {
 }
 
 /**
-* Reset the ringbuffer and keep keep rb_write aborted.
+* Reset the ringbuffer and keep rb_write aborted.
 * Note that we are taking lock before even toggling `abort_write` variable.
 * This serves a special purpose to not allow this abort to be mixed with
 * rb_write.
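A generic sketch of the locking order the comment above describes (illustrative C++, not the example's actual ringbuf.c): the lock is taken before the abort flag is touched, so the reset cannot interleave with rb_write.

```
#include <mutex>

struct RingBuffer {
  std::mutex lock;
  size_t read_pos = 0, write_pos = 0, filled = 0;
  bool abort_write = false;
};

// Reset the buffer while keeping writers aborted; the mutex is held for the
// whole update so a concurrent writer never sees a half-reset state.
void ResetAndKeepAborted(RingBuffer* rb) {
  std::lock_guard<std::mutex> guard(rb->lock);
  rb->abort_write = true;
  rb->read_pos = rb->write_pos = rb->filled = 0;
}
```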
@@ -16,7 +16,7 @@ limitations under the License.
 // This data was extracted from the larger feature data held in
 // no_features_data.cc and consists of the 29th spectrogram slice of 43 values.
 // This is the expected result of running the sample data in
-// no_30ms_sample_data.cc through through the preprocessing pipeline.
+// no_30ms_sample_data.cc through the preprocessing pipeline.
 
 #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_FEATURE_DATA_SLICE_H_
 #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_NO_FEATURE_DATA_SLICE_H_

@@ -16,7 +16,7 @@ limitations under the License.
 // This data was extracted from the larger feature data held in
 // no_micro_features_data.cc and consists of the 26th spectrogram slice of 40
 // values. This is the expected result of running the sample data in
-// yes_30ms_sample_data.cc through through the preprocessing pipeline.
+// yes_30ms_sample_data.cc through the preprocessing pipeline.
 
 #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_FEATURE_DATA_SLICE_H_
 #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_MICRO_FEATURES_YES_FEATURE_DATA_SLICE_H_

@@ -16,7 +16,7 @@ limitations under the License.
 // This data was extracted from the larger feature data held in
 // no_features_data.cc and consists of the 29th spectrogram slice of 43 values.
 // This is the expected result of running the sample data in
-// no_30ms_sample_data.cc through through the preprocessing pipeline.
+// no_30ms_sample_data.cc through the preprocessing pipeline.
 
 #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_POWER_SPECTRUM_DATA_H_
 #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_NO_POWER_SPECTRUM_DATA_H_

@@ -136,7 +136,7 @@ TfLiteStatus GenerateSimpleFeatures(tflite::ErrorReporter* error_reporter,
 // Quantize the result into eight bits, effectively multiplying by two.
 // The 127.5 constant here has to match the features_max value defined in
 // tensorflow/examples/speech_commands/input_data.py, and this also assumes
-// that features_min is zero. It it wasn't, we'd have to subtract it first.
+// that features_min is zero. If it wasn't, we'd have to subtract it first.
 int quantized_average = roundf(average * (255.0f / 127.5f));
 if (quantized_average < 0) {
 quantized_average = 0;

@@ -16,7 +16,7 @@ limitations under the License.
 // This data was extracted from the larger feature data held in
 // no_features_data.cc and consists of the 26th spectrogram slice of 43 values.
 // This is the expected result of running the sample data in
-// yes_30ms_sample_data.cc through through the preprocessing pipeline.
+// yes_30ms_sample_data.cc through the preprocessing pipeline.
 
 #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_POWER_SPECTRUM_DATA_H_
 #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_SIMPLE_FEATURES_YES_POWER_SPECTRUM_DATA_H_

@@ -311,7 +311,7 @@ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=esp generate_person_det
 
 ### Building the example
 
-Go the the example project directory
+Go to the example project directory
 ```
 cd tensorflow/lite/micro/tools/make/gen/esp_xtensa-esp32/prj/person_detection/esp-idf
 ```
@@ -85,7 +85,7 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
 const TfLiteConvParams* params) {
 const auto* affine_quantization =
 reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
-// MLI optimized version only supports int8_t dataype, dilation factor of 1
+// MLI optimized version only supports int8_t datatype, dilation factor of 1
 // and per-axis quantization of weights (no broadcasting/per-tensor)
 bool ret_val = (filter->type == kTfLiteInt8) &&
 (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) &&

@@ -159,7 +159,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 int output_width = output->dims->data[2];
 int output_height = output->dims->data[1];
 
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 const int num_channels = filter->dims->data[kConvQuantizedDimension];
 data->per_channel_output_multiplier =
 reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(

@@ -241,7 +241,8 @@ TfLiteStatus EvalMliQuantizedPerChannel(
 const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter,
 const TfLiteTensor* bias, TfLiteTensor* output) {
 // Run Conv MLI kernel
-// MLI optimized version only supports int8_t dataype and dilation factor of 1
+// MLI optimized version only supports int8_t datatype and dilation factor of
+// 1
 if ((input->type == kTfLiteInt8) && (params->dilation_width_factor == 1) &&
 (params->dilation_height_factor == 1)) {
 mli_tensor mli_in = {};

@@ -299,7 +300,7 @@ TfLiteStatus EvalMliQuantizedPerChannel(
 const int overlap = kernel_height - cfg.stride_height;
 
 // for weight slicing (on output channels)
-// NHWC layout for weigths, output channel dimension is the first dimension.
+// NHWC layout for weights, output channel dimension is the first dimension.
 const int weight_out_ch_dimension = 0;
 int slice_channels =
 static_cast<int>(mli_weights.shape[weight_out_ch_dimension]);

@@ -362,9 +363,9 @@ TfLiteStatus EvalMliQuantizedPerChannel(
 in_slice_height, cfg.padding_top,
 cfg.padding_bottom, overlap);
 
-/* output tensor is alreade sliced in the output channel dimension.
+/* output tensor is already sliced in the output channel dimension.
 out_ch_slice.Sub() is the tensor for the amount of output channels of this
-itteration of the weight slice loop. This tensor needs to be further
+iteration of the weight slice loop. This tensor needs to be further
 sliced over the batch and height dimension. */
 ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension,
 out_slice_height);

@@ -72,7 +72,7 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
 const int in_ch = SizeOfDimension(input, 3);
 const int filters_num = SizeOfDimension(filter, 3);
 
-// MLI optimized version only supports int8_t dataype, dilation factor of 1
+// MLI optimized version only supports int8_t datatype, dilation factor of 1
 // and per-axis quantization of weights (no broadcasting/per-tensor) (in_ch ==
 // filters_num) || (in_ch == 1)) is a forbidding of channel multiplier logic
 // for multichannel input.

@@ -150,7 +150,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 // Per channel quantization is only needed for int8 inference. For other
 // quantized types, only a single scale and zero point is needed.
 const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 data->per_channel_output_multiplier =
 reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
 context, num_channels * sizeof(int32_t)));

@@ -280,7 +280,7 @@ TfLiteStatus EvalMliQuantizedPerChannel(
 const int overlap = kernelHeight - cfg.stride_height;
 
 // for weight slicing (on output channels)
-// HWCN layout for weigths, output channel dimension is the first dimension.
+// HWCN layout for weights, output channel dimension is the first dimension.
 const int weight_out_ch_dimension = 3;
 // bias has only 1 dimension
 const int bias_out_ch_dimension = 0;

@@ -345,9 +345,9 @@ TfLiteStatus EvalMliQuantizedPerChannel(
 mli_mov_tensor_sync(w_slice.Sub(), &copy_config, w_ptr);
 mli_mov_tensor_sync(b_slice.Sub(), &copy_config, b_ptr);
 
-/* input tensor is alreade sliced in the channel dimension.
+/* input tensor is already sliced in the channel dimension.
 out_ch_slice.Sub() is the tensor for the amount of channels of this
-itteration of the weight slice loop. This tensor needs to be further
+iteration of the weight slice loop. This tensor needs to be further
 sliced over the batch and height dimension. in_ch_slice.Sub() tensor
 contains batches of HWC tensors. so it is a 4 dimensional tensor. because
 the mli kernel will process one HWC tensor at a time, the 4 dimensional

@@ -360,9 +360,9 @@ TfLiteStatus EvalMliQuantizedPerChannel(
 inSliceHeight, padding_top,
 padding_bottom, overlap);
 
-/* output tensor is alreade sliced in the output channel dimension.
+/* output tensor is already sliced in the output channel dimension.
 out_ch_slice.Sub() is the tensor for the amount of output channels of this
-itteration of the weight slice loop. This tensor needs to be further
+iteration of the weight slice loop. This tensor needs to be further
 sliced over the batch and height dimension. */
 ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension,
 outSliceHeight);
@@ -52,7 +52,7 @@ constexpr int kOutputTensor = 0;
 bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
 const TfLiteTensor* filter, const TfLiteTensor* bias,
 const TfLiteFullyConnectedParams* params) {
-// MLI optimized version only supports int8_t dataype and no fused Relu and
+// MLI optimized version only supports int8_t datatype and no fused Relu and
 // symmetric per-tensor quantization of weights (not per-axis)
 bool ret_val = (filter->type == kTfLiteInt8) &&
 (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) &&

@@ -190,9 +190,9 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
 ops::micro::TensorSlicer in_slice(&mli_in, input_size_dimension,
 mli_in.shape[input_size_dimension]);
 
-/* output tensor is alreade sliced in the output size dimension.
+/* output tensor is already sliced in the output size dimension.
 out_ch_slice.Sub() is the tensor for the amount of output size of this
-itteration of the weight slice loop. This tensor needs to be further
+iteration of the weight slice loop. This tensor needs to be further
 sliced over the batch */
 ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), out_tensor_dimension,
 slice_size);

@@ -43,7 +43,7 @@ enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 };
 
 bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
 const TfLitePoolParams* params) {
-// MLI optimized version only supports int8_t dataype and no fused Relu
+// MLI optimized version only supports int8_t datatype and no fused Relu
 return (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone);
 }
 

@@ -163,7 +163,7 @@ TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors(
 init_arc_scratch_buffers();
 /* strategy for FC kernels:
 first allocate input, because this cannot be sliced. (in case of batch
-processing, only a single input needs to be allocated) then weigths & bias
+processing, only a single input needs to be allocated) then weights & bias
 because if fully loaded, they can be reused over batches. then output.
 The number of output channels (for weights slicing) depends on size of
 output and size of weights&bias */

@@ -275,7 +275,7 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io(
 max_out_lines_for_input =
 (max_lines_in - kernel_height + 1) / stride_height;
 }
-// Ten compute how many ouput lines fit into the output tensor.
+// Then compute how many output lines fit into the output tensor.
 max_lines_out =
 std::min(out_height, static_cast<int>(out->capacity) / line_size_out);
 // the smallest of the two determines the slice height for the output, and

@@ -141,7 +141,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 int output_width = output->dims->data[2];
 int output_height = output->dims->data[1];
 
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 const int num_channels = filter->dims->data[kConvQuantizedDimension];
 data->per_channel_output_multiplier =
 static_cast<int32_t*>(context->AllocatePersistentBuffer(

@@ -127,7 +127,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 // Per channel quantization is only needed for int8_t inference. For other
 // quantized types, only a single scale and zero point is needed.
 const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 data->per_channel_output_multiplier =
 reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
 context, num_channels * sizeof(int32_t)));

@@ -23,12 +23,12 @@ limitations under the License.
 namespace tflite {
 namespace micro {
 
-// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) lifecyle
-// (init, prepare, invoke). All internal allocations are handled by this class.
-// Simply pass in the registration, list of required tensors, inputs array,
-// outputs array, and any pre-builtin data. Calling Invoke() will automatically
-// walk the kernl and outputs will be ready on the the TfLiteTensor output
-// provided during construction.
+// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
+// lifecycle (init, prepare, invoke). All internal allocations are handled by
+// this class. Simply pass in the registration, list of required tensors, inputs
+// array, outputs array, and any pre-builtin data. Calling Invoke() will
+// automatically walk the kernel and outputs will be ready on the TfLiteTensor
+// output provided during construction.
 class KernelRunner {
 public:
 KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
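For orientation (not from the diff): a hypothetical usage fragment for this helper in a kernel test. The constructor's trailing arguments and the `InitAndPrepare`/`Invoke` method names are inferred from the comment above and may not match the real header exactly.

```
// Hypothetical fragment: `registration`, `tensors`, `tensors_size`,
// `inputs_array`, and `outputs_array` are assumed to be set up by the test.
tflite::micro::KernelRunner runner(registration, tensors, tensors_size,
                                   inputs_array, outputs_array,
                                   /*builtin_data=*/nullptr);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
// Results can now be read from the output TfLiteTensor(s) passed in above.
```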
@@ -362,7 +362,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 // Per channel quantization is only needed for int8_t inference. For other
 // quantized types, only a single scale and zero point is needed.
 const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 data->per_channel_output_multiplier =
 reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
 context, num_channels * sizeof(int32_t)));

@@ -157,7 +157,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 int output_width = output->dims->data[2];
 int output_height = output->dims->data[1];
 
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 const int num_channels = filter->dims->data[kConvQuantizedDimension];
 TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
 context, num_channels * sizeof(int32_t),

@@ -145,7 +145,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 // Per channel quantization is only needed for int8_t inference. For other
 // quantized types, only a single scale and zero point is needed.
 const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
 context, num_channels * sizeof(int32_t),
 reinterpret_cast<void**>(&data->per_channel_output_multiplier)));

@@ -325,7 +325,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 // Per channel quantization is only needed for int8_t inference. For other
 // quantized types, only a single scale and zero point is needed.
 const int num_channels = filter->dims->data[kConvQuantizedDimension];
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 op_data->per_channel_output_multiplier =
 reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
 context, num_channels * sizeof(int32_t)));

@@ -368,7 +368,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 // Per channel quantization is only needed for int8_t inference. For other
 // quantized types, only a single scale and zero point is needed.
 const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-// Dynimically allocate per-channel quantization parameters.
+// Dynamically allocate per-channel quantization parameters.
 op_data->per_channel_output_multiplier =
 reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
 context, num_channels * sizeof(int32_t)));

@@ -32,7 +32,7 @@ struct OpData {
 };
 
 // Number of unique int8_t and int16_t values. Used in exponent lookup table
-// conputation.
+// computation.
 constexpr int kInt8Range =
 std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min() + 1;
 constexpr int kInt16Range = std::numeric_limits<int16_t>::max() -

@@ -52,7 +52,7 @@ constexpr int kMaxExponentValue = (1 << kExpFractionalBits);
 TfLiteStatus Softmax(OpData op_data, const RuntimeShape& input_shape,
 const int8_t* input_data, const RuntimeShape& output_shape,
 int16_t* output_data) {
-// The last dimension is depth. Outer size is the the total input size
+// The last dimension is depth. Outer size is the total input size
 // divided by depth.
 const int trailing_dim = input_shape.DimensionsCount() - 1;
 const int outer_size =

@@ -75,7 +75,7 @@ TfLiteStatus Softmax(OpData op_data, const RuntimeShape& input_shape,
 input_diff == 0 ? kMaxExponentValue : op_data.exp_lut[input_diff];
 }
 
-// Ensure we cannnot overflow the full_range_output value. We need to
+// Ensure we cannot overflow the full_range_output value. We need to
 // guarantee that kInt16Range * max(input_data) / sum_of_exps < kInt16Range.
 TFLITE_DCHECK(sum_of_exps >= kMaxExponentValue);
 
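A short note on why the guarantee above holds (my reading of the surrounding code, not text from the diff): each lookup value is at most kMaxExponentValue, and the DCHECK asserts the sum of exponents is at least that large, so the scaled ratio cannot exceed the int16 range:

$$
\text{output}_i \approx \frac{e_i}{\sum_j e_j}\,k_{\text{Int16Range}},
\qquad e_i \le k_{\text{MaxExponentValue}} \le \sum_j e_j
\;\Rightarrow\; \text{output}_i \le k_{\text{Int16Range}}.
$$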
@@ -6,9 +6,9 @@ endif
 TENSORFLOW_ROOT :=
 MAKEFILE_DIR := tensorflow/lite/micro/tools/make
 
-# Override this on make command line to to parse thirdy party downloads during project generation
-# make -f tensorflow/lite/micro/tools/make/Makefile PARSE_THIRD_PARTY=true TARGET=apollo3evb generate_hello_world_make_project
-PARSE_THIRD_PARTY :=
+# Override this on make command line to parse third party downloads during project generation
+# make -f tensorflow/lite/micro/tools/make/Makefile PARSE_THIRD_PARTY=true TARGET=apollo3evb generate_hello_world_make_project
+PARSE_THIRD_PARTY :=
 
 
 # Pull in some convenience functions.

@@ -1,6 +1,6 @@
 # TensorFlow Lite Micro ARC Make Project for EM SDP Board.
 
-This folder has been autogenerated by TensorFlow, and contains source, header, and project files needed to build a single TensorFlow Lite Micro target using make tool and and a Synopsys DesignWare ARC processor compatible toolchain, specifically the ARC MetaWare Development Toolkit (MWDT).
+This folder has been autogenerated by TensorFlow, and contains source, header, and project files needed to build a single TensorFlow Lite Micro target using make tool and a Synopsys DesignWare ARC processor compatible toolchain, specifically the ARC MetaWare Development Toolkit (MWDT).
 
 This project has been generated for the ARC EM Software Development Platform (EM SDP). The built application can be run only on this platform.
 

@@ -38,7 +38,7 @@ class ProfileSummaryFormatter {
 const std::map<uint32_t, std::unique_ptr<tensorflow::StatsCalculator>>&
 stats_calculator_map,
 const tensorflow::StatsCalculator& delegate_stats_calculator) const = 0;
-// Returns a string detailing the short summary of the the accumulated runtime
+// Returns a string detailing the short summary of the accumulated runtime
 // stats in StatsCalculator of ProfileSummarizer.
 virtual std::string GetShortSummary(
 const std::map<uint32_t, std::unique_ptr<tensorflow::StatsCalculator>>&

@@ -32,7 +32,7 @@ _calibration_wrapper = LazyLoader(
 
 
 def add_intermediate_tensors(model_content):
-"""Adds intermedaite tensors to fused op if needed."""
+"""Adds intermediate tensors to fused op if needed."""
 return _calibration_wrapper.AddIntermediateTensors(model_content)
 
 
@@ -239,7 +239,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
 property.quantizable = false;
 break;
 }
-// TODO(jianlijianli): extend LSTM op spec to inlucde input, bias etc.
+// TODO(jianlijianli): extend LSTM op spec to include input, bias etc.
 // LSTM needs 5 intermediate tensors. This agrees with the fully quantized
 // kernels in lstm_eval.cc
 if (op_variant.use_layer_norm && op_variant.use_projection &&

@@ -522,7 +522,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
 tensor_property_9.symmetric = true;
 // Without layer norm, we choose to quantize bias with the scale of
 // input and its corresponding weight. The other choice will
-// be to ues the scale of recurrent and its corresponding weight but we
+// be to use the scale of recurrent and its corresponding weight but we
 // choose to use the smaller scale, which means higher resolution.
 TensorProperty tensor_property_12;
 tensor_property_12.use_derived_scale = true;
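For background (standard affine-quantization arithmetic, not text from this diff): "quantize bias with the scale of input and its corresponding weight" refers to deriving the bias scale as the product of the two, so the bias can be added directly to the int32 accumulator of input-weight products:

$$
s_{\text{bias}} = s_{\text{input}} \cdot s_{\text{weight}},
\qquad q_{\text{bias}} = \operatorname{round}\!\left(\frac{b}{s_{\text{bias}}}\right).
$$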
@@ -574,7 +574,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
 property.outputs = {{0, {}}};
 property.intermediates = {
 // Without layer normalization, intermediate tensors 0, 1, 2, 3 are
-// not used and and their quantization parameters are ignored.
+// not used and their quantization parameters are ignored.
 {0, {}},
 {1, {}},
 {2, {}},

@@ -589,7 +589,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
 !op_variant.use_peephole) {
 // Without layer norm, we choose to quantize bias with the scale of
 // input and its corresponding weight. The other choice will
-// be to ues the scale of recurrent and its corresponding weight but we
+// be to use the scale of recurrent and its corresponding weight but we
 // choose to use the smaller scale, which means higher resolution.
 TensorProperty tensor_property_12;
 tensor_property_12.use_derived_scale = true;

@@ -656,7 +656,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
 tensor_property_9.symmetric = true;
 // Without layer norm, we choose to quantize bias with the scale of
 // input and its corresponding weight. The other choice will
-// be to ues the scale of recurrent and its corresponding weight but we
+// be to use the scale of recurrent and its corresponding weight but we
 // choose to use the smaller scale, which means higher resolution.
 TensorProperty tensor_property_12;
 tensor_property_12.use_derived_scale = true;

@@ -722,7 +722,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
 !op_variant.use_peephole) {
 // Without layer norm, we choose to quantize bias with the scale of
 // input and its corresponding weight. The other choice will
-// be to ues the scale of recurrent and its corresponding weight but we
+// be to use the scale of recurrent and its corresponding weight but we
 // choose to use the smaller scale, which means higher resolution.
 TensorProperty tensor_property_12;
 tensor_property_12.use_derived_scale = true;

@@ -949,7 +949,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
 case BuiltinOperator_SVDF: {
 TensorProperty tensor_property_time;
 // Only 10bits are needed because 6bits are reserved for the reduce
-// operation after elemement-wise multiplication between state and time
+// operation after element-wise multiplication between state and time
 // weights.
 tensor_property_time.number_of_bits = 10;
 TensorProperty tensor_property_bias;

@@ -168,7 +168,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
 return 3;
 }
 // For float and uint8 fixed point kernels, if the weight is
-// Shuffled4x16Int8, is is version 2.
+// Shuffled4x16Int8, it is version 2.
 if (op_sig.options.fully_connected.weights_format ==
 FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8) {
 return 2;