Prefer the standard fixed-width integer types over custom type aliases.

PiperOrigin-RevId: 323036134
Change-Id: Id035d66f9e37485eb059cfa5756440c4f72871d1
Author: Advait Jain, 2020-07-24 11:31:32 -07:00; committed by TensorFlower Gardener
parent 435d9b7858
commit 1d4a43b0bf
7 changed files with 278 additions and 254 deletions
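The change is mechanical: each use of the TF Lite convenience aliases (int8, uint8, int16, uint16, int32, uint32) in the files below is spelled as the corresponding <cstdint> fixed-width type, so the code no longer depends on aliases that static-memory builds compile out. A minimal before/after sketch of the pattern (the function here is illustrative, not taken from the diff):

#include <cstdint>

// Before: relied on tflite's `using int32 = std::int32_t;` alias.
//   inline int32 AddOffset(int32 x, int32 offset) { return x + offset; }

// After: the standard type works with or without the alias declarations.
inline std::int32_t AddOffset(std::int32_t x, std::int32_t offset) {
  return x + offset;
}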

View File

@ -209,6 +209,14 @@ config_setting(
},
)
config_setting(
name = "tf_lite_static_memory",
values = {
"copt": "-DTF_LITE_STATIC_MEMORY",
"cpu": "k8",
},
)
cc_library(
name = "common",
srcs = [],
@ -455,16 +463,12 @@ cc_library(
"reference/integer_ops/add.h",
"reference/integer_ops/conv.h",
"reference/integer_ops/depthwise_conv.h",
"reference/integer_ops/dequantize.h",
"reference/integer_ops/fully_connected.h",
"reference/integer_ops/l2normalization.h",
"reference/integer_ops/log_softmax.h",
"reference/integer_ops/logistic.h",
"reference/integer_ops/mean.h",
"reference/integer_ops/mul.h",
"reference/integer_ops/pooling.h",
"reference/integer_ops/tanh.h",
"reference/integer_ops/transpose_conv.h",
"reference/l2normalization.h",
"reference/logistic.h",
"reference/maximum_minimum.h",
@ -477,17 +481,25 @@ cc_library(
"reference/process_broadcast_shapes.h",
"reference/quantize.h",
"reference/reduce.h",
"reference/reference_ops.h",
"reference/requantize.h",
"reference/resize_nearest_neighbor.h",
"reference/round.h",
"reference/softmax.h",
"reference/sparse_ops/fully_connected.h",
"reference/strided_slice.h",
"reference/sub.h",
"reference/svdf.h",
"reference/tanh.h",
] + select({
":tf_lite_static_memory": [],
"//conditions:default": [
"reference/integer_ops/dequantize.h",
"reference/integer_ops/log_softmax.h",
"reference/integer_ops/mean.h",
"reference/integer_ops/transpose_conv.h",
"reference/reference_ops.h",
"reference/sparse_ops/fully_connected.h",
],
}),
build_for_embedded = True,
copts = tflite_copts(),
select_deps = {
@ -787,7 +799,12 @@ cc_library(
":freebsd": [
":sse_tensor_utils",
],
":windows": [":sse_tensor_utils"],
":windows": [
":sse_tensor_utils",
],
":tf_lite_static_memory": [
":portable_tensor_utils",
],
"//conditions:default": [
":portable_tensor_utils",
],
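For orientation: the new tf_lite_static_memory config_setting above matches builds whose copts define TF_LITE_STATIC_MEMORY, and the select()s then drop several reference headers and force :portable_tensor_utils. In the sources the same define is tested directly; a small hedged sketch of that correspondence (the program is illustrative, not part of the change):

#include <cstdio>

// Built with e.g. --copt=-DTF_LITE_STATIC_MEMORY, which is what the
// tf_lite_static_memory config_setting matches.
int main() {
#ifdef TF_LITE_STATIC_MEMORY
  std::puts("static-memory build: portable kernels, no legacy type aliases");
#else
  std::puts("default build: full reference kernels and optimized backends");
#endif
  return 0;
}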

View File

@ -138,22 +138,23 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
#endif
}
inline int32 MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32 x, int32 quantized_multiplier, int left_shift) {
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
return RoundingDivideByPOT(
SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
}
inline int32 MultiplyByQuantizedMultiplierGreaterThanOne(
int32 x, int32 quantized_multiplier, int left_shift) {
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
quantized_multiplier);
}
inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
int32_t quantized_multiplier,
int shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
@ -164,8 +165,8 @@ inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
right_shift);
}
inline int32 MultiplyByQuantizedMultiplier(int64_t x,
int32 quantized_multiplier,
inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
int32_t quantized_multiplier,
int shift) {
// Inputs:
// - quantized_multiplier has fixed point at bit 31
@ -173,7 +174,7 @@ inline int32 MultiplyByQuantizedMultiplier(int64_t x,
//
// Assumptions: The following input ranges are assumed
// - quantize_scale>=0 (the usual range is (1<<30) to (1<<31)-1)
// - scaling is chosen so final scaled result fits in int32
// - scaling is chosen so final scaled result fits in int32_t
// - input x is in the range -(1<<47) <= x < (1<<47)
assert(quantized_multiplier >= 0);
assert(shift >= -31 && shift < 8);
@ -262,7 +263,7 @@ inline void gen_lut(const std::function<double(double)>& func, double min,
std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
}
// int16 func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
// 512 base value, lut[513] only for calculate slope
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
@ -413,21 +414,21 @@ SaturatingRoundingMultiplyByPOTParam(
SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
}
// Convert int32 multiplier to int16 with rounding.
inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32,
int16_t* multiplier_int16) {
TFLITE_DCHECK_GE(multiplier_int32, 0);
// Convert int32_t multiplier to int16_t with rounding.
inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
int16_t* multiplier_int16_t) {
TFLITE_DCHECK_GE(multiplier_int32_t, 0);
static constexpr int32_t kRoundingOffset = 1 << 15;
if (multiplier_int32 >=
if (multiplier_int32_t >=
std::numeric_limits<int32_t>::max() - kRoundingOffset) {
*multiplier_int16 = std::numeric_limits<int16_t>::max();
*multiplier_int16_t = std::numeric_limits<int16_t>::max();
return;
}
const int32_t result = (multiplier_int32 + kRoundingOffset) >> 16;
TFLITE_DCHECK_LE(result << 16, multiplier_int32 + kRoundingOffset);
TFLITE_DCHECK_GT(result << 16, multiplier_int32 - kRoundingOffset);
*multiplier_int16 = result;
TFLITE_DCHECK_EQ(*multiplier_int16, result);
const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
*multiplier_int16_t = result;
TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
}
// Minimum output bits to accommodate log of maximum input range. It actually
@ -438,15 +439,13 @@ inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32,
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
constexpr int min_log_x_output_bits(int input_bits) {
return input_bits > 90
? 7
: input_bits > 44
? 6
: input_bits > 21
? 5
: input_bits > 10
? 4
: input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
return input_bits > 90 ? 7
: input_bits > 44 ? 6
: input_bits > 21 ? 5
: input_bits > 10 ? 4
: input_bits > 4 ? 3
: input_bits > 1 ? 2
: 1;
}
// Although currently the name of this function says that it cannot handle
@ -454,17 +453,17 @@ constexpr int min_log_x_output_bits(int input_bits) {
// x_max is the largest representable input. In other words, the output range
// is symmetric.
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1_impl(
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32>::digits - 1);
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32>::digits);
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
// The reason for accumulating the result with an extra bit of headroom is
// that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
// recip_denom will otherwise introduce an error.
static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 1488522236, std::log(2.0));
@ -492,10 +491,10 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// required shift "ourselves" instead of using, say, Rescale.
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
// z_a_pow_2 = input_integer_bits - z_a_headroom;
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
FixedPoint0 r_a_tmp =
SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
const int32 r_a_raw =
const int32_t r_a_raw =
SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
// z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
// z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
@ -507,8 +506,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
FixedPoint0 z_b = z_a * sqrt_half;
int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
const int32 r_b_raw =
int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
const int32_t r_b_raw =
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
@ -536,9 +535,9 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
}
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1(
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
static_assert(
OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
"Output integer bits must be sufficient to accommodate logs of inputs.");
@ -547,25 +546,25 @@ log_x_for_x_greater_than_or_equal_to_1(
input_val);
}
inline int32 GetReciprocal(int32 x, int x_integer_digits,
inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
int* num_bits_over_unit) {
int headroom_plus_one = CountLeadingZeros(static_cast<uint32>(x));
int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
// This is the number of bits to the left of the binary point above 1.0.
// Consider x=1.25. In that case shifted_scale=0.8 and
// no later adjustment will be needed.
*num_bits_over_unit = x_integer_digits - headroom_plus_one;
const int32 shifted_sum_minus_one =
static_cast<int32>((static_cast<uint32>(x) << headroom_plus_one) -
(static_cast<uint32>(1) << 31));
const int32_t shifted_sum_minus_one =
static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
(static_cast<uint32_t>(1) << 31));
gemmlowp::FixedPoint<int32, 0> shifted_scale =
gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
gemmlowp::one_over_one_plus_x_for_x_in_0_1(
gemmlowp::FixedPoint<int32, 0>::FromRaw(shifted_sum_minus_one));
gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
return shifted_scale.raw();
}
inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
int32* output_inv_sqrt,
inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
int32_t* output_inv_sqrt,
int* output_shift) {
TFLITE_DCHECK_GE(input, 0);
if (input <= 1) {
@ -585,7 +584,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
++*output_shift;
}
const unsigned max_left_shift_bits =
CountLeadingZeros(static_cast<uint32>(input)) - 1;
CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
*output_shift -= left_shift_bit_pairs;
@ -597,8 +596,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
using gemmlowp::SaturatingRoundingMultiplyByPOT;
// Using 3 integer bits gives us enough room for the internal arithmetic in
// this Newton-Raphson iteration.
using F3 = FixedPoint<int32, 3>;
using F0 = FixedPoint<int32, 0>;
using F3 = FixedPoint<int32_t, 3>;
using F0 = FixedPoint<int32_t, 0>;
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
const F3 fixedpoint_half_input =
SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
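As context for the int32 -> int32_t edits in these fixed-point helpers: MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift) effectively multiplies x by the real value (quantized_multiplier / 2^31) * 2^shift with rounding. A hedged, slow-but-readable restatement of that arithmetic for checking values by hand; it ignores the saturation the gemmlowp version performs and assumes a non-negative product and -31 < shift < 31:

#include <cstdint>

// Reference-only sketch, not the production implementation above.
inline std::int32_t SlowMultiplyByQuantizedMultiplier(std::int32_t x,
                                                      std::int32_t multiplier,
                                                      int shift) {
  const std::int64_t product =
      static_cast<std::int64_t>(x) * multiplier;  // Q31 fixed-point result
  const int right_shift = 31 - shift;             // undo the Q31 scaling
  const std::int64_t rounding = std::int64_t{1} << (right_shift - 1);
  return static_cast<std::int32_t>((product + rounding) >> right_shift);
}

// Example: a real-valued scale of 0.75 is encoded as multiplier =
// round(0.75 * 2^31) = 1610612736 with shift = 0, so
// SlowMultiplyByQuantizedMultiplier(100, 1610612736, 0) == 75.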

View File

@ -76,13 +76,15 @@ limitations under the License.
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
#endif
// TODO(ahentz): Clean up.
#ifndef TF_LITE_STATIC_MEMORY
// TODO(b/162019032): Consider removing these type-aliases.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
#endif // !defined(TF_LITE_STATIC_MEMORY)
// TFLITE_DEPRECATED()
//
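With the aliases now wrapped in #ifndef TF_LITE_STATIC_MEMORY, any remaining use of int32 and friends becomes a compile error in static-memory builds, which is what motivates the renames throughout the rest of this commit. A hedged illustration of that failure mode (the snippet is illustrative, not from the tree):

#include <cstdint>

// Default builds: the compatibility header declares
// `using int32 = std::int32_t;`, so the commented-out line below would compile.
//   int32 stale_value = 0;        // error under -DTF_LITE_STATIC_MEMORY:
//                                 // "unknown type name 'int32'"
std::int32_t portable_value = 0;   // compiles in both configurations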

View File

@ -342,13 +342,13 @@ void NudgeQuantizationRange(const float min, const float max,
const float quant_max_float = static_cast<float>(quant_max);
*nudged_scale = (max - min) / (quant_max_float - quant_min_float);
const float zero_point_from_min = quant_min_float - min / *nudged_scale;
uint16 nudged_zero_point;
uint16_t nudged_zero_point;
if (zero_point_from_min < quant_min_float) {
nudged_zero_point = static_cast<uint16>(quant_min);
nudged_zero_point = static_cast<uint16_t>(quant_min);
} else if (zero_point_from_min > quant_max_float) {
nudged_zero_point = static_cast<uint16>(quant_max);
nudged_zero_point = static_cast<uint16_t>(quant_max);
} else {
nudged_zero_point = static_cast<uint16>(TfLiteRound(zero_point_from_min));
nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
}
*nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
*nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
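For readers of the uint16 -> uint16_t change above: NudgeQuantizationRange clamps and rounds the zero point so that the real value 0.0 lands exactly on an integer grid point. A small worked example with illustrative values (not from the diff):

// min = -1.0f, max = 1.0f, quant_min = 0, quant_max = 255:
//   nudged_scale        = (1.0 - (-1.0)) / (255 - 0)    = 2/255  (~0.007843)
//   zero_point_from_min = 0 - (-1.0 / nudged_scale)     = 127.5
//   nudged_zero_point   = uint16_t(TfLiteRound(127.5))  = 128
//   nudged_min          = (0   - 128) * nudged_scale    ~ -1.0039
//   nudged_max          = (255 - 128) * nudged_scale    ~  0.9961
// Zero is then exactly representable (at quantized value 128) at the cost of a
// slightly shifted real-valued range.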

View File

@ -76,12 +76,12 @@ class VectorOfTensors {
// A list of quantized tensors in a format that can be used by kernels like
// split and concatenation.
class VectorOfQuantizedTensors : public VectorOfTensors<uint8> {
class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
public:
// Build with the tensors in 'tensor_list'.
VectorOfQuantizedTensors(const TfLiteContext& context,
const TfLiteIntArray& tensor_list)
: VectorOfTensors<uint8>(context, tensor_list) {
: VectorOfTensors<uint8_t>(context, tensor_list) {
for (int i = 0; i < tensor_list.size; ++i) {
TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
zero_point_.push_back(t->params.zero_point);
@ -90,10 +90,10 @@ class VectorOfQuantizedTensors : public VectorOfTensors<uint8> {
}
const float* scale() const { return scale_.data(); }
const int32* zero_point() const { return zero_point_.data(); }
const int32_t* zero_point() const { return zero_point_.data(); }
private:
std::vector<int32> zero_point_;
std::vector<int32_t> zero_point_;
std::vector<float> scale_;
};

View File

@ -16,9 +16,9 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#if defined(__SSSE3__)
#if defined(__SSSE3__) && !defined(TF_LITE_STATIC_MEMORY)
#include "tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h"
#elif defined(USE_NEON)
#elif defined(USE_NEON) && !defined(TF_LITE_STATIC_MEMORY)
#include "tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h"
#else
#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h"

View File

@ -24,24 +24,29 @@ limitations under the License.
namespace tflite {
enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
enum class PaddingType : uint8 { kNone, kSame, kValid };
enum class FusedActivationFunctionType : uint8_t {
kNone,
kRelu6,
kRelu1,
kRelu
};
enum class PaddingType : uint8_t { kNone, kSame, kValid };
struct PaddingValues {
int16 width;
int16 height;
int16_t width;
int16_t height;
// offset is used for calculating "remaining" padding, for example, `width`
// is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is
// 1 + 1 = 2.
int16 width_offset;
int16_t width_offset;
// Same as width_offset except it's over the height dimension.
int16 height_offset;
int16_t height_offset;
};
// This enumeration allows for non-default formats for the weights array
// of a fully-connected operator, allowing the use of special optimized
// runtime paths.
enum class FullyConnectedWeightsFormat : uint8 {
enum class FullyConnectedWeightsFormat : uint8_t {
// Default format (flat 2D layout, the inner contiguous dimension
// is input_depth, the outer non-contiguous dimension is output_depth)
kDefault,
@ -88,11 +93,11 @@ enum class FullyConnectedWeightsFormat : uint8 {
// maximize arithmetic throughput.
//
// Finally, the 'Int8' part in the name refers to the fact that this
// weights format has each weights value encoded as a signed int8 value,
// even if the data type of the weights buffer is uint8. This is intended
// weights format has each weights value encoded as a signed int8_t value,
// even if the data type of the weights buffer is uint8_t. This is intended
// to save runtime kernels the effort to have to XOR the top bit of these
// bytes before using them in signed arithmetic, see this file for more
// explanations on the 'signed int8 trick' in matrix multiplication kernels:
// explanations on the 'signed int8_t trick' in matrix multiplication kernels:
//
// tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
//
@ -111,7 +116,7 @@ enum class FullyConnectedWeightsFormat : uint8 {
// the real 0 value, and scale designates the difference between the real values
// corresponding to consecutive quantized values differing by 1.
struct QuantizationParams {
int32 zero_point = 0;
int32_t zero_point = 0;
double scale = 0.0;
};
@ -141,19 +146,19 @@ class RuntimeShape {
#ifdef TF_LITE_STATIC_MEMORY
TFLITE_CHECK(false && "No shape resizing supported on this platform");
#else // TF_LITE_STATIC_MEMORY
dims_pointer_ = new int32[dimensions_count];
dims_pointer_ = new int32_t[dimensions_count];
#endif // TF_LITE_STATIC_MEMORY
}
}
RuntimeShape(int shape_size, int32 value) : size_(0) {
RuntimeShape(int shape_size, int32_t value) : size_(0) {
Resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
SetDim(i, value);
}
}
RuntimeShape(int dimensions_count, const int32* dims_data) : size_(0) {
RuntimeShape(int dimensions_count, const int32_t* dims_data) : size_(0) {
ReplaceWith(dimensions_count, dims_data);
}
@ -165,14 +170,15 @@ class RuntimeShape {
// rolls out.
RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) {
if (size_ > kMaxSmallSize) {
dims_pointer_ = new int32[size_];
dims_pointer_ = new int32_t[size_];
}
std::memcpy(DimsData(), other.DimsData(), sizeof(int32) * size_);
std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * size_);
}
bool operator==(const RuntimeShape& comp) const {
return this->size_ == comp.size_ &&
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32)) == 0;
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) ==
0;
}
~RuntimeShape() {
@ -185,13 +191,13 @@ class RuntimeShape {
}
}
inline int32 DimensionsCount() const { return size_; }
inline int32 Dims(int i) const {
inline int32_t DimensionsCount() const { return size_; }
inline int32_t Dims(int i) const {
TFLITE_DCHECK_GE(i, 0);
TFLITE_DCHECK_LT(i, size_);
return size_ > kMaxSmallSize ? dims_pointer_[i] : dims_[i];
}
inline void SetDim(int i, int32 val) {
inline void SetDim(int i, int32_t val) {
TFLITE_DCHECK_GE(i, 0);
TFLITE_DCHECK_LT(i, size_);
if (size_ > kMaxSmallSize) {
@ -201,14 +207,14 @@ class RuntimeShape {
}
}
inline int32* DimsData() {
inline int32_t* DimsData() {
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
}
inline const int32* DimsData() const {
inline const int32_t* DimsData() const {
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
}
// The caller must ensure that the shape is no bigger than 5-D.
inline const int32* DimsDataUpTo5D() const { return dims_; }
inline const int32_t* DimsDataUpTo5D() const { return dims_; }
inline void Resize(int dimensions_count) {
if (size_ > kMaxSmallSize) {
@ -223,15 +229,15 @@ class RuntimeShape {
#ifdef TF_LITE_STATIC_MEMORY
TFLITE_CHECK(false && "No shape resizing supported on this platform");
#else // TF_LITE_STATIC_MEMORY
dims_pointer_ = new int32[dimensions_count];
dims_pointer_ = new int32_t[dimensions_count];
#endif // TF_LITE_STATIC_MEMORY
}
}
inline void ReplaceWith(int dimensions_count, const int32* dims_data) {
inline void ReplaceWith(int dimensions_count, const int32_t* dims_data) {
Resize(dimensions_count);
int32* dst_dims = DimsData();
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32));
int32_t* dst_dims = DimsData();
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
}
template <typename T>
@ -239,7 +245,7 @@ class RuntimeShape {
const int dimensions_count =
std::distance(src_iterable.begin(), src_iterable.end());
Resize(dimensions_count);
int32* data = DimsData();
int32_t* data = DimsData();
for (auto it : src_iterable) {
*data = it;
++data;
@ -288,13 +294,13 @@ class RuntimeShape {
SetDim(i, pad_value);
}
std::memcpy(DimsData() + size_increase, shape.DimsData(),
sizeof(int32) * shape.DimensionsCount());
sizeof(int32_t) * shape.DimensionsCount());
}
int32 size_;
int32_t size_;
union {
int32 dims_[kMaxSmallSize];
int32* dims_pointer_;
int32_t dims_[kMaxSmallSize];
int32_t* dims_pointer_;
};
};
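For orientation while reading the RuntimeShape hunks above: the class keeps up to kMaxSmallSize dimensions in an inline int32_t array and only switches the union over to a heap pointer for larger ranks, and that heap path is compiled into a hard TFLITE_CHECK failure under TF_LITE_STATIC_MEMORY. A stripped-down sketch of that storage pattern (not the real class, which also handles copying, resizing, flat-size computation, and padding):

#include <cstdint>

class TinyShape {
 public:
  static constexpr int kMaxSmallSize = 5;  // illustrative inline capacity

  explicit TinyShape(int dimensions_count) : size_(dimensions_count) {
#ifndef TF_LITE_STATIC_MEMORY
    if (size_ > kMaxSmallSize) {
      // Large ranks spill to the heap; small ranks stay in the inline array.
      dims_pointer_ = new std::int32_t[size_];
    }
#endif  // In static-memory builds the real class aborts instead of allocating.
  }

  ~TinyShape() {
#ifndef TF_LITE_STATIC_MEMORY
    if (size_ > kMaxSmallSize) delete[] dims_pointer_;
#endif
  }

  std::int32_t* DimsData() {
    return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
  }

 private:
  std::int32_t size_;
  union {
    std::int32_t dims_[kMaxSmallSize];
    std::int32_t* dims_pointer_;
  };
};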
@ -713,7 +719,7 @@ void ComputeStrides(Dims<N>* dims) {
}
}
enum class BroadcastableOpCategory : uint8 {
enum class BroadcastableOpCategory : uint8_t {
kNone,
kNonBroadcast, // Matching input shapes.
kFirstInputBroadcastsFast, // Fivefold nested loops.
@ -729,21 +735,21 @@ static_assert(sizeof(MinMax) == 8, "");
struct ActivationParams {
FusedActivationFunctionType activation_type;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
};
struct ReluParams : public ActivationParams {
int32 input_offset;
int32 output_offset;
int32 output_multiplier;
int32_t input_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
};
// Styles of resizing op usages. For example, kImageStyle can be used with a Pad
// op for pattern-specific optimization.
enum class ResizingCategory : uint8 {
enum class ResizingCategory : uint8_t {
kNone,
kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}.
kGenericResize,
@ -753,27 +759,27 @@ enum class ResizingCategory : uint8 {
struct ArithmeticParams {
// Shape dependent / common to data / op types.
BroadcastableOpCategory broadcast_category;
// uint8 inference params.
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
int32 output_multiplier;
// uint8_t inference params.
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// Add / Sub, not Mul, uint8 inference params.
// Add / Sub, not Mul, uint8_t inference params.
int left_shift;
int32 input1_multiplier;
int32_t input1_multiplier;
int input1_shift;
int32 input2_multiplier;
int32_t input2_multiplier;
int input2_shift;
// TODO(b/158622529): Union the following activation params.
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
// int64 activation params.
// int64_t activation params.
int64_t int64_activation_min;
int64_t int64_activation_max;
@ -790,22 +796,22 @@ struct ArithmeticParams {
};
struct ConcatenationParams {
int8 axis;
const int32* input_zeropoint;
int8_t axis;
const int32_t* input_zeropoint;
const float* input_scale;
uint16 inputs_count;
int32 output_zeropoint;
uint16_t inputs_count;
int32_t output_zeropoint;
float output_scale;
};
struct ComparisonParams {
// uint8 inference params.
// uint8_t inference params.
int left_shift;
int32 input1_offset;
int32 input1_multiplier;
int32_t input1_offset;
int32_t input1_multiplier;
int input1_shift;
int32 input2_offset;
int32 input2_multiplier;
int32_t input2_offset;
int32_t input2_multiplier;
int input2_shift;
// Shape dependent / common to inference types.
bool is_broadcast;
@ -815,81 +821,81 @@ struct ConvParams {
PaddingType padding_type;
PaddingValues padding_values;
// TODO(starka): This was just "stride", so check that width+height is OK.
int16 stride_width;
int16 stride_height;
int16 dilation_width_factor;
int16 dilation_height_factor;
// uint8 inference params.
int16_t stride_width;
int16_t stride_height;
int16_t dilation_width_factor;
int16_t dilation_height_factor;
// uint8_t inference params.
// TODO(b/65838351): Use smaller types if appropriate.
int32 input_offset;
int32 weights_offset;
int32 output_offset;
int32 output_multiplier;
int32_t input_offset;
int32_t weights_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
};
struct DepthToSpaceParams {
int32 block_size;
int32_t block_size;
};
struct DepthwiseParams {
PaddingType padding_type;
PaddingValues padding_values;
int16 stride_width;
int16 stride_height;
int16 dilation_width_factor;
int16 dilation_height_factor;
int16 depth_multiplier;
// uint8 inference params.
int16_t stride_width;
int16_t stride_height;
int16_t dilation_width_factor;
int16_t dilation_height_factor;
int16_t depth_multiplier;
// uint8_t inference params.
// TODO(b/65838351): Use smaller types if appropriate.
int32 input_offset;
int32 weights_offset;
int32 output_offset;
int32 output_multiplier;
int32_t input_offset;
int32_t weights_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
const int32* output_multiplier_per_channel;
const int32* output_shift_per_channel;
const int32_t* output_multiplier_per_channel;
const int32_t* output_shift_per_channel;
};
struct DequantizationParams {
double scale;
int32 zero_point;
int32_t zero_point;
};
struct PerChannelDequantizationParams {
const float* scale;
const int32* zero_point;
int32 quantized_dimension;
const int32_t* zero_point;
int32_t quantized_dimension;
};
struct FakeQuantParams {
MinMax minmax;
int32 num_bits;
int32_t num_bits;
};
struct FullyConnectedParams {
// uint8 inference params.
// uint8_t inference params.
// TODO(b/65838351): Use smaller types if appropriate.
int32 input_offset;
int32 weights_offset;
int32 output_offset;
int32 output_multiplier;
int32_t input_offset;
int32_t weights_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
@ -900,16 +906,16 @@ struct FullyConnectedParams {
};
struct GatherParams {
int16 axis;
int16_t axis;
};
struct L2NormalizationParams {
// uint8 inference params.
int32 input_zero_point;
// uint8_t inference params.
int32_t input_zero_point;
};
struct LocalResponseNormalizationParams {
int32 range;
int32_t range;
double bias;
double alpha;
double beta;
@ -937,50 +943,50 @@ struct HardSwishParams {
};
struct LogisticParams {
// uint8 inference params.
int32 input_zero_point;
int32 input_range_radius;
int32 input_multiplier;
// uint8_t inference params.
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
struct LstmCellParams {
int32 weights_zero_point;
int32 accum_multiplier;
int32_t weights_zero_point;
int32_t accum_multiplier;
int accum_shift;
int state_integer_bits;
};
struct MeanParams {
int8 axis_count;
int16 axis[4];
int8_t axis_count;
int16_t axis[4];
};
struct PackParams {
int8 axis;
const int32* input_zeropoint;
int8_t axis;
const int32_t* input_zeropoint;
const float* input_scale;
uint16 inputs_count;
int32 output_zeropoint;
uint16_t inputs_count;
int32_t output_zeropoint;
float output_scale;
};
struct PadParams {
int8 left_padding_count;
int32 left_padding[4];
int8 right_padding_count;
int32 right_padding[4];
int8_t left_padding_count;
int32_t left_padding[4];
int8_t right_padding_count;
int32_t right_padding[4];
ResizingCategory resizing_category;
};
struct PreluParams {
int32 input_offset;
int32 alpha_offset;
int32 output_offset;
int32 output_multiplier_1;
int32 output_shift_1;
int32 output_multiplier_2;
int32 output_shift_2;
int32_t input_offset;
int32_t alpha_offset;
int32_t output_offset;
int32_t output_multiplier_1;
int32_t output_shift_1;
int32_t output_multiplier_2;
int32_t output_shift_2;
};
struct PoolParams {
@ -991,17 +997,17 @@ struct PoolParams {
int stride_width;
int filter_height;
int filter_width;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
};
struct ReshapeParams {
int8 shape_count;
int32 shape[4];
int8_t shape_count;
int32_t shape[4];
};
struct ResizeBilinearParams {
@ -1018,22 +1024,22 @@ struct ResizeNearestNeighborParams {
};
struct SliceParams {
int8 begin_count;
int32 begin[4];
int8 size_count;
int32 size[4];
int8_t begin_count;
int32_t begin[4];
int8_t size_count;
int32_t size[4];
};
struct SoftmaxParams {
// beta is not really used (not a Tensorflow parameter) and not implemented
// for LogSoftmax.
double beta;
// uint8 inference params. Used even when beta defaults to 1.0.
int32 input_multiplier;
int32 input_left_shift;
// uint8_t inference params. Used even when beta defaults to 1.0.
int32_t input_multiplier;
int32_t input_left_shift;
// Reverse scaling is only used by LogSoftmax.
int32 reverse_scaling_divisor;
int32 reverse_scaling_right_shift;
int32_t reverse_scaling_divisor;
int32_t reverse_scaling_right_shift;
int diff_min;
int32_t zero_point;
float scale;
@ -1045,66 +1051,66 @@ struct SoftmaxParams {
};
struct SpaceToBatchParams {
// "Zero" padding for uint8 means padding with the output offset.
int32 output_offset;
// "Zero" padding for uint8_t means padding with the output offset.
int32_t output_offset;
};
struct SpaceToDepthParams {
int32 block_size;
int32_t block_size;
};
struct SplitParams {
// Graphs that split into, say, 2000 nodes are encountered. The indices in
// OperatorEdges are of type uint16.
uint16 num_split;
int16 axis;
// OperatorEdges are of type uint16_t.
uint16_t num_split;
int16_t axis;
};
struct SqueezeParams {
int8 squeeze_dims_count;
int32 squeeze_dims[4];
int8_t squeeze_dims_count;
int32_t squeeze_dims[4];
};
struct StridedSliceParams {
int8 start_indices_count;
int32 start_indices[5];
int8 stop_indices_count;
int32 stop_indices[5];
int8 strides_count;
int32 strides[5];
int8_t start_indices_count;
int32_t start_indices[5];
int8_t stop_indices_count;
int32_t stop_indices[5];
int8_t strides_count;
int32_t strides[5];
int16 begin_mask;
int16 ellipsis_mask;
int16 end_mask;
int16 new_axis_mask;
int16 shrink_axis_mask;
int16_t begin_mask;
int16_t ellipsis_mask;
int16_t end_mask;
int16_t new_axis_mask;
int16_t shrink_axis_mask;
};
struct TanhParams {
int32 input_zero_point;
int32 input_range_radius;
int32 input_multiplier;
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
struct TransposeParams {
int8 perm_count;
int32 perm[5];
int8_t perm_count;
int32_t perm[5];
};
struct UnpackParams {
uint16 num_split;
int16 axis;
uint16_t num_split;
int16_t axis;
};
struct LeakyReluParams {
float alpha;
int32 input_offset;
int32 output_offset;
int32 output_multiplier_alpha;
int32 output_shift_alpha;
int32 output_multiplier_identity;
int32 output_shift_identity;
int32_t input_offset;
int32_t output_offset;
int32_t output_multiplier_alpha;
int32_t output_shift_alpha;
int32_t output_multiplier_identity;
int32_t output_shift_identity;
};
template <typename P>
@ -1114,7 +1120,7 @@ inline void SetActivationParams(float min, float max, P* params) {
}
template <typename P>
inline void SetActivationParams(int32 min, int32 max, P* params) {
inline void SetActivationParams(int32_t min, int32_t max, P* params) {
params->quantized_activation_min = min;
params->quantized_activation_max = max;
}
@ -1126,7 +1132,7 @@ inline void SetActivationParams(int64_t min, int64_t max, P* params) {
}
template <typename P>
inline void GetActivationParams(const P& params, int32* min, int32* max) {
inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) {
*min = params.quantized_activation_min;
*max = params.quantized_activation_max;
}
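The SetActivationParams / GetActivationParams overloads above dispatch on the min/max type, so quantized (int32_t), float, and int64_t activation bounds land in the matching fields of a params struct. A short hedged usage sketch, assuming the usual header path for the declarations above:

#include <cstdint>
#include "tensorflow/lite/kernels/internal/types.h"

void ExampleSetGetActivation() {
  tflite::ConvParams params;
  // int32_t overload -> fills quantized_activation_{min,max}.
  tflite::SetActivationParams(std::int32_t{0}, std::int32_t{255}, &params);
  // float overload -> fills float_activation_{min,max}.
  tflite::SetActivationParams(-6.0f, 6.0f, &params);

  std::int32_t qmin = 0, qmax = 0;
  tflite::GetActivationParams(params, &qmin, &qmax);  // qmin == 0, qmax == 255
}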