Automated g4 rollback of changelist 201194552

PiperOrigin-RevId: 201241214
A. Unique TensorFlower 2018-06-19 14:27:43 -07:00 committed by TensorFlower Gardener
parent 27c27c58e1
commit 48832eff28
11 changed files with 590 additions and 1000 deletions
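This rollback reverts the TFLite kernels and internal ops below from the newer RuntimeShape-based signatures to the legacy Dims<4>-based ones. The two types describe the same NHWC buffers with opposite index order, as the DimsToShape helper in the diff shows. A minimal sketch of that correspondence, using simplified stand-in types rather than the real TFLite headers:

#include <cassert>

// Simplified stand-in for tflite::Dims<4>: sizes[0]/strides[0] refer to the
// innermost (depth) dimension, sizes[3]/strides[3] to the outermost (batch).
struct Dims4 {
  int sizes[4];
  int strides[4];
};

// RuntimeShape-style offset: dims stored outermost-first as {N, H, W, C}.
int OffsetNHWC(const int shape[4], int b, int h, int w, int c) {
  return ((b * shape[1] + h) * shape[2] + w) * shape[3] + c;
}

// Dims<4>-style offset, matching Offset(output_dims, c, x, y, b) in the diff.
int OffsetDims(const Dims4& d, int c, int x, int y, int b) {
  return c * d.strides[0] + x * d.strides[1] + y * d.strides[2] +
         b * d.strides[3];
}

int main() {
  // A contiguous N=2, H=3, W=4, C=5 tensor expressed both ways.
  const int shape[4] = {2, 3, 4, 5};
  const Dims4 dims = {{5, 4, 3, 2}, {1, 5, 20, 60}};
  assert(OffsetNHWC(shape, 1, 2, 3, 4) == OffsetDims(dims, 4, 3, 2, 1));
  return 0;
}

Offset(shape, b, y, x, c) in the RuntimeShape code and Offset(dims, c, x, y, b) in the Dims<4> code therefore address the same element.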


@@ -251,11 +251,11 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
optimized_ops::Tanh(GetTensorData<uint8_t>(input), GetTensorShape(input),
optimized_ops::Tanh(GetTensorData<uint8_t>(input), GetTensorDims(input),
input->params.zero_point, data->input_range_radius,
data->input_multiplier, data->input_left_shift,
GetTensorData<uint8_t>(output),
GetTensorShape(output));
GetTensorDims(output));
return kTfLiteOk;
} break;
default:
@@ -282,10 +282,10 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
}
case kTfLiteUInt8: {
optimized_ops::Logistic(
GetTensorData<uint8_t>(input), GetTensorShape(input),
GetTensorData<uint8_t>(input), GetTensorDims(input),
input->params.zero_point, data->input_range_radius,
data->input_multiplier, data->input_left_shift,
GetTensorData<uint8_t>(output), GetTensorShape(output));
GetTensorData<uint8_t>(output), GetTensorDims(output));
break;
}
default:
@@ -341,26 +341,26 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
optimized_ops::Softmax(GetTensorData<uint8_t>(input),
GetTensorShape({batch_size, 1, 1, input_size}),
GetTensorDims({batch_size, 1, 1, input_size}),
data->input_multiplier, data->input_left_shift,
data->diff_min, GetTensorData<uint8_t>(output),
GetTensorShape({batch_size, 1, 1, input_size}));
GetTensorDims({batch_size, 1, 1, input_size}));
}
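Reshaping the 2D [batch_size, input_size] tensor to {batch_size, 1, 1, input_size} above is purely a reindexing, so the 4D softmax over the last dimension reproduces the row-wise 2D softmax. A sketch of the equivalence (hypothetical helpers, not kernel code):

// Flat index of (b, i) in a contiguous [batch, input_size] buffer.
inline int FlatIndex2D(int b, int i, int input_size) {
  return b * input_size + i;
}

// Flat index of (b, h, w, i) in a contiguous [batch, height, width,
// input_size] buffer.
inline int FlatIndex4D(int b, int h, int w, int i, int height, int width,
                       int input_size) {
  return ((b * height + h) * width + w) * input_size + i;
}

// With height == width == 1, FlatIndex4D(b, 0, 0, i, 1, 1, input_size)
// collapses to FlatIndex2D(b, i, input_size), so the same buffer is read.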
// Takes a 4D tensor and performs softmax along the fourth dimension.
void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
TfLiteSoftmaxParams* params) {
optimized_ops::Softmax(GetTensorData<float>(input), GetTensorShape(input),
optimized_ops::Softmax(GetTensorData<float>(input), GetTensorDims(input),
params->beta, GetTensorData<float>(output),
GetTensorShape(output));
GetTensorDims(output));
}
void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
TfLiteSoftmaxParams* params, OpData* data) {
optimized_ops::Softmax(GetTensorData<uint8_t>(input), GetTensorShape(input),
optimized_ops::Softmax(GetTensorData<uint8_t>(input), GetTensorDims(input),
data->input_multiplier, data->input_left_shift,
data->diff_min, GetTensorData<uint8_t>(output),
GetTensorShape(output));
GetTensorDims(output));
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
@@ -415,8 +415,8 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
switch (input->type) {
case kTfLiteFloat32:
optimized_ops::LogSoftmax(
GetTensorData<float>(input), GetTensorShape(input),
GetTensorData<float>(output), GetTensorShape(output));
GetTensorData<float>(input), GetTensorDims(input),
GetTensorData<float>(output), GetTensorDims(output));
return kTfLiteOk;
default:
context->ReportError(context, "Only float32 supported currently, got %d",


@@ -32,21 +32,19 @@ namespace tflite {
namespace {
void RunLogSoftmaxFloatReference(const uint8* input_data,
const RuntimeShape& shape_common,
int32 input_offset, const double input_scale,
int stride, float beta,
uint8* reference_output_data) {
const int ref_buffer_size = shape_common.FlatSize();
const Dims<4>& dims_common, int32 input_offset,
const double input_scale, int stride,
float beta, uint8* reference_output_data) {
const int ref_buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<float> reference_dequant_data(ref_buffer_size);
std::vector<float> reference_output_float_data(ref_buffer_size);
// Reference data generated via Dequant of input into float, and then applying
// float LogSoftmax.
reference_ops::Dequantize(
input_data, ToRuntimeDims(shape_common), input_offset, input_scale,
reference_dequant_data.data(), ToRuntimeDims(shape_common));
optimized_ops::LogSoftmax(reference_dequant_data.data(), shape_common,
reference_output_float_data.data(), shape_common);
reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale,
reference_dequant_data.data(), dims_common);
optimized_ops::LogSoftmax(reference_dequant_data.data(), dims_common,
reference_output_float_data.data(), dims_common);
// Work with quantized scaling for LogSoftmax, under which 255 represents 0,
// and -16 gets nudged up to 0.
for (int i = 0; i < ref_buffer_size; i++) {
@@ -57,9 +55,9 @@ void RunLogSoftmaxFloatReference(const uint8* input_data,
}
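The float reference above re-quantizes its output under the convention in the comment: 255 represents 0.0 and values around -16 or below saturate to 0. A sketch of that mapping, assuming the usual uint8 LogSoftmax output scale of 16/256 with zero point 255 (illustrative helper, not part of this test):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize a float log-softmax value (always <= 0) to uint8 under the
// assumed convention: zero point 255, scale 16/256.
inline uint8_t QuantizeLogSoftmaxOutput(float log_prob) {
  const float scale = 16.0f / 256.0f;
  const int32_t zero_point = 255;
  const int32_t q =
      zero_point + static_cast<int32_t>(std::round(log_prob / scale));
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}
// QuantizeLogSoftmaxOutput(0.0f) == 255; QuantizeLogSoftmaxOutput(-16.0f) == 0.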
void CheckOutputData(const uint8* test_output, const uint8* reference_output,
const RuntimeShape& shape_common,
const string& check_label, bool be_exacting) {
const int buffer_size = shape_common.FlatSize();
const Dims<4>& dims_common, const string& check_label,
bool be_exacting) {
const int buffer_size = RequiredBufferSizeForDims(dims_common);
// While calculating some metrics in floating point, we work with quantized
// scaling.
std::vector<int> diff(buffer_size);
@@ -101,15 +99,15 @@ void CheckOutputData(const uint8* test_output, const uint8* reference_output,
// Runs the LogSoftmax and compares against the float reference implementation
// and the quantized reference implementation.
void RunOneLogSoftmaxTest(const uint8* input_data,
const RuntimeShape& shape_common, int32 input_offset,
const double input_scale, int stride, float beta) {
const int buffer_size = shape_common.FlatSize();
void RunOneLogSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common,
int32 input_offset, const double input_scale,
int stride, float beta) {
const int buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<uint8> optimized_logsoftmax_output(buffer_size);
std::vector<uint8> reference_float_logsoftmax_output(buffer_size);
std::vector<uint8> reference_quant_logsoftmax_output(buffer_size);
RunLogSoftmaxFloatReference(input_data, shape_common, input_offset,
RunLogSoftmaxFloatReference(input_data, dims_common, input_offset,
input_scale, stride, beta,
reference_float_logsoftmax_output.data());
@@ -128,23 +126,23 @@ void RunOneLogSoftmaxTest(const uint8* input_data,
const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits,
input_beta_left_shift);
optimized_ops::LogSoftmax(input_data, shape_common, input_beta_multiplier,
optimized_ops::LogSoftmax(input_data, dims_common, input_beta_multiplier,
input_beta_left_shift, reverse_scaling_divisor,
reverse_scaling_right_shift, diff_min,
optimized_logsoftmax_output.data(), shape_common);
optimized_logsoftmax_output.data(), dims_common);
reference_ops::LogSoftmax(
input_data, shape_common, input_beta_multiplier, input_beta_left_shift,
input_data, dims_common, input_beta_multiplier, input_beta_left_shift,
reverse_scaling_divisor, reverse_scaling_right_shift, diff_min,
reference_quant_logsoftmax_output.data(), shape_common);
reference_quant_logsoftmax_output.data(), dims_common);
CheckOutputData(optimized_logsoftmax_output.data(),
reference_float_logsoftmax_output.data(), shape_common,
reference_float_logsoftmax_output.data(), dims_common,
"Optimized vs float reference", false);
CheckOutputData(optimized_logsoftmax_output.data(),
reference_quant_logsoftmax_output.data(), shape_common,
reference_quant_logsoftmax_output.data(), dims_common,
"Optimized vs quant reference", true);
CheckOutputData(reference_quant_logsoftmax_output.data(),
reference_float_logsoftmax_output.data(), shape_common,
reference_float_logsoftmax_output.data(), dims_common,
"Quant reference vs float reference", false);
}
@@ -167,13 +165,13 @@ bool TryOneUniformLogSoftmax() {
const int32 input_offset = UniformRandomInt(-256, 0);
static constexpr float beta = 1.0f;
auto shape_common =
RuntimeShape({batch, input_height, input_width, input_depth});
const int buffer_size = shape_common.FlatSize();
Dims<4> dims_common =
MakeDimsForInference(input_depth, input_width, input_height, batch);
const int buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<uint8> input_data(buffer_size);
FillRandom(&input_data);
RunOneLogSoftmaxTest(input_data.data(), shape_common, input_offset,
RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset,
input_scale, stride, beta);
return true;
}
@@ -205,14 +203,14 @@ bool TryOneSkyscraperLogSoftmax(bool small_depth) {
const int middle_min = UniformRandomInt(0, 255);
const int sides_max = UniformRandomInt(0, middle_min);
auto shape_common =
RuntimeShape({batch, input_height, input_width, input_depth});
const int buffer_size = shape_common.FlatSize();
Dims<4> dims_common =
MakeDimsForInference(input_depth, input_width, input_height, batch);
const int buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<uint8> input_data(buffer_size);
FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min,
sides_max);
RunOneLogSoftmaxTest(input_data.data(), shape_common, input_offset,
RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset,
input_scale, stride, beta);
return true;
}


@@ -26,10 +26,6 @@ limitations under the License.
namespace tflite {
namespace optimized_ops {
// Unoptimized reference ops:
using reference_ops::Relu1;
using reference_ops::Relu6;
inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) {
return RuntimeShape(
{dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
@@ -38,285 +34,15 @@ inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) {
template <FusedActivationFunctionType Ac>
void L2Normalization(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
L2Normalization<Ac>(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
return L2Normalization<Ac>(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, uint8* output_data,
const Dims<4>& output_dims) {
L2Normalization(input_data, DimsToShape(input_dims), input_zero_point,
output_data, DimsToShape(output_dims));
}
inline void Relu(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Relu(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight,
float output_activation_min,
float output_activation_max, float* output_data,
const Dims<4>& output_dims) {
AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, output_dims);
}
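The legacy wrappers here recover the min/max clamp bounds from the template activation parameter via GetActivationMinMax. A simplified sketch of that mapping, assuming the standard fused-activation semantics (illustrative reimplementation, not the TFLite source):

#include <limits>

// Stand-in enum mirroring the fused-activation kinds used by the wrappers.
enum class FusedActivationFunctionType { kNone, kRelu, kRelu1, kRelu6 };

// Sketch of GetActivationMinMax: map a fused activation kind to the clamp
// bounds that the pooling kernels apply to their outputs.
inline void GetActivationMinMaxSketch(FusedActivationFunctionType ac,
                                      float* output_activation_min,
                                      float* output_activation_max) {
  switch (ac) {
    case FusedActivationFunctionType::kNone:
      *output_activation_min = std::numeric_limits<float>::lowest();
      *output_activation_max = std::numeric_limits<float>::max();
      break;
    case FusedActivationFunctionType::kRelu:
      *output_activation_min = 0.0f;
      *output_activation_max = std::numeric_limits<float>::max();
      break;
    case FusedActivationFunctionType::kRelu1:
      *output_activation_min = -1.0f;
      *output_activation_max = 1.0f;
      break;
    case FusedActivationFunctionType::kRelu6:
      *output_activation_min = 0.0f;
      *output_activation_max = 6.0f;
      break;
  }
}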
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, float* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, filter_width, filter_height,
output_activation_min, output_activation_max, output_data,
DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
inline void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight,
float output_activation_min, float output_activation_max,
float* output_data, const Dims<4>& output_dims) {
MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int kwidth, int kheight, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, filter_width, filter_height,
output_activation_min, output_activation_max, output_data,
DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
inline void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
float output_activation_min, float output_activation_max,
float* output_data, const Dims<4>& output_dims) {
L2Pool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, filter_width, filter_height,
output_activation_min, output_activation_max, output_data,
DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
L2Pool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
L2Pool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void Softmax(const float* input_data, const Dims<4>& input_dims,
float beta, float* output_data,
const Dims<4>& output_dims) {
Softmax(input_data, DimsToShape(input_dims), beta, output_data,
DimsToShape(output_dims));
}
inline void Softmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_beta_multiplier, int32 input_beta_left_shift,
int diff_min, uint8* output_data,
const Dims<4>& output_dims) {
Softmax(input_data, DimsToShape(input_dims), input_beta_multiplier,
input_beta_left_shift, diff_min, output_data,
DimsToShape(output_dims));
}
inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
LogSoftmax(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_multiplier, int32 input_left_shift,
int32 reverse_scaling_divisor,
int32 reverse_scaling_right_shift, int diff_min,
uint8* output_data, const Dims<4>& output_dims) {
LogSoftmax(input_data, DimsToShape(input_dims), input_multiplier,
input_left_shift, reverse_scaling_divisor,
reverse_scaling_right_shift, diff_min, output_data,
DimsToShape(output_dims));
}
inline void Logistic(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Logistic(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Logistic(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const Dims<4>& output_dims) {
Logistic(input_data, DimsToShape(input_dims), input_zero_point,
input_range_radius, input_multiplier, input_left_shift, output_data,
DimsToShape(output_dims));
}
inline void Logistic(const int16* input_data, const Dims<4>& input_dims,
int16* output_data, const Dims<4>& output_dims) {
Logistic(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Tanh(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Tanh(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Tanh(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const Dims<4>& output_dims) {
Tanh(input_data, DimsToShape(input_dims), input_zero_point,
input_range_radius, input_multiplier, input_left_shift, output_data,
DimsToShape(output_dims));
}
inline void Tanh(const int16* input_data, const Dims<4>& input_dims,
int input_left_shift, int16* output_data,
const Dims<4>& output_dims) {
Tanh(input_data, DimsToShape(input_dims), input_left_shift, output_data,
DimsToShape(output_dims));
return L2Normalization(input_data, DimsToShape(input_dims), input_zero_point,
output_data, DimsToShape(output_dims));
}
} // namespace optimized_ops


@@ -85,12 +85,6 @@ using VectorMap = typename std::conditional<
Eigen::Dynamic, 1>>,
Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, 1>>>::type;
template <typename Scalar>
VectorMap<Scalar> MapAsVector(Scalar* data, const RuntimeShape& shape) {
const int size = shape.FlatSize();
return VectorMap<Scalar>(data, size, 1);
}
template <typename Scalar, int N>
VectorMap<Scalar> MapAsVector(Scalar* data, const Dims<N>& dims) {
const int size = FlatSize(dims);
@@ -107,23 +101,6 @@ using MatrixMap = typename std::conditional<
Eigen::Dynamic, Eigen::Dynamic>>,
Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
template <typename Scalar>
MatrixMap<Scalar> MapAsMatrixWithLastDimAsRows(Scalar* data,
const RuntimeShape& shape) {
const int dims_count = shape.DimensionsCount();
const int rows = shape.Dims(dims_count - 1);
const int cols = FlatSizeSkipDim(shape, dims_count - 1);
return MatrixMap<Scalar>(data, rows, cols);
}
template <typename Scalar>
MatrixMap<Scalar> MapAsMatrixWithFirstDimAsCols(Scalar* data,
const RuntimeShape& shape) {
const int cols = shape.Dims(0);
const int rows = FlatSizeSkipDim(shape, 0);
return MatrixMap<Scalar>(data, rows, cols);
}
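The removed MapAsMatrixWithLastDimAsRows (RuntimeShape) and the retained MapAsMatrixWithFirstDimAsRows (Dims<4>, below) describe the same view of a contiguous NHWC buffer: rows are the innermost depth dimension, columns are everything else. A sketch under that assumption (illustrative, not the library code):

#include <Eigen/Core>

// Map a contiguous NHWC float buffer as a depth x (batch*height*width)
// column-major matrix, which is what "last dim as rows" (RuntimeShape) and
// "first dim as rows" (Dims<4>) both amount to.
inline Eigen::Map<Eigen::MatrixXf> MapNhwcAsDepthMajorMatrix(
    float* data, int batches, int height, int width, int depth) {
  return Eigen::Map<Eigen::MatrixXf>(data, depth, batches * height * width);
}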
template <typename Scalar, int N>
MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar* data,
const Dims<N>& dims) {
@@ -2366,12 +2343,12 @@ void GlobalBatchNormalization(const float* input_data,
}
}
inline void Relu(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
inline void Relu(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Relu (not fused)");
const auto input = MapAsVector(input_data, input_shape);
auto output = MapAsVector(output_data, output_shape);
const auto input = MapAsVector(input_data, input_dims);
auto output = MapAsVector(output_data, output_dims);
output = input.cwiseMax(0.0f);
}
@@ -3752,25 +3729,23 @@ inline int NodeOffset(int b, int h, int w, int height, int width) {
return (b * height + h) * width + w;
}
inline void AveragePool(const float* input_data,
const RuntimeShape& input_shape, int stride_width,
int stride_height, int pad_width, int pad_height,
int kwidth, int kheight, float output_activation_min,
inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight,
float output_activation_min,
float output_activation_max, float* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("AveragePool");
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
// TODO(benoitjacob) make this a proper reference impl without Eigen!
const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
// TODO(benoitjacob) get rid of the dynamic memory allocation here!
Eigen::VectorXf out_count(out_mat.cols());
out_count.setZero();
@@ -3808,9 +3783,9 @@ inline void AveragePool(const float* input_data,
for (int y = 0; y < output_height; ++y) {
for (int x = 0; x < output_width; ++x) {
for (int c = 0; c < depth; ++c) {
output_data[Offset(output_shape, b, y, x, c)] =
output_data[Offset(output_dims, c, x, y, b)] =
ActivationFunctionWithMinMax(
output_data[Offset(output_shape, b, y, x, c)],
output_data[Offset(output_dims, c, x, y, b)],
output_activation_min, output_activation_max);
}
}
@@ -3818,23 +3793,44 @@ inline void AveragePool(const float* input_data,
}
}
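ActivationFunctionWithMinMax, applied in the clamping loops above, is just a saturating clamp to the activation range. A minimal sketch (illustrative reimplementation):

#include <algorithm>

// Clamp a value to [output_activation_min, output_activation_max], which is
// all the fused-activation post-processing in these pooling kernels does.
template <typename T>
T ActivationFunctionWithMinMaxSketch(T x, T output_activation_min,
                                     T output_activation_max) {
  return std::min(std::max(x, output_activation_min), output_activation_max);
}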
inline void AveragePool(const uint8* input_data,
const RuntimeShape& input_shape, int stride_width,
int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, float* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("AveragePool/8bit");
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
@@ -3854,12 +3850,11 @@ inline void AveragePool(const uint8* input_data,
uint16 acc[kAccBufferMaxSize];
memset(acc, 0, depth * sizeof(acc[0]));
const uint8* input_ptr =
input_data +
depth * (in_x_origin +
input_width * (in_y_origin + input_height * batch));
input_data + input_dims.strides[1] * in_x_origin +
input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch;
for (int fy = filter_y_start; fy < filter_y_end; fy++) {
const uint8* input_row_ptr =
input_ptr + depth * (fy * input_width + filter_x_start);
const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] +
filter_x_start * input_dims.strides[1];
for (int fx = filter_x_start; fx < filter_x_end; fx++) {
int channel = 0;
#ifdef USE_NEON
@@ -3890,7 +3885,7 @@ inline void AveragePool(const uint8* input_data,
}
}
uint8* output_ptr =
output_data + Offset(output_shape, batch, out_y, out_x, 0);
output_data + Offset(output_dims, 0, out_x, out_y, batch);
int channel = 0;
#ifdef USE_NEON
#define AVGPOOL_DIVIDING_BY(FILTER_COUNT) \
@@ -3931,23 +3926,54 @@ inline void AveragePool(const uint8* input_data,
}
}
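Stripped of the NEON paths, the uint8 AveragePool above accumulates each pooling window into a wide buffer, then performs a rounded divide by the element count and clamps. A scalar sketch of one output element (illustrative only):

#include <algorithm>
#include <cstdint>

// One output element of an average pool over `count` uint8 inputs: widen,
// accumulate, round-divide, then clamp to the activation range.
inline uint8_t AveragePoolOneOutputSketch(const uint8_t* window, int count,
                                          int32_t output_activation_min,
                                          int32_t output_activation_max) {
  uint32_t acc = 0;
  for (int i = 0; i < count; ++i) acc += window[i];
  int32_t avg = static_cast<int32_t>((acc + count / 2) / count);
  avg = std::min(output_activation_max, std::max(output_activation_min, avg));
  return static_cast<uint8_t>(avg);
}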
inline void MaxPool(const float* input_data, const RuntimeShape& input_shape,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
inline void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight,
float output_activation_min, float output_activation_max,
float* output_data, const RuntimeShape& output_shape) {
float* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("MaxPool");
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
// Prefill the output to minimum representable float value
out_mat.setConstant(std::numeric_limits<float>::lowest());
for (int b = 0; b < batches; ++b) {
@@ -3980,9 +4006,9 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape,
for (int y = 0; y < output_height; ++y) {
for (int x = 0; x < output_width; ++x) {
for (int c = 0; c < depth; ++c) {
output_data[Offset(output_shape, b, y, x, c)] =
output_data[Offset(output_dims, c, x, y, b)] =
ActivationFunctionWithMinMax(
output_data[Offset(output_shape, b, y, x, c)],
output_data[Offset(output_dims, c, x, y, b)],
output_activation_min, output_activation_max);
}
}
@@ -3990,21 +4016,41 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape,
}
}
inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int kwidth, int kheight, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const RuntimeShape& output_shape) {
uint8* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("MaxPool/8bit");
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
@@ -4022,12 +4068,11 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape,
uint8 acc[kAccBufferMaxSize];
memset(acc, 0, depth * sizeof(acc[0]));
const uint8* input_ptr =
input_data +
depth * (in_x_origin +
input_width * (in_y_origin + input_height * batch));
input_data + input_dims.strides[1] * in_x_origin +
input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch;
for (int fy = filter_y_start; fy < filter_y_end; fy++) {
const uint8* input_row_ptr =
input_ptr + depth * (fy * input_width + filter_x_start);
const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] +
filter_x_start * input_dims.strides[1];
for (int fx = filter_x_start; fx < filter_x_end; fx++) {
int channel = 0;
#ifdef USE_NEON
@@ -4053,7 +4098,7 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape,
}
}
uint8* output_ptr =
output_data + Offset(output_shape, batch, out_y, out_x, 0);
output_data + Offset(output_dims, 0, out_x, out_y, batch);
int channel = 0;
#ifdef USE_NEON
for (; channel <= depth - 16; channel += 16) {
@@ -4080,23 +4125,53 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void L2Pool(const float* input_data, const RuntimeShape& input_shape,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
inline void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
float output_activation_min, float output_activation_max,
float* output_data, const RuntimeShape& output_shape) {
float* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("L2Pool");
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
// Actually carry out L2 Pool. Code is written in forward mode: we go through
// the input values once, and write to all the pooled regions that it maps to.
const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
Eigen::VectorXf in_square(in_mat.rows());
Eigen::VectorXf out_count(out_mat.cols());
out_count.setZero();
@@ -4138,6 +4213,28 @@ inline void L2Pool(const float* input_data, const RuntimeShape& input_shape,
(out_mat.array().rowwise() * out_count.transpose().array()).cwiseSqrt();
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
L2Pool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
L2Pool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void LocalResponseNormalization(const float* input_data,
const Dims<4>& input_dims, int range,
float bias, float alpha, float beta,
@@ -4183,14 +4280,14 @@ inline void LocalResponseNormalization(const float* input_data,
}
}
inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
inline void Softmax(const float* input_data, const Dims<4>& input_dims,
float beta, float* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Softmax");
MatchingFlatSize(input_shape, output_shape);
MatchingFlatSize(input_dims, output_dims);
const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
// Compute the exponential first, removing the max coefficient for numerical
// stability.
out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta;
@@ -4202,10 +4299,10 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
out_mat.array().rowwise() *= scale;
}
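The Eigen code above is the standard numerically stable softmax: subtract the column max, scale by beta, exponentiate, normalize. A scalar sketch of the same computation for one depth-sized column (illustrative):

#include <algorithm>
#include <cmath>

// One softmax column of `depth` values, as the Eigen version computes it.
inline void SoftmaxColumnSketch(const float* input, int depth, float beta,
                                float* output) {
  float max_val = input[0];
  for (int i = 1; i < depth; ++i) max_val = std::max(max_val, input[i]);
  float sum = 0.0f;
  for (int i = 0; i < depth; ++i) {
    output[i] = std::exp((input[i] - max_val) * beta);
    sum += output[i];
  }
  const float inv_sum = 1.0f / sum;
  for (int i = 0; i < depth; ++i) output[i] *= inv_sum;
}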
inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
inline void Softmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_beta_multiplier, int32 input_beta_left_shift,
int diff_min, uint8* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large as
// -32 before multiplying by input_beta_multiplier, and therefore as large as
@@ -4219,11 +4316,8 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
gemmlowp::ScopedProfilingLabel label("Softmax/8bit");
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
for (int b = 0; b < outer_size; ++b) {
const uint8* input_data_ptr = input_data + b * depth;
@@ -4413,14 +4507,11 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
// TODO(myenik): This is the same as the reference implementation, not actually
// optimized yet.
inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("LogSoftmax");
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
for (int i = 0; i < outer_size; ++i) {
const float* block_input_data = input_data + i * depth;
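Per the TODO, this float LogSoftmax mirrors the reference implementation: for each depth-sized block it computes x_i - max - log(sum_j exp(x_j - max)). A scalar sketch (illustrative):

#include <algorithm>
#include <cmath>

// One depth-sized block of log-softmax, matching the formula above.
inline void LogSoftmaxBlockSketch(const float* input, int depth,
                                  float* output) {
  float max_val = input[0];
  for (int i = 1; i < depth; ++i) max_val = std::max(max_val, input[i]);
  float sum = 0.0f;
  for (int i = 0; i < depth; ++i) sum += std::exp(input[i] - max_val);
  const float log_sum = std::log(sum);
  for (int i = 0; i < depth; ++i) output[i] = input[i] - max_val - log_sum;
}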
@@ -4561,11 +4652,11 @@ log_x_for_x_greater_than_or_equal_to_1(
}
// Currently just a copy of the reference code.
inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_multiplier, int32 input_left_shift,
int32 reverse_scaling_divisor,
int32 reverse_scaling_right_shift, int diff_min,
uint8* output_data, const RuntimeShape& output_shape) {
uint8* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("LogSoftmax/Uint8");
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large as
@@ -4580,11 +4671,8 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
for (int i = 0; i < outer_size; ++i) {
const uint8* block_input_data = input_data + i * depth;
@@ -4648,21 +4736,21 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void Logistic(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
inline void Logistic(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Logistic");
auto input_map = MapAsVector(input_data, input_shape);
auto output_map = MapAsVector(output_data, output_shape);
auto input_map = MapAsVector(input_data, input_dims);
auto output_map = MapAsVector(output_data, output_dims);
output_map.array() =
input_map.array().unaryExpr(Eigen::internal::scalar_sigmoid_op<float>());
}
inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
inline void Logistic(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const RuntimeShape& output_shape) {
uint8* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Logistic/Uint8");
const int size = MatchingFlatSize(input_shape, output_shape);
const int size = MatchingFlatSize(input_dims, output_dims);
int c = 0;
#ifdef USE_NEON
@@ -4794,10 +4882,10 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
int16* output_data, const RuntimeShape& output_shape) {
inline void Logistic(const int16* input_data, const Dims<4>& input_dims,
int16* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Logistic/Int16");
const int flat_size = MatchingFlatSize(input_shape, output_shape);
const int flat_size = MatchingFlatSize(output_dims, input_dims);
for (int i = 0; i < flat_size; i++) {
}
@@ -4854,21 +4942,21 @@ inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
}
}
inline void Tanh(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
inline void Tanh(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Tanh");
auto input_map = MapAsVector(input_data, input_shape);
auto output_map = MapAsVector(output_data, output_shape);
auto input_map = MapAsVector(input_data, input_dims);
auto output_map = MapAsVector(output_data, output_dims);
output_map.array() = input_map.array().tanh();
}
inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
inline void Tanh(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const RuntimeShape& output_shape) {
uint8* output_data, const Dims<4>& output_dims) {
// Note that this is almost the exact same code as in Logistic().
gemmlowp::ScopedProfilingLabel label("Tanh");
const int size = MatchingFlatSize(input_shape, output_shape);
const int size = MatchingFlatSize(input_dims, output_dims);
int c = 0;
int32_t output_zero_point = 128;
@@ -5009,16 +5097,16 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
inline void Tanh(const int16* input_data, const Dims<4>& input_dims,
int input_left_shift, int16* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Tanh/Int16");
// Support for shifts is limited until we have a parameterized version of
// SaturatingRoundingMultiplyByPOT().
TFLITE_DCHECK_GE(input_left_shift, 0);
TFLITE_DCHECK_LE(input_left_shift, 1);
const int flat_size = MatchingFlatSize(input_shape, output_shape);
const int flat_size = MatchingFlatSize(output_dims, input_dims);
int c = 0;
const int16* input_data_ptr = input_data;


@@ -34,297 +34,15 @@ inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) {
template <FusedActivationFunctionType Ac>
void L2Normalization(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
L2Normalization<Ac>(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
return L2Normalization<Ac>(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, uint8* output_data,
const Dims<4>& output_dims) {
L2Normalization(input_data, DimsToShape(input_dims), input_zero_point,
output_data, DimsToShape(output_dims));
}
inline void Relu(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Relu(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Relu1(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Relu1(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Relu6(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Relu6(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight,
float output_activation_min,
float output_activation_max, float* output_data,
const Dims<4>& output_dims) {
AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, float* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, filter_width, filter_height,
output_activation_min, output_activation_max, output_data,
DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
inline void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int kwidth, int kheight,
float output_activation_min, float output_activation_max,
float* output_data, const Dims<4>& output_dims) {
MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int kwidth, int kheight, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, kwidth, kheight, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, filter_width, filter_height,
output_activation_min, output_activation_max, output_data,
DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
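// For orientation only: the templated wrappers above rely on
// GetActivationMinMax() to turn the fused activation into a clamp range.
// Its assumed mapping (a sketch, not quoted from this change) is:
//   kNone  -> [std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max()]
//   kRelu  -> [0, max float]
//   kRelu6 -> [0, 6]
//   kRelu1 -> [-1, 1]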
inline void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
float output_activation_min, float output_activation_max,
float* output_data, const Dims<4>& output_dims) {
L2Pool(input_data, DimsToShape(input_dims), stride_width, stride_height,
pad_width, pad_height, filter_width, filter_height,
output_activation_min, output_activation_max, output_data,
DimsToShape(output_dims));
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
L2Pool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
L2Pool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void Softmax(const float* input_data, const Dims<4>& input_dims,
float beta, float* output_data,
const Dims<4>& output_dims) {
Softmax(input_data, DimsToShape(input_dims), beta, output_data,
DimsToShape(output_dims));
}
inline void Softmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_beta_multiplier, int32 input_beta_left_shift,
int diff_min, uint8* output_data,
const Dims<4>& output_dims) {
Softmax(input_data, DimsToShape(input_dims), input_beta_multiplier,
input_beta_left_shift, diff_min, output_data,
DimsToShape(output_dims));
}
inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
LogSoftmax(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_multiplier, int32 input_left_shift,
int32 reverse_scaling_divisor,
int32 reverse_scaling_right_shift, int diff_min,
uint8* output_data, const Dims<4>& output_dims) {
LogSoftmax(input_data, DimsToShape(input_dims), input_multiplier,
input_left_shift, reverse_scaling_divisor,
reverse_scaling_right_shift, diff_min, output_data,
DimsToShape(output_dims));
}
inline void Logistic(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Logistic(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Logistic(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const Dims<4>& output_dims) {
Logistic(input_data, DimsToShape(input_dims), input_zero_point,
input_range_radius, input_multiplier, input_left_shift, output_data,
DimsToShape(output_dims));
}
inline void Logistic(const int16* input_data, const Dims<4>& input_dims,
int16* output_data, const Dims<4>& output_dims) {
Logistic(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Tanh(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
Tanh(input_data, DimsToShape(input_dims), output_data,
DimsToShape(output_dims));
}
inline void Tanh(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const Dims<4>& output_dims) {
Tanh(input_data, DimsToShape(input_dims), input_zero_point,
input_range_radius, input_multiplier, input_left_shift, output_data,
DimsToShape(output_dims));
}
inline void Tanh(const int16* input_data, const Dims<4>& input_dims,
int input_left_shift, int16* output_data,
const Dims<4>& output_dims) {
Tanh(input_data, DimsToShape(input_dims), input_left_shift, output_data,
DimsToShape(output_dims));
}
inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
                            int32 input_zero_point, uint8* output_data,
                            const Dims<4>& output_dims) {
  L2Normalization(input_data, DimsToShape(input_dims), input_zero_point,
                  output_data, DimsToShape(output_dims));
}
} // namespace reference_ops

View File

@ -914,9 +914,9 @@ void GlobalBatchNormalization(const float* input_data,
}
}
inline void Relu(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
inline void Relu(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(input_dims, output_dims);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0;
@ -925,10 +925,9 @@ inline void Relu(const float* input_data, const RuntimeShape& input_shape,
}
}
inline void Relu1(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
gemmlowp::ScopedProfilingLabel label("Relu1 (not fused)");
const int flat_size = MatchingFlatSize(input_shape, output_shape);
inline void Relu1(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(input_dims, output_dims);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 1;
@ -938,10 +937,9 @@ inline void Relu1(const float* input_data, const RuntimeShape& input_shape,
}
}
inline void Relu6(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
gemmlowp::ScopedProfilingLabel label("Relu6 (not fused)");
const int flat_size = MatchingFlatSize(input_shape, output_shape);
inline void Relu6(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(input_dims, output_dims);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6;
@ -2247,21 +2245,18 @@ inline int NodeOffset(int b, int h, int w, int height, int width) {
return (b * height + h) * width + w;
}
inline void AveragePool(const float* input_data,
const RuntimeShape& input_shape, int stride_width,
int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height,
inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
float output_activation_min,
float output_activation_max, float* output_data,
const RuntimeShape& output_shape) {
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const Dims<4>& output_dims) {
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
@ -2285,12 +2280,12 @@ inline void AveragePool(const float* input_data,
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
total +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
input_data[Offset(input_dims, channel, in_x, in_y, batch)];
filter_count++;
}
}
const float average = total / filter_count;
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
output_data[Offset(output_dims, channel, out_x, out_y, batch)] =
ActivationFunctionWithMinMax(average, output_activation_min,
output_activation_max);
}
@ -2299,22 +2294,42 @@ inline void AveragePool(const float* input_data,
}
}
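// Sketch of the two indexing conventions that appear in the hunks above and
// below (hypothetical helpers mirroring what Offset() is assumed to do for
// each container type; not part of this change):
inline int OffsetDimsSketch(const Dims<4>& dims, int c, int x, int y, int b) {
  // Dims<4> carries explicit strides, innermost (depth) first.
  return c * dims.strides[0] + x * dims.strides[1] + y * dims.strides[2] +
         b * dims.strides[3];
}
inline int OffsetShapeSketch(const RuntimeShape& shape, int b, int y, int x,
                             int c) {
  // RuntimeShape is dense NHWC, so the offset is plain row-major arithmetic.
  return ((b * shape.Dims(1) + y) * shape.Dims(2) + x) * shape.Dims(3) + c;
}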
inline void AveragePool(const uint8* input_data,
const RuntimeShape& input_shape, int stride_width,
int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, float* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
@ -2337,15 +2352,14 @@ inline void AveragePool(const uint8* input_data,
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
acc +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
acc += input_data[Offset(input_dims, channel, in_x, in_y, batch)];
filter_count++;
}
}
acc = (acc + filter_count / 2) / filter_count;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
output_data[Offset(output_dims, channel, out_x, out_y, batch)] =
static_cast<uint8>(acc);
}
}
@ -2353,19 +2367,50 @@ inline void AveragePool(const uint8* input_data,
}
}
inline void L2Pool(const float* input_data, const RuntimeShape& input_shape,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width,
int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
inline void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
float output_activation_min, float output_activation_max,
float* output_data, const RuntimeShape& output_shape) {
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
float* output_data, const Dims<4>& output_dims) {
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
@ -2389,13 +2434,13 @@ inline void L2Pool(const float* input_data, const RuntimeShape& input_shape,
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
const float val =
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
input_data[Offset(input_dims, channel, in_x, in_y, batch)];
sum_squares += val * val;
filter_count++;
}
}
const float l2pool_result = std::sqrt(sum_squares / filter_count);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
output_data[Offset(output_dims, channel, out_x, out_y, batch)] =
ActivationFunctionWithMinMax(l2pool_result, output_activation_min,
output_activation_max);
}
@ -2404,19 +2449,40 @@ inline void L2Pool(const float* input_data, const RuntimeShape& input_shape,
}
}
inline void MaxPool(const float* input_data, const RuntimeShape& input_shape,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
L2Pool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
L2Pool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
float output_activation_min, float output_activation_max,
float* output_data, const RuntimeShape& output_shape) {
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
float* output_data, const Dims<4>& output_dims) {
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
@ -2440,10 +2506,10 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape,
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
input_data[Offset(input_dims, channel, in_x, in_y, batch)]);
}
}
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
output_data[Offset(output_dims, channel, out_x, out_y, batch)] =
ActivationFunctionWithMinMax(max, output_activation_min,
output_activation_max);
}
@ -2452,22 +2518,42 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape,
}
}
inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape,
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, float* output_data,
const Dims<4>& output_dims) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
float* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_data, output_dims);
}
inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width,
int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const RuntimeShape& output_shape) {
uint8* output_data, const Dims<4>& output_dims) {
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_GE(output_activation_min, 0);
TFLITE_DCHECK_LE(output_activation_max, 255);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
const int input_height = ArraySize(input_dims, 2);
const int input_width = ArraySize(input_dims, 1);
const int output_height = ArraySize(output_dims, 2);
const int output_width = ArraySize(output_dims, 1);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
@ -2491,12 +2577,12 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape,
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
input_data[Offset(input_dims, channel, in_x, in_y, batch)]);
}
}
max = std::max<uint8>(max, output_activation_min);
max = std::min<uint8>(max, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
output_data[Offset(output_dims, channel, out_x, out_y, batch)] =
static_cast<uint8>(max);
}
}
@ -2504,6 +2590,38 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape,
}
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims,
int stride_width, int stride_height, int pad_width, int pad_height,
int filter_width, int filter_height, int32 output_activation_min,
int32 output_activation_max, uint8* output_data,
const Dims<4>& output_dims) {
static_assert(Ac == FusedActivationFunctionType::kNone ||
Ac == FusedActivationFunctionType::kRelu ||
Ac == FusedActivationFunctionType::kRelu6 ||
Ac == FusedActivationFunctionType::kRelu1,
"");
if (Ac == FusedActivationFunctionType::kNone) {
TFLITE_DCHECK_EQ(output_activation_min, 0);
TFLITE_DCHECK_EQ(output_activation_max, 255);
}
MaxPool(input_data, input_dims, stride_width, stride_height, pad_width,
pad_height, filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
// legacy, for compatibility with old checked-in code
template <FusedActivationFunctionType Ac>
void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride,
int pad_width, int pad_height, int filter_width, int filter_height,
int32 output_activation_min, int32 output_activation_max,
uint8* output_data, const Dims<4>& output_dims) {
MaxPool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
filter_width, filter_height, output_activation_min,
output_activation_max, output_data, output_dims);
}
inline void LocalResponseNormalization(const float* input_data,
const Dims<4>& input_dims, int range,
float bias, float alpha, float beta,
@ -2527,14 +2645,11 @@ inline void LocalResponseNormalization(const float* input_data,
}
}
inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
inline void Softmax(const float* input_data, const Dims<4>& input_dims,
float beta, float* output_data,
const RuntimeShape& output_shape) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const Dims<4>& output_dims) {
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
for (int i = 0; i < outer_size; ++i) {
// Find max element value which we'll use to ensure numerical stability
@ -2559,10 +2674,10 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape,
}
}
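// Minimal sketch of the per-row computation elided by the hunk above
// (standard softmax with a beta scale; illustrative names, assumes <cmath>
// and <algorithm> are available as elsewhere in this header):
inline void SoftmaxRowSketch(const float* in, int depth, float beta,
                             float* out) {
  float max_val = in[0];
  for (int c = 1; c < depth; ++c) max_val = std::max(max_val, in[c]);
  float sum = 0.f;
  for (int c = 0; c < depth; ++c) {
    // Subtracting the row max keeps exp() from overflowing.
    out[c] = std::exp((in[c] - max_val) * beta);
    sum += out[c];
  }
  for (int c = 0; c < depth; ++c) {
    out[c] /= sum;
  }
}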
inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
inline void Softmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_beta_multiplier, int32 input_beta_left_shift,
int diff_min, uint8* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large as
// -32 before multiplying by input_beta_multiplier, and therefore as large as
@ -2575,11 +2690,8 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
for (int i = 0; i < outer_size; ++i) {
uint8 max_in_row = 0;
@ -2640,13 +2752,10 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
for (int i = 0; i < outer_size; ++i) {
// Find max element value which we'll use to ensure numerical stability
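    // (Body elided by the hunk.) It is assumed to compute the standard
    // log-softmax per row: out[c] = (x[c] - max) - log(sum over c' of
    // exp(x[c'] - max)), with the max subtraction purely for numerical
    // stability.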
@ -2786,11 +2895,11 @@ log_x_for_x_greater_than_or_equal_to_1(
input_val);
}
inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims,
int32 input_multiplier, int32 input_left_shift,
int32 reverse_scaling_divisor,
int32 reverse_scaling_right_shift, int diff_min,
uint8* output_data, const RuntimeShape& output_shape) {
uint8* output_data, const Dims<4>& output_dims) {
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large as
// -32 before multiplying by input_beta_multiplier, and therefore as large as
@ -2804,11 +2913,8 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims);
const int depth = MatchingArraySize(input_dims, 0, output_dims, 0);
for (int i = 0; i < outer_size; ++i) {
uint8 max_in_row = 0;
@ -2872,9 +2978,9 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void Logistic(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
inline void Logistic(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(output_dims, input_dims);
for (int i = 0; i < flat_size; i++) {
float val = input_data[i];
@ -2883,11 +2989,11 @@ inline void Logistic(const float* input_data, const RuntimeShape& input_shape,
}
}
inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
inline void Logistic(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const RuntimeShape& output_shape) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
uint8* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(output_dims, input_dims);
for (int i = 0; i < flat_size; i++) {
const uint8 input_val_u8 = input_data[i];
@ -2921,9 +3027,9 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
int16* output_data, const RuntimeShape& output_shape) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
inline void Logistic(const int16* input_data, const Dims<4>& input_dims,
int16* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(output_dims, input_dims);
for (int i = 0; i < flat_size; i++) {
// F0 uses 0 integer bits, range [-1, 1].
@ -2939,9 +3045,9 @@ inline void Logistic(const int16* input_data, const RuntimeShape& input_shape,
}
}
inline void Tanh(const float* input_data, const RuntimeShape& input_shape,
float* output_data, const RuntimeShape& output_shape) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
inline void Tanh(const float* input_data, const Dims<4>& input_dims,
float* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(output_dims, input_dims);
for (int i = 0; i < flat_size; i++) {
float val = input_data[i];
@ -2950,12 +3056,12 @@ inline void Tanh(const float* input_data, const RuntimeShape& input_shape,
}
}
inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
inline void Tanh(const uint8* input_data, const Dims<4>& input_dims,
int32 input_zero_point, int32 input_range_radius,
int32 input_multiplier, int input_left_shift,
uint8* output_data, const RuntimeShape& output_shape) {
uint8* output_data, const Dims<4>& output_dims) {
const int32 output_zero_point = 128;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
const int flat_size = MatchingFlatSize(output_dims, input_dims);
for (int i = 0; i < flat_size; i++) {
const uint8 input_val_u8 = input_data[i];
@ -2990,15 +3096,15 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape,
}
}
inline void Tanh(const int16* input_data, const RuntimeShape& input_shape,
inline void Tanh(const int16* input_data, const Dims<4>& input_dims,
int input_left_shift, int16* output_data,
const RuntimeShape& output_shape) {
const Dims<4>& output_dims) {
// Support for shifts is limited until we have a parameterized version of
// SaturatingRoundingMultiplyByPOT().
TFLITE_DCHECK_GE(input_left_shift, 0);
TFLITE_DCHECK_LE(input_left_shift, 1);
const int flat_size = MatchingFlatSize(input_shape, output_shape);
const int flat_size = MatchingFlatSize(output_dims, input_dims);
// F0 uses 0 integer bits, range [-1, 1].
// This is the return type of math functions such as tanh, logistic,

View File

@ -32,21 +32,19 @@ namespace tflite {
namespace {
void RunSoftmaxFloatReference(const uint8* input_data,
const RuntimeShape& shape_common,
int32 input_offset, const double input_scale,
int stride, float beta,
const Dims<4>& dims_common, int32 input_offset,
const double input_scale, int stride, float beta,
uint8* reference_output_data) {
const int ref_buffer_size = shape_common.FlatSize();
const int ref_buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<float> reference_dequant_data(ref_buffer_size);
std::vector<float> reference_output_float_data(ref_buffer_size);
// Reference data generated via Dequant of input into float, and then applying
// float Softmax.
reference_ops::Dequantize(
input_data, ToRuntimeDims(shape_common), input_offset, input_scale,
reference_dequant_data.data(), ToRuntimeDims(shape_common));
optimized_ops::Softmax(reference_dequant_data.data(), shape_common, beta,
reference_output_float_data.data(), shape_common);
reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale,
reference_dequant_data.data(), dims_common);
optimized_ops::Softmax(reference_dequant_data.data(), dims_common, beta,
reference_output_float_data.data(), dims_common);
// Work with quantized scaling for Softmax, under which 256 represents 1, but
// we limit this to 255.
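  // (Loop body elided by the hunk.) It is assumed to clamp the scaled float
  // reference roughly as: ref_out[i] = min(255, round(256 * float_softmax[i])).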
for (int i = 0; i < ref_buffer_size; i++) {
@ -57,9 +55,9 @@ void RunSoftmaxFloatReference(const uint8* input_data,
}
void CheckOutputData(const uint8* test_output, const uint8* reference_output,
const RuntimeShape& shape_common,
const string& check_label, bool be_exacting) {
const int buffer_size = shape_common.FlatSize();
const Dims<4>& dims_common, const string& check_label,
bool be_exacting) {
const int buffer_size = RequiredBufferSizeForDims(dims_common);
// While calculating some metrics in floating point, we work with quantized
// scaling.
std::vector<int> diff(buffer_size);
@ -93,15 +91,15 @@ void CheckOutputData(const uint8* test_output, const uint8* reference_output,
// Runs the Softmax and compares against the float reference implementation and
// the quantized reference implementation.
void RunOneSoftmaxTest(const uint8* input_data,
const RuntimeShape& shape_common, int32 input_offset,
const double input_scale, int stride, float beta) {
const int buffer_size = shape_common.FlatSize();
void RunOneSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common,
int32 input_offset, const double input_scale, int stride,
float beta) {
const int buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<uint8> optimized_softmax_output(buffer_size);
std::vector<uint8> reference_float_softmax_output(buffer_size);
std::vector<uint8> reference_quant_softmax_output(buffer_size);
RunSoftmaxFloatReference(input_data, shape_common, input_offset, input_scale,
RunSoftmaxFloatReference(input_data, dims_common, input_offset, input_scale,
stride, beta, reference_float_softmax_output.data());
int32 input_beta_multiplier;
@ -115,21 +113,21 @@ void RunOneSoftmaxTest(const uint8* input_data,
const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits,
input_beta_left_shift);
optimized_ops::Softmax(input_data, shape_common, input_beta_multiplier,
optimized_ops::Softmax(input_data, dims_common, input_beta_multiplier,
input_beta_left_shift, diff_min,
optimized_softmax_output.data(), shape_common);
reference_ops::Softmax(input_data, shape_common, input_beta_multiplier,
optimized_softmax_output.data(), dims_common);
reference_ops::Softmax(input_data, dims_common, input_beta_multiplier,
input_beta_left_shift, diff_min,
reference_quant_softmax_output.data(), shape_common);
reference_quant_softmax_output.data(), dims_common);
CheckOutputData(optimized_softmax_output.data(),
reference_float_softmax_output.data(), shape_common,
reference_float_softmax_output.data(), dims_common,
"Optimized vs float reference", false);
CheckOutputData(optimized_softmax_output.data(),
reference_quant_softmax_output.data(), shape_common,
reference_quant_softmax_output.data(), dims_common,
"Optimized vs quant reference", true);
CheckOutputData(reference_quant_softmax_output.data(),
reference_float_softmax_output.data(), shape_common,
reference_float_softmax_output.data(), dims_common,
"Quant reference vs float reference", false);
}
@ -152,13 +150,13 @@ bool TryOneUniformSoftmax() {
const int32 input_offset = UniformRandomInt(-256, 0);
const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10);
auto shape_common =
RuntimeShape({batch, input_height, input_width, input_depth});
const int buffer_size = shape_common.FlatSize();
Dims<4> dims_common =
MakeDimsForInference(input_depth, input_width, input_height, batch);
const int buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<uint8> input_data(buffer_size);
FillRandom(&input_data);
RunOneSoftmaxTest(input_data.data(), shape_common, input_offset, input_scale,
RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale,
stride, beta);
return true;
}
@ -190,14 +188,14 @@ bool TryOneSkyscraperSoftmax(bool small_depth) {
const int middle_min = UniformRandomInt(0, 255);
const int sides_max = UniformRandomInt(0, middle_min);
auto shape_common =
RuntimeShape({batch, input_height, input_width, input_depth});
const int buffer_size = shape_common.FlatSize();
Dims<4> dims_common =
MakeDimsForInference(input_depth, input_width, input_height, batch);
const int buffer_size = RequiredBufferSizeForDims(dims_common);
std::vector<uint8> input_data(buffer_size);
FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min,
sides_max);
RunOneSoftmaxTest(input_data.data(), shape_common, input_offset, input_scale,
RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale,
stride, beta);
return true;
}

View File

@ -294,50 +294,6 @@ inline int RequiredBufferSizeForDims(const Dims<4>& dims) {
return FlatSize(dims);
}
// Flat size calculation, checking that dimensions match with one or more other
// arrays.
inline int MatchingFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0) {
const int dims_count = shape.DimensionsCount();
for (int i = 0; i < dims_count; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
}
return shape.FlatSize();
}
inline int MatchingFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1) {
const int dims_count = shape.DimensionsCount();
for (int i = 0; i < dims_count; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
}
return MatchingFlatSize(shape, check_shape_1);
}
inline int MatchingFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1,
const RuntimeShape& check_shape_2) {
const int dims_count = shape.DimensionsCount();
for (int i = 0; i < dims_count; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
}
return MatchingFlatSize(shape, check_shape_1, check_shape_2);
}
inline int MatchingFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1,
const RuntimeShape& check_shape_2,
const RuntimeShape& check_shape_3) {
const int dims_count = shape.DimensionsCount();
for (int i = 0; i < dims_count; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i));
}
return MatchingFlatSize(shape, check_shape_1, check_shape_2, check_shape_3);
}
// Flat size calculation, checking that dimensions match with one or more other
// arrays.
template <int N>
@ -364,7 +320,7 @@ inline int MatchingFlatSize(const Dims<N>& dims, const Dims<N>& check_dims_0,
for (int i = 0; i < N; ++i) {
TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
}
return MatchingFlatSize(dims, check_dims_1, check_dims_2);
return FlatSize(dims, check_dims_1, check_dims_2);
}
template <int N>
@ -375,7 +331,7 @@ inline int MatchingFlatSize(const Dims<N>& dims, const Dims<N>& check_dims_0,
for (int i = 0; i < N; ++i) {
TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i));
}
return MatchingFlatSize(dims, check_dims_1, check_dims_2, check_dims_3);
return FlatSize(dims, check_dims_1, check_dims_2, check_dims_3);
}
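// For context, a sketch of the unchecked flat-size helper that the checked
// overloads above are assumed to bottom out in (illustrative name only):
template <int N>
inline int FlatSizeSketch(const Dims<N>& dims) {
  int flat_size = 1;
  for (int i = 0; i < N; ++i) {
    flat_size *= dims.sizes[i];
  }
  return flat_size;
}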
// Data is required to be contiguous, and so many operators can use either the

View File

@ -90,9 +90,10 @@ TEST(LogSoftmaxOpTest, CompareWithTFmini) {
m.Invoke();
std::unique_ptr<float[]> output_buffer(new float[input_size * batch_size]);
auto input_shape = RuntimeShape({batch_size, 1, 1, input_size});
tflite::reference_ops::LogSoftmax(input_buffer, input_shape,
output_buffer.get(), input_shape);
static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size},
{1, 0, 0, input_size}};
tflite::reference_ops::LogSoftmax(input_buffer, input_dims,
output_buffer.get(), input_dims);
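  // The braced initializer above is assumed to be {sizes[4], strides[4]},
  // innermost first: sizes {input_size, 1, 1, batch_size} describe a
  // batch_size x input_size buffer, and only strides[0] and strides[3]
  // matter because the middle dimensions have size 1, so an element offset
  // is c * 1 + b * input_size.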
std::vector<float> expected;
expected.insert(expected.end(), output_buffer.get(),

View File

@ -126,13 +126,12 @@ void AverageEvalFloat(TfLiteContext* context, TfLiteNode* node,
float activation_min, activation_max;
CalculateActivationRangeFloat(params->activation, &activation_min,
&activation_max);
#define TF_LITE_AVERAGE_POOL(type) \
type::AveragePool(GetTensorData<float>(input), GetTensorShape(input), \
params->stride_width, params->stride_height, \
data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, \
activation_min, activation_max, \
GetTensorData<float>(output), GetTensorShape(output))
#define TF_LITE_AVERAGE_POOL(type) \
type::AveragePool( \
GetTensorData<float>(input), GetTensorDims(input), params->stride_width, \
params->stride_height, data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, activation_min, \
activation_max, GetTensorData<float>(output), GetTensorDims(output))
if (kernel_type == kReference) {
TF_LITE_AVERAGE_POOL(reference_ops);
} else {
@ -149,13 +148,13 @@ void AverageEvalQuantized(TfLiteContext* context, TfLiteNode* node,
int32_t activation_max;
CalculateActivationRangeUint8(params->activation, output, &activation_min,
&activation_max);
#define TF_LITE_AVERAGE_POOL(type) \
type::AveragePool(GetTensorData<uint8_t>(input), GetTensorShape(input), \
params->stride_width, params->stride_height, \
data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, \
activation_min, activation_max, \
GetTensorData<uint8_t>(output), GetTensorShape(output))
#define TF_LITE_AVERAGE_POOL(type) \
type::AveragePool(GetTensorData<uint8_t>(input), GetTensorDims(input), \
params->stride_width, params->stride_height, \
data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, \
activation_min, activation_max, \
GetTensorData<uint8_t>(output), GetTensorDims(output))
if (kernel_type == kReference) {
TF_LITE_AVERAGE_POOL(reference_ops);
} else {
@ -171,13 +170,12 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
float activation_min, activation_max;
CalculateActivationRangeFloat(params->activation, &activation_min,
&activation_max);
#define TF_LITE_MAX_POOL(type) \
type::MaxPool(GetTensorData<float>(input), GetTensorShape(input), \
params->stride_width, params->stride_height, \
data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, activation_min, \
activation_max, GetTensorData<float>(output), \
GetTensorShape(output))
#define TF_LITE_MAX_POOL(type) \
type::MaxPool( \
GetTensorData<float>(input), GetTensorDims(input), params->stride_width, \
params->stride_height, data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, activation_min, \
activation_max, GetTensorData<float>(output), GetTensorDims(output))
if (kernel_type == kReference) {
TF_LITE_MAX_POOL(reference_ops);
} else {
@ -195,12 +193,12 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
CalculateActivationRangeUint8(params->activation, output, &activation_min,
&activation_max);
#define TF_LITE_MAX_POOL(type) \
type::MaxPool(GetTensorData<uint8_t>(input), GetTensorShape(input), \
type::MaxPool(GetTensorData<uint8_t>(input), GetTensorDims(input), \
params->stride_width, params->stride_height, \
data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, activation_min, \
activation_max, GetTensorData<uint8_t>(output), \
GetTensorShape(output))
GetTensorDims(output))
if (kernel_type == kReference) {
TF_LITE_MAX_POOL(reference_ops);
} else {
@ -216,13 +214,12 @@ void L2EvalFloat(TfLiteContext* context, TfLiteNode* node,
float activation_min, activation_max;
CalculateActivationRangeFloat(params->activation, &activation_min,
&activation_max);
#define TF_LITE_L2_POOL(type) \
type::L2Pool(GetTensorData<float>(input), GetTensorShape(input), \
params->stride_width, params->stride_height, \
data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, activation_min, \
activation_max, GetTensorData<float>(output), \
GetTensorShape(output))
#define TF_LITE_L2_POOL(type) \
type::L2Pool( \
GetTensorData<float>(input), GetTensorDims(input), params->stride_width, \
params->stride_height, data->padding.width, data->padding.height, \
params->filter_width, params->filter_height, activation_min, \
activation_max, GetTensorData<float>(output), GetTensorDims(output))
if (kernel_type == kReference) {
TF_LITE_L2_POOL(reference_ops);
} else {

View File

@ -92,9 +92,10 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaEq1) {
m.Invoke();
std::unique_ptr<float[]> output_buffer(new float[input_size * batch_size]);
auto input_shape = RuntimeShape({batch_size, 1, 1, input_size});
tflite::reference_ops::Softmax(input_buffer, input_shape, beta,
output_buffer.get(), input_shape);
static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size},
{1, 0, 0, input_size}};
tflite::reference_ops::Softmax(input_buffer, input_dims, beta,
output_buffer.get(), input_dims);
std::vector<float> expected;
expected.insert(expected.end(), output_buffer.get(),
@ -119,9 +120,10 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaNotEq1) {
m.Invoke();
std::unique_ptr<float[]> output_buffer(new float[input_size * batch_size]);
auto input_shape = RuntimeShape({batch_size, 1, 1, input_size});
tflite::reference_ops::Softmax(input_buffer, input_shape, beta,
output_buffer.get(), input_shape);
static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size},
{1, 0, 0, input_size}};
tflite::reference_ops::Softmax(input_buffer, input_dims, beta,
output_buffer.get(), input_dims);
std::vector<float> expected;
expected.insert(expected.end(), output_buffer.get(),