diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc
index d03fa42c92a..add36b46c0b 100644
--- a/tensorflow/contrib/lite/kernels/activations.cc
+++ b/tensorflow/contrib/lite/kernels/activations.cc
@@ -251,11 +251,11 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     case kTfLiteUInt8: {
-      optimized_ops::Tanh(GetTensorData<uint8_t>(input), GetTensorShape(input),
+      optimized_ops::Tanh(GetTensorData<uint8_t>(input), GetTensorDims(input),
                           input->params.zero_point, data->input_range_radius,
                           data->input_multiplier, data->input_left_shift,
                           GetTensorData<uint8_t>(output),
-                          GetTensorShape(output));
+                          GetTensorDims(output));
       return kTfLiteOk;
     } break;
     default:
@@ -282,10 +282,10 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
     }
     case kTfLiteUInt8: {
       optimized_ops::Logistic(
-          GetTensorData<uint8_t>(input), GetTensorShape(input),
+          GetTensorData<uint8_t>(input), GetTensorDims(input),
           input->params.zero_point, data->input_range_radius,
           data->input_multiplier, data->input_left_shift,
-          GetTensorData<uint8_t>(output), GetTensorShape(output));
+          GetTensorData<uint8_t>(output), GetTensorDims(output));
       break;
     }
     default:
@@ -341,26 +341,26 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
   const int batch_size = input->dims->data[0];
   const int input_size = input->dims->data[1];
   optimized_ops::Softmax(GetTensorData<uint8_t>(input),
-                         GetTensorShape({batch_size, 1, 1, input_size}),
+                         GetTensorDims({batch_size, 1, 1, input_size}),
                          data->input_multiplier, data->input_left_shift,
                          data->diff_min, GetTensorData<uint8_t>(output),
-                         GetTensorShape({batch_size, 1, 1, input_size}));
+                         GetTensorDims({batch_size, 1, 1, input_size}));
 }
 
 // Takes a 4D tensor and perform softmax along the forth dimension.
 void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
                     TfLiteSoftmaxParams* params) {
-  optimized_ops::Softmax(GetTensorData<float>(input), GetTensorShape(input),
+  optimized_ops::Softmax(GetTensorData<float>(input), GetTensorDims(input),
                          params->beta, GetTensorData<float>(output),
-                         GetTensorShape(output));
+                         GetTensorDims(output));
 }
 
 void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
                         TfLiteSoftmaxParams* params, OpData* data) {
-  optimized_ops::Softmax(GetTensorData<uint8_t>(input), GetTensorShape(input),
+  optimized_ops::Softmax(GetTensorData<uint8_t>(input), GetTensorDims(input),
                          data->input_multiplier, data->input_left_shift,
                          data->diff_min, GetTensorData<uint8_t>(output),
-                         GetTensorShape(output));
+                         GetTensorDims(output));
 }
 
 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
@@ -415,8 +415,8 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   switch (input->type) {
     case kTfLiteFloat32:
       optimized_ops::LogSoftmax(
-          GetTensorData<float>(input), GetTensorShape(input),
-          GetTensorData<float>(output), GetTensorShape(output));
+          GetTensorData<float>(input), GetTensorDims(input),
+          GetTensorData<float>(output), GetTensorDims(output));
       return kTfLiteOk;
     default:
       context->ReportError(context, "Only float32 supported currently., got %d",
diff --git a/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
index d2f1103e14b..e786f785abe 100644
--- a/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
@@ -32,21 +32,19 @@
 namespace tflite {
 namespace {
 
 void RunLogSoftmaxFloatReference(const uint8* input_data,
-                                 const RuntimeShape& shape_common,
-                                 int32 input_offset, const double input_scale,
-                                 int stride, float beta,
-                                 uint8* reference_output_data) {
-  const int ref_buffer_size = shape_common.FlatSize();
+                                 const Dims<4>& dims_common, int32 input_offset,
+                                 const double input_scale, int stride,
+                                 float beta, uint8* reference_output_data) {
+  const int ref_buffer_size = RequiredBufferSizeForDims(dims_common);
   std::vector<float> reference_dequant_data(ref_buffer_size);
   std::vector<float> reference_output_float_data(ref_buffer_size);
   // Reference data generated via Dequant of input into float, and then applying
   // float LogSoftmax.
-  reference_ops::Dequantize(
-      input_data, ToRuntimeDims(shape_common), input_offset, input_scale,
-      reference_dequant_data.data(), ToRuntimeDims(shape_common));
-  optimized_ops::LogSoftmax(reference_dequant_data.data(), shape_common,
-                            reference_output_float_data.data(), shape_common);
+  reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale,
+                            reference_dequant_data.data(), dims_common);
+  optimized_ops::LogSoftmax(reference_dequant_data.data(), dims_common,
+                            reference_output_float_data.data(), dims_common);
   // Work with quantized scaling for LogSoftmax, under which 255 represents 0,
   // and -16 gets nudged up to 0.
   for (int i = 0; i < ref_buffer_size; i++) {
@@ -57,9 +55,9 @@ void RunLogSoftmaxFloatReference(const uint8* input_data,
 }
 
 void CheckOutputData(const uint8* test_output, const uint8* reference_output,
-                     const RuntimeShape& shape_common,
-                     const string& check_label, bool be_exacting) {
-  const int buffer_size = shape_common.FlatSize();
+                     const Dims<4>& dims_common, const string& check_label,
+                     bool be_exacting) {
+  const int buffer_size = RequiredBufferSizeForDims(dims_common);
   // While calculating some metrics in floating point, we work with quantized
   // scaling.
   std::vector<int> diff(buffer_size);
@@ -101,15 +99,15 @@ void CheckOutputData(const uint8* test_output, const uint8* reference_output,
 
 // Runs the LogSoftmax and compares against the float reference implementation
 // and the quantized reference implementation.
-void RunOneLogSoftmaxTest(const uint8* input_data, - const RuntimeShape& shape_common, int32 input_offset, - const double input_scale, int stride, float beta) { - const int buffer_size = shape_common.FlatSize(); +void RunOneLogSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common, + int32 input_offset, const double input_scale, + int stride, float beta) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); std::vector optimized_logsoftmax_output(buffer_size); std::vector reference_float_logsoftmax_output(buffer_size); std::vector reference_quant_logsoftmax_output(buffer_size); - RunLogSoftmaxFloatReference(input_data, shape_common, input_offset, + RunLogSoftmaxFloatReference(input_data, dims_common, input_offset, input_scale, stride, beta, reference_float_logsoftmax_output.data()); @@ -128,23 +126,23 @@ void RunOneLogSoftmaxTest(const uint8* input_data, const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, input_beta_left_shift); - optimized_ops::LogSoftmax(input_data, shape_common, input_beta_multiplier, + optimized_ops::LogSoftmax(input_data, dims_common, input_beta_multiplier, input_beta_left_shift, reverse_scaling_divisor, reverse_scaling_right_shift, diff_min, - optimized_logsoftmax_output.data(), shape_common); + optimized_logsoftmax_output.data(), dims_common); reference_ops::LogSoftmax( - input_data, shape_common, input_beta_multiplier, input_beta_left_shift, + input_data, dims_common, input_beta_multiplier, input_beta_left_shift, reverse_scaling_divisor, reverse_scaling_right_shift, diff_min, - reference_quant_logsoftmax_output.data(), shape_common); + reference_quant_logsoftmax_output.data(), dims_common); CheckOutputData(optimized_logsoftmax_output.data(), - reference_float_logsoftmax_output.data(), shape_common, + reference_float_logsoftmax_output.data(), dims_common, "Optimized vs float reference", false); CheckOutputData(optimized_logsoftmax_output.data(), - reference_quant_logsoftmax_output.data(), shape_common, + reference_quant_logsoftmax_output.data(), dims_common, "Optimized vs quant reference", true); CheckOutputData(reference_quant_logsoftmax_output.data(), - reference_float_logsoftmax_output.data(), shape_common, + reference_float_logsoftmax_output.data(), dims_common, "Quant reference vs float reference", false); } @@ -167,13 +165,13 @@ bool TryOneUniformLogSoftmax() { const int32 input_offset = UniformRandomInt(-256, 0); static constexpr float beta = 1.0f; - auto shape_common = - RuntimeShape({batch, input_height, input_width, input_depth}); - const int buffer_size = shape_common.FlatSize(); + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); std::vector input_data(buffer_size); FillRandom(&input_data); - RunOneLogSoftmaxTest(input_data.data(), shape_common, input_offset, + RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, stride, beta); return true; } @@ -205,14 +203,14 @@ bool TryOneSkyscraperLogSoftmax(bool small_depth) { const int middle_min = UniformRandomInt(0, 255); const int sides_max = UniformRandomInt(0, middle_min); - auto shape_common = - RuntimeShape({batch, input_height, input_width, input_depth}); - const int buffer_size = shape_common.FlatSize(); + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); std::vector input_data(buffer_size); 
FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min, sides_max); - RunOneLogSoftmaxTest(input_data.data(), shape_common, input_offset, + RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, stride, beta); return true; } diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h index 78167521327..c0dda4acf1a 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h @@ -26,10 +26,6 @@ limitations under the License. namespace tflite { namespace optimized_ops { -// Unoptimized reference ops: -using reference_ops::Relu1; -using reference_ops::Relu6; - inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) { return RuntimeShape( {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); @@ -38,285 +34,15 @@ inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) { template void L2Normalization(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - L2Normalization(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); + return L2Normalization(input_data, DimsToShape(input_dims), output_data, + DimsToShape(output_dims)); } inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, uint8* output_data, const Dims<4>& output_dims) { - L2Normalization(input_data, DimsToShape(input_dims), input_zero_point, - output_data, DimsToShape(output_dims)); -} - -inline void Relu(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Relu(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void AveragePool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int kwidth, int kheight, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int kwidth, int kheight, float* output_data, - const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, - int filter_height, float* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); -} - -inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int 
filter_width, int filter_height, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, filter_width, filter_height, - output_activation_min, output_activation_max, output_data, - DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int filter_width, int filter_height, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, - int filter_height, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -inline void MaxPool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int kwidth, int kheight, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims) { - MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, int pad_height, - int kwidth, int kheight, float* output_data, - const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, int filter_height, - float* output_data, const Dims<4>& output_dims) { - MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); -} - -inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int filter_width, int filter_height, - int32 output_activation_min, int32 output_activation_max, - uint8* 
output_data, const Dims<4>& output_dims) { - MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, filter_width, filter_height, - output_activation_min, output_activation_max, output_data, - DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, int pad_height, - int filter_width, int filter_height, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, int filter_height, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -inline void L2Pool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int filter_width, int filter_height, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims) { - L2Pool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, filter_width, filter_height, - output_activation_min, output_activation_max, output_data, - DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void L2Pool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, int pad_height, - int filter_width, int filter_height, float* output_data, - const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - L2Pool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, int filter_height, - float* output_data, const Dims<4>& output_dims) { - L2Pool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); -} - -inline void Softmax(const float* input_data, const Dims<4>& input_dims, - float beta, float* output_data, - const Dims<4>& output_dims) { - Softmax(input_data, DimsToShape(input_dims), beta, output_data, - DimsToShape(output_dims)); -} - -inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, - int32 input_beta_multiplier, 
int32 input_beta_left_shift, - int diff_min, uint8* output_data, - const Dims<4>& output_dims) { - Softmax(input_data, DimsToShape(input_dims), input_beta_multiplier, - input_beta_left_shift, diff_min, output_data, - DimsToShape(output_dims)); -} - -inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - LogSoftmax(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, - int32 input_multiplier, int32 input_left_shift, - int32 reverse_scaling_divisor, - int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const Dims<4>& output_dims) { - LogSoftmax(input_data, DimsToShape(input_dims), input_multiplier, - input_left_shift, reverse_scaling_divisor, - reverse_scaling_right_shift, diff_min, output_data, - DimsToShape(output_dims)); -} - -inline void Logistic(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Logistic(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const Dims<4>& output_dims) { - Logistic(input_data, DimsToShape(input_dims), input_zero_point, - input_range_radius, input_multiplier, input_left_shift, output_data, - DimsToShape(output_dims)); -} - -inline void Logistic(const int16* input_data, const Dims<4>& input_dims, - int16* output_data, const Dims<4>& output_dims) { - Logistic(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Tanh(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Tanh(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const Dims<4>& output_dims) { - Tanh(input_data, DimsToShape(input_dims), input_zero_point, - input_range_radius, input_multiplier, input_left_shift, output_data, - DimsToShape(output_dims)); -} - -inline void Tanh(const int16* input_data, const Dims<4>& input_dims, - int input_left_shift, int16* output_data, - const Dims<4>& output_dims) { - Tanh(input_data, DimsToShape(input_dims), input_left_shift, output_data, - DimsToShape(output_dims)); + return L2Normalization(input_data, DimsToShape(input_dims), input_zero_point, + output_data, DimsToShape(output_dims)); } } // namespace optimized_ops diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 930e26107e8..cf989ce51d6 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -85,12 +85,6 @@ using VectorMap = typename std::conditional< Eigen::Dynamic, 1>>, Eigen::Map>>::type; -template -VectorMap MapAsVector(Scalar* data, const RuntimeShape& shape) { - const int size = shape.FlatSize(); - return VectorMap(data, size, 1); -} - template VectorMap MapAsVector(Scalar* data, const Dims& dims) { const int size = FlatSize(dims); @@ -107,23 +101,6 @@ using MatrixMap = typename std::conditional< Eigen::Dynamic, 
Eigen::Dynamic>>, Eigen::Map>>::type; -template -MatrixMap MapAsMatrixWithLastDimAsRows(Scalar* data, - const RuntimeShape& shape) { - const int dims_count = shape.DimensionsCount(); - const int rows = shape.Dims(dims_count - 1); - const int cols = FlatSizeSkipDim(shape, dims_count - 1); - return MatrixMap(data, rows, cols); -} - -template -MatrixMap MapAsMatrixWithFirstDimAsCols(Scalar* data, - const RuntimeShape& shape) { - const int cols = shape.Dims(0); - const int rows = FlatSizeSkipDim(shape, 0); - return MatrixMap(data, rows, cols); -} - template MatrixMap MapAsMatrixWithFirstDimAsRows(Scalar* data, const Dims& dims) { @@ -2366,12 +2343,12 @@ void GlobalBatchNormalization(const float* input_data, } } -inline void Relu(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { +inline void Relu(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Relu (not fused)"); - const auto input = MapAsVector(input_data, input_shape); - auto output = MapAsVector(output_data, output_shape); + const auto input = MapAsVector(input_data, input_dims); + auto output = MapAsVector(output_data, output_dims); output = input.cwiseMax(0.0f); } @@ -3752,25 +3729,23 @@ inline int NodeOffset(int b, int h, int w, int height, int width) { return (b * height + h) * width + w; } -inline void AveragePool(const float* input_data, - const RuntimeShape& input_shape, int stride_width, - int stride_height, int pad_width, int pad_height, - int kwidth, int kheight, float output_activation_min, +inline void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int kwidth, int kheight, + float output_activation_min, float output_activation_max, float* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("AveragePool"); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); // TODO(benoitjacob) make this a proper reference impl without Eigen! - const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape); - auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape); + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); // TODO(benoitjacob) get rid of the dynamic memory allocation here! 
Eigen::VectorXf out_count(out_mat.cols()); out_count.setZero(); @@ -3808,9 +3783,9 @@ inline void AveragePool(const float* input_data, for (int y = 0; y < output_height; ++y) { for (int x = 0; x < output_width; ++x) { for (int c = 0; c < depth; ++c) { - output_data[Offset(output_shape, b, y, x, c)] = + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunctionWithMinMax( - output_data[Offset(output_shape, b, y, x, c)], + output_data[Offset(output_dims, c, x, y, b)], output_activation_min, output_activation_max); } } @@ -3818,23 +3793,44 @@ inline void AveragePool(const float* input_data, } } -inline void AveragePool(const uint8* input_data, - const RuntimeShape& input_shape, int stride_width, - int stride_height, int pad_width, int pad_height, - int filter_width, int filter_height, +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int kwidth, int kheight, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, kwidth, kheight, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, float* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, uint8* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("AveragePool/8bit"); TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -3854,12 +3850,11 @@ inline void AveragePool(const uint8* input_data, uint16 acc[kAccBufferMaxSize]; memset(acc, 0, depth * sizeof(acc[0])); const uint8* input_ptr = - input_data + - depth * (in_x_origin + - input_width * (in_y_origin + input_height * batch)); + input_data + input_dims.strides[1] * in_x_origin + + 
input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch; for (int fy = filter_y_start; fy < filter_y_end; fy++) { - const uint8* input_row_ptr = - input_ptr + depth * (fy * input_width + filter_x_start); + const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] + + filter_x_start * input_dims.strides[1]; for (int fx = filter_x_start; fx < filter_x_end; fx++) { int channel = 0; #ifdef USE_NEON @@ -3890,7 +3885,7 @@ inline void AveragePool(const uint8* input_data, } } uint8* output_ptr = - output_data + Offset(output_shape, batch, out_y, out_x, 0); + output_data + Offset(output_dims, 0, out_x, out_y, batch); int channel = 0; #ifdef USE_NEON #define AVGPOOL_DIVIDING_BY(FILTER_COUNT) \ @@ -3931,23 +3926,54 @@ inline void AveragePool(const uint8* input_data, } } -inline void MaxPool(const float* input_data, const RuntimeShape& input_shape, +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float output_activation_min, float output_activation_max, - float* output_data, const RuntimeShape& output_shape) { + float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("MaxPool"); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape); - 
auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape); + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); // Prefill the output to minimum representable float value out_mat.setConstant(std::numeric_limits::lowest()); for (int b = 0; b < batches; ++b) { @@ -3980,9 +4006,9 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape, for (int y = 0; y < output_height; ++y) { for (int x = 0; x < output_width; ++x) { for (int c = 0; c < depth; ++c) { - output_data[Offset(output_shape, b, y, x, c)] = + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunctionWithMinMax( - output_data[Offset(output_shape, b, y, x, c)], + output_data[Offset(output_dims, c, x, y, b)], output_activation_min, output_activation_max); } } @@ -3990,21 +4016,41 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape, } } -inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape, +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int kwidth, int kheight, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, kwidth, kheight, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const RuntimeShape& output_shape) { + uint8* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("MaxPool/8bit"); TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -4022,12 +4068,11 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& 
input_shape, uint8 acc[kAccBufferMaxSize]; memset(acc, 0, depth * sizeof(acc[0])); const uint8* input_ptr = - input_data + - depth * (in_x_origin + - input_width * (in_y_origin + input_height * batch)); + input_data + input_dims.strides[1] * in_x_origin + + input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch; for (int fy = filter_y_start; fy < filter_y_end; fy++) { - const uint8* input_row_ptr = - input_ptr + depth * (fy * input_width + filter_x_start); + const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] + + filter_x_start * input_dims.strides[1]; for (int fx = filter_x_start; fx < filter_x_end; fx++) { int channel = 0; #ifdef USE_NEON @@ -4053,7 +4098,7 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape, } } uint8* output_ptr = - output_data + Offset(output_shape, batch, out_y, out_x, 0); + output_data + Offset(output_dims, 0, out_x, out_y, batch); int channel = 0; #ifdef USE_NEON for (; channel <= depth - 16; channel += 16) { @@ -4080,23 +4125,53 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void L2Pool(const float* input_data, const RuntimeShape& input_shape, +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, float output_activation_min, float output_activation_max, - float* output_data, const RuntimeShape& output_shape) { + float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("L2Pool"); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int 
output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); // Actually carry out L2 Pool. Code is written in forward mode: we go through // the input values once, and write to all the pooled regions that it maps to. - const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape); - auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape); + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); Eigen::VectorXf in_square(in_mat.rows()); Eigen::VectorXf out_count(out_mat.cols()); out_count.setZero(); @@ -4138,6 +4213,28 @@ inline void L2Pool(const float* input_data, const RuntimeShape& input_shape, (out_mat.array().rowwise() * out_count.transpose().array()).cwiseSqrt(); } +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + L2Pool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + L2Pool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + inline void LocalResponseNormalization(const float* input_data, const Dims<4>& input_dims, int range, float bias, float alpha, float beta, @@ -4183,14 +4280,14 @@ inline void LocalResponseNormalization(const float* input_data, } } -inline void Softmax(const float* input_data, const RuntimeShape& input_shape, +inline void Softmax(const float* input_data, const Dims<4>& input_dims, float beta, float* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Softmax"); - MatchingFlatSize(input_shape, output_shape); + MatchingFlatSize(input_dims, output_dims); - const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape); - auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape); + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); // Compute the exponential first, removing the max coefficient for numerical // stability. out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta; @@ -4202,10 +4299,10 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape, out_mat.array().rowwise() *= scale; } -inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, +inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_beta_multiplier, int32 input_beta_left_shift, int diff_min, uint8* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { // The representation chosen for the input to the exp() function is Q5.26. 
// We need to leave extra space since values that we skip might be as large as // -32 before multiplying by input_beta_multiplier, and therefore as large as @@ -4219,11 +4316,8 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, using FixedPoint0 = gemmlowp::FixedPoint; gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); for (int b = 0; b < outer_size; ++b) { const uint8* input_data_ptr = input_data + b * depth; @@ -4413,14 +4507,11 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, // TODO(myenik): This is the same as the reference implementation, not actually // optimized yet. -inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { +inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("LogSoftmax"); - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); for (int i = 0; i < outer_size; ++i) { const float* block_input_data = input_data + i * depth; @@ -4561,11 +4652,11 @@ log_x_for_x_greater_than_or_equal_to_1( } // Currently just a copy of the reference code. -inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, +inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_multiplier, int32 input_left_shift, int32 reverse_scaling_divisor, int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const RuntimeShape& output_shape) { + uint8* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("LogSoftmax/Uint8"); // The representation chosen for the input to the exp() function is Q5.26. 
// We need to leave extra space since values that we skip might be as large as @@ -4580,11 +4671,8 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); for (int i = 0; i < outer_size; ++i) { const uint8* block_input_data = input_data + i * depth; @@ -4648,21 +4736,21 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { +inline void Logistic(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Logistic"); - auto input_map = MapAsVector(input_data, input_shape); - auto output_map = MapAsVector(output_data, output_shape); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); output_map.array() = input_map.array().unaryExpr(Eigen::internal::scalar_sigmoid_op()); } -inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, +inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, int32 input_range_radius, int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { + uint8* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Logistic/Uint8"); - const int size = MatchingFlatSize(input_shape, output_shape); + const int size = MatchingFlatSize(input_dims, output_dims); int c = 0; #ifdef USE_NEON @@ -4794,10 +4882,10 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const int16* input_data, const RuntimeShape& input_shape, - int16* output_data, const RuntimeShape& output_shape) { +inline void Logistic(const int16* input_data, const Dims<4>& input_dims, + int16* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Logistic/Int16"); - const int flat_size = MatchingFlatSize(input_shape, output_shape); + const int flat_size = MatchingFlatSize(output_dims, input_dims); for (int i = 0; i < flat_size; i++) { } @@ -4854,21 +4942,21 @@ inline void Logistic(const int16* input_data, const RuntimeShape& input_shape, } } -inline void Tanh(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { +inline void Tanh(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Tanh"); - auto input_map = MapAsVector(input_data, input_shape); - auto output_map = MapAsVector(output_data, output_shape); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); output_map.array() = input_map.array().tanh(); } -inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, +inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, int32 
input_range_radius, int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { + uint8* output_data, const Dims<4>& output_dims) { // Note that this is almost the exact same code as in Logistic(). gemmlowp::ScopedProfilingLabel label("Tanh"); - const int size = MatchingFlatSize(input_shape, output_shape); + const int size = MatchingFlatSize(input_dims, output_dims); int c = 0; int32_t output_zero_point = 128; @@ -5009,16 +5097,16 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, +inline void Tanh(const int16* input_data, const Dims<4>& input_dims, int input_left_shift, int16* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Tanh/Int16"); // Support for shifts is limited until we have a parameterized version of // SaturatingRoundingMultiplyByPOT(). TFLITE_DCHECK_GE(input_left_shift, 0); TFLITE_DCHECK_LE(input_left_shift, 1); - const int flat_size = MatchingFlatSize(input_shape, output_shape); + const int flat_size = MatchingFlatSize(output_dims, input_dims); int c = 0; const int16* input_data_ptr = input_data; diff --git a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h index 878b2441b4f..6f5f6a3e6fa 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h @@ -34,297 +34,15 @@ inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) { template void L2Normalization(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { - L2Normalization(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); + return L2Normalization(input_data, DimsToShape(input_dims), output_data, + DimsToShape(output_dims)); } inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, uint8* output_data, const Dims<4>& output_dims) { - L2Normalization(input_data, DimsToShape(input_dims), input_zero_point, - output_data, DimsToShape(output_dims)); -} - -inline void Relu(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Relu(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Relu1(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Relu1(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Relu6(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Relu6(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void AveragePool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int kwidth, int kheight, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const float* 
input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int kwidth, int kheight, float* output_data, - const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, - int filter_height, float* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); -} - -inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int filter_width, int filter_height, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, filter_width, filter_height, - output_activation_min, output_activation_max, output_data, - DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int filter_width, int filter_height, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, - int filter_height, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -inline void MaxPool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int kwidth, int kheight, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims) { - MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, 
int pad_height, - int kwidth, int kheight, float* output_data, - const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, int filter_height, - float* output_data, const Dims<4>& output_dims) { - MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); -} - -inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int filter_width, int filter_height, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - MaxPool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, filter_width, filter_height, - output_activation_min, output_activation_max, output_data, - DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const uint8* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, int pad_height, - int filter_width, int filter_height, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, int filter_height, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -inline void L2Pool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int filter_width, int filter_height, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims) { - L2Pool(input_data, DimsToShape(input_dims), stride_width, stride_height, - pad_width, pad_height, filter_width, filter_height, - output_activation_min, output_activation_max, output_data, - DimsToShape(output_dims)); -} - -// legacy, for compatibility with old checked-in code -template -void L2Pool(const float* input_data, const Dims<4>& input_dims, - int stride_width, int stride_height, int pad_width, int pad_height, - int filter_width, int filter_height, float* output_data, - const Dims<4>& output_dims) { - 
float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - L2Pool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int filter_width, int filter_height, - float* output_data, const Dims<4>& output_dims) { - L2Pool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); -} - -inline void Softmax(const float* input_data, const Dims<4>& input_dims, - float beta, float* output_data, - const Dims<4>& output_dims) { - Softmax(input_data, DimsToShape(input_dims), beta, output_data, - DimsToShape(output_dims)); -} - -inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, - int32 input_beta_multiplier, int32 input_beta_left_shift, - int diff_min, uint8* output_data, - const Dims<4>& output_dims) { - Softmax(input_data, DimsToShape(input_dims), input_beta_multiplier, - input_beta_left_shift, diff_min, output_data, - DimsToShape(output_dims)); -} - -inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - LogSoftmax(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, - int32 input_multiplier, int32 input_left_shift, - int32 reverse_scaling_divisor, - int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const Dims<4>& output_dims) { - LogSoftmax(input_data, DimsToShape(input_dims), input_multiplier, - input_left_shift, reverse_scaling_divisor, - reverse_scaling_right_shift, diff_min, output_data, - DimsToShape(output_dims)); -} - -inline void Logistic(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Logistic(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const Dims<4>& output_dims) { - Logistic(input_data, DimsToShape(input_dims), input_zero_point, - input_range_radius, input_multiplier, input_left_shift, output_data, - DimsToShape(output_dims)); -} - -inline void Logistic(const int16* input_data, const Dims<4>& input_dims, - int16* output_data, const Dims<4>& output_dims) { - Logistic(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Tanh(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Tanh(input_data, DimsToShape(input_dims), output_data, - DimsToShape(output_dims)); -} - -inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const Dims<4>& output_dims) { - Tanh(input_data, DimsToShape(input_dims), input_zero_point, - input_range_radius, input_multiplier, input_left_shift, output_data, - DimsToShape(output_dims)); -} - -inline void Tanh(const int16* input_data, const Dims<4>& input_dims, - int input_left_shift, int16* 
output_data, - const Dims<4>& output_dims) { - Tanh(input_data, DimsToShape(input_dims), input_left_shift, output_data, - DimsToShape(output_dims)); + return L2Normalization(input_data, DimsToShape(input_dims), input_zero_point, + output_data, DimsToShape(output_dims)); } } // namespace reference_ops diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 1ac010dd7ed..1908f7fa6cf 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -914,9 +914,9 @@ void GlobalBatchNormalization(const float* input_data, } } -inline void Relu(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); +inline void Relu(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(input_dims, output_dims); for (int i = 0; i < flat_size; ++i) { const float val = input_data[i]; const float lower = 0; @@ -925,10 +925,9 @@ inline void Relu(const float* input_data, const RuntimeShape& input_shape, } } -inline void Relu1(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - gemmlowp::ScopedProfilingLabel label("Relu1 (not fused)"); - const int flat_size = MatchingFlatSize(input_shape, output_shape); +inline void Relu1(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(input_dims, output_dims); for (int i = 0; i < flat_size; ++i) { const float val = input_data[i]; const float upper = 1; @@ -938,10 +937,9 @@ inline void Relu1(const float* input_data, const RuntimeShape& input_shape, } } -inline void Relu6(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - gemmlowp::ScopedProfilingLabel label("Relu6 (not fused)"); - const int flat_size = MatchingFlatSize(input_shape, output_shape); +inline void Relu6(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(input_dims, output_dims); for (int i = 0; i < flat_size; ++i) { const float val = input_data[i]; const float upper = 6; @@ -2247,21 +2245,18 @@ inline int NodeOffset(int b, int h, int w, int height, int width) { return (b * height + h) * width + w; } -inline void AveragePool(const float* input_data, - const RuntimeShape& input_shape, int stride_width, - int stride_height, int pad_width, int pad_height, - int filter_width, int filter_height, +inline void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, float output_activation_min, float output_activation_max, float* output_data, - const RuntimeShape& output_shape) { - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = 
output_shape.Dims(2); + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -2285,12 +2280,12 @@ inline void AveragePool(const float* input_data, const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; total += - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + input_data[Offset(input_dims, channel, in_x, in_y, batch)]; filter_count++; } } const float average = total / filter_count; - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = ActivationFunctionWithMinMax(average, output_activation_min, output_activation_max); } @@ -2299,22 +2294,42 @@ inline void AveragePool(const float* input_data, } } -inline void AveragePool(const uint8* input_data, - const RuntimeShape& input_shape, int stride_width, - int stride_height, int pad_width, int pad_height, - int filter_width, int filter_height, +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, float* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, uint8* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int 
output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -2337,15 +2352,14 @@ inline void AveragePool(const uint8* input_data, ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - acc += - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + acc += input_data[Offset(input_dims, channel, in_x, in_y, batch)]; filter_count++; } } acc = (acc + filter_count / 2) / filter_count; acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = static_cast(acc); } } @@ -2353,19 +2367,50 @@ inline void AveragePool(const uint8* input_data, } } -inline void L2Pool(const float* input_data, const RuntimeShape& input_shape, +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, + int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +inline void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, float output_activation_min, float output_activation_max, - float* output_data, const RuntimeShape& output_shape) { - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int 
output_width = ArraySize(output_dims, 1); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -2389,13 +2434,13 @@ inline void L2Pool(const float* input_data, const RuntimeShape& input_shape, const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; const float val = - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + input_data[Offset(input_dims, channel, in_x, in_y, batch)]; sum_squares += val * val; filter_count++; } } const float l2pool_result = std::sqrt(sum_squares / filter_count); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = ActivationFunctionWithMinMax(l2pool_result, output_activation_min, output_activation_max); } @@ -2404,19 +2449,40 @@ inline void L2Pool(const float* input_data, const RuntimeShape& input_shape, } } -inline void MaxPool(const float* input_data, const RuntimeShape& input_shape, +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + L2Pool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + L2Pool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, float output_activation_min, float output_activation_max, - float* output_data, const RuntimeShape& output_shape) { - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + float* output_data, const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -2440,10 +2506,10 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape, const int in_y = in_y_origin + filter_y; max = std::max( max, - 
input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + input_data[Offset(input_dims, channel, in_x, in_y, batch)]); } } - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = ActivationFunctionWithMinMax(max, output_activation_min, output_activation_max); } @@ -2452,22 +2518,42 @@ inline void MaxPool(const float* input_data, const RuntimeShape& input_shape, } } -inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape, +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_data, output_dims); +} + +inline void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const RuntimeShape& output_shape) { + uint8* output_data, const Dims<4>& output_dims) { TFLITE_DCHECK_LE(output_activation_min, output_activation_max); TFLITE_DCHECK_GE(output_activation_min, 0); TFLITE_DCHECK_LE(output_activation_max, 255); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -2491,12 +2577,12 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape, const int in_y = in_y_origin + filter_y; max = std::max( max, - input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + input_data[Offset(input_dims, channel, in_x, in_y, batch)]); } } max = std::max(max, output_activation_min); max = std::min(max, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + output_data[Offset(output_dims, channel, out_x, out_y, batch)] = static_cast(max); } } @@ -2504,6 
+2590,38 @@ inline void MaxPool(const uint8* input_data, const RuntimeShape& input_shape, } } +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, int pad_width, int pad_height, + int filter_width, int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + MaxPool(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + MaxPool(input_data, input_dims, stride, stride, pad_width, pad_height, + filter_width, filter_height, output_activation_min, + output_activation_max, output_data, output_dims); +} + inline void LocalResponseNormalization(const float* input_data, const Dims<4>& input_dims, int range, float bias, float alpha, float beta, @@ -2527,14 +2645,11 @@ inline void LocalResponseNormalization(const float* input_data, } } -inline void Softmax(const float* input_data, const RuntimeShape& input_shape, +inline void Softmax(const float* input_data, const Dims<4>& input_dims, float beta, float* output_data, - const RuntimeShape& output_shape) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const Dims<4>& output_dims) { + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); for (int i = 0; i < outer_size; ++i) { // Find max element value which we'll use to ensure numerical stability @@ -2559,10 +2674,10 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape, } } -inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, +inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_beta_multiplier, int32 input_beta_left_shift, int diff_min, uint8* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { // The representation chosen for the input to the exp() function is Q5.26. 
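// Illustrative sketch, not part of the patch: the float reference Softmax above
// walks `outer_size` rows of length `depth` (dims index 0 in the Dims<4>
// convention), shifts each row by its max before exponentiating, then
// normalizes. Below is a self-contained version of that per-row loop, with
// plain arrays standing in for the Dims<4>/GetTensorData plumbing.
#include <algorithm>
#include <cmath>
#include <cstdio>

void SoftmaxRow(const float* input, int depth, float beta, float* output) {
  // Subtract the row max for numerical stability, as the reference loop does.
  float max_val = input[0];
  for (int c = 1; c < depth; ++c) max_val = std::max(max_val, input[c]);
  float sum = 0.f;
  for (int c = 0; c < depth; ++c) {
    output[c] = std::exp((input[c] - max_val) * beta);
    sum += output[c];
  }
  for (int c = 0; c < depth; ++c) output[c] /= sum;
}

int main() {
  const float in[4] = {1.f, 2.f, 3.f, 4.f};
  float out[4];
  SoftmaxRow(in, /*depth=*/4, /*beta=*/1.f, out);
  for (float v : out) std::printf("%f ", v);  // increasing values, summing to 1
  std::printf("\n");
}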
// We need to leave extra space since values that we skip might be as large as // -32 before multiplying by input_beta_multiplier, and therefore as large as @@ -2575,11 +2690,8 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); for (int i = 0; i < outer_size; ++i) { uint8 max_in_row = 0; @@ -2640,13 +2752,10 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); +inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); for (int i = 0; i < outer_size; ++i) { // Find max element value which we'll use to ensure numerical stability @@ -2786,11 +2895,11 @@ log_x_for_x_greater_than_or_equal_to_1( input_val); } -inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, +inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_multiplier, int32 input_left_shift, int32 reverse_scaling_divisor, int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const RuntimeShape& output_shape) { + uint8* output_data, const Dims<4>& output_dims) { // The representation chosen for the input to the exp() function is Q5.26. 
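// Illustrative sketch, not part of the patch: the float reference LogSoftmax
// above uses the same max-shift trick, computing
// log_softmax(x_i) = (x_i - max) - log(sum_j exp(x_j - max)) over each row of
// length `depth`. A minimal standalone version of that inner loop:
#include <algorithm>
#include <cmath>
#include <cstdio>

void LogSoftmaxRow(const float* input, int depth, float* output) {
  float max_val = input[0];
  for (int c = 1; c < depth; ++c) max_val = std::max(max_val, input[c]);
  float sum = 0.f;
  for (int c = 0; c < depth; ++c) sum += std::exp(input[c] - max_val);
  const float log_sum = std::log(sum);
  for (int c = 0; c < depth; ++c) output[c] = input[c] - max_val - log_sum;
}

int main() {
  const float in[3] = {0.f, 1.f, 2.f};
  float out[3];
  LogSoftmaxRow(in, 3, out);
  for (float v : out) std::printf("%f ", v);  // exp() of these sums to 1
  std::printf("\n");
}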
// We need to leave extra space since values that we skip might be as large as // -32 before multiplying by input_beta_multiplier, and therefore as large as @@ -2804,11 +2913,8 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); for (int i = 0; i < outer_size; ++i) { uint8 max_in_row = 0; @@ -2872,9 +2978,9 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); +inline void Logistic(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(output_dims, input_dims); for (int i = 0; i < flat_size; i++) { float val = input_data[i]; @@ -2883,11 +2989,11 @@ inline void Logistic(const float* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, +inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, int32 input_range_radius, int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); + uint8* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(output_dims, input_dims); for (int i = 0; i < flat_size; i++) { const uint8 input_val_u8 = input_data[i]; @@ -2921,9 +3027,9 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const int16* input_data, const RuntimeShape& input_shape, - int16* output_data, const RuntimeShape& output_shape) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); +inline void Logistic(const int16* input_data, const Dims<4>& input_dims, + int16* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(output_dims, input_dims); for (int i = 0; i < flat_size; i++) { // F0 uses 0 integer bits, range [-1, 1]. 
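// Illustrative sketch, not part of the patch: the float Logistic reference in
// the hunk above is an element-wise pass over the flat buffer, computing
// 1 / (1 + exp(-x)); the uint8 and int16 overloads implement the same curve in
// fixed point. A self-contained float version of that loop:
#include <cmath>
#include <cstdio>

void LogisticFlat(const float* input, int flat_size, float* output) {
  for (int i = 0; i < flat_size; ++i) {
    output[i] = 1.f / (1.f + std::exp(-input[i]));
  }
}

int main() {
  const float in[3] = {-2.f, 0.f, 2.f};
  float out[3];
  LogisticFlat(in, 3, out);
  for (float v : out) std::printf("%f ", v);  // ~0.119, 0.5, ~0.881
  std::printf("\n");
}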
@@ -2939,9 +3045,9 @@ inline void Logistic(const int16* input_data, const RuntimeShape& input_shape, } } -inline void Tanh(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); +inline void Tanh(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + const int flat_size = MatchingFlatSize(output_dims, input_dims); for (int i = 0; i < flat_size; i++) { float val = input_data[i]; @@ -2950,12 +3056,12 @@ inline void Tanh(const float* input_data, const RuntimeShape& input_shape, } } -inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, +inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, int32 input_range_radius, int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { + uint8* output_data, const Dims<4>& output_dims) { const int32 output_zero_point = 128; - const int flat_size = MatchingFlatSize(input_shape, output_shape); + const int flat_size = MatchingFlatSize(output_dims, input_dims); for (int i = 0; i < flat_size; i++) { const uint8 input_val_u8 = input_data[i]; @@ -2990,15 +3096,15 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, +inline void Tanh(const int16* input_data, const Dims<4>& input_dims, int input_left_shift, int16* output_data, - const RuntimeShape& output_shape) { + const Dims<4>& output_dims) { // Support for shifts is limited until we have a parameterized version of // SaturatingRoundingMultiplyByPOT(). TFLITE_DCHECK_GE(input_left_shift, 0); TFLITE_DCHECK_LE(input_left_shift, 1); - const int flat_size = MatchingFlatSize(input_shape, output_shape); + const int flat_size = MatchingFlatSize(output_dims, input_dims); // F0 uses 0 integer bits, range [-1, 1]. // This is the return type of math functions such as tanh, logistic, diff --git a/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc index a7dad3c14e6..d781a7b6420 100644 --- a/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc @@ -32,21 +32,19 @@ namespace tflite { namespace { void RunSoftmaxFloatReference(const uint8* input_data, - const RuntimeShape& shape_common, - int32 input_offset, const double input_scale, - int stride, float beta, + const Dims<4>& dims_common, int32 input_offset, + const double input_scale, int stride, float beta, uint8* reference_output_data) { - const int ref_buffer_size = shape_common.FlatSize(); + const int ref_buffer_size = RequiredBufferSizeForDims(dims_common); std::vector reference_dequant_data(ref_buffer_size); std::vector reference_output_float_data(ref_buffer_size); // Reference data generated via Dequant of input into float, and then applying // float Softmax. 
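// Illustrative sketch, not part of the patch: after the float reference Softmax
// runs, the test maps its output back to uint8 using the scaling described in
// the test comments, where 256 represents 1.0 but values are capped at 255.
// The exact rounding used by the test is not shown in this hunk, so the
// round-then-clamp mapping below is an assumption, not the test's code.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

uint8_t QuantizeSoftmaxOutput(float prob) {
  const int q = static_cast<int>(std::round(prob * 256.f));
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main() {
  std::printf("%d %d %d\n", static_cast<int>(QuantizeSoftmaxOutput(0.f)),
              static_cast<int>(QuantizeSoftmaxOutput(0.5f)),
              static_cast<int>(QuantizeSoftmaxOutput(1.f)));  // 0 128 255
}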
- reference_ops::Dequantize( - input_data, ToRuntimeDims(shape_common), input_offset, input_scale, - reference_dequant_data.data(), ToRuntimeDims(shape_common)); - optimized_ops::Softmax(reference_dequant_data.data(), shape_common, beta, - reference_output_float_data.data(), shape_common); + reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale, + reference_dequant_data.data(), dims_common); + optimized_ops::Softmax(reference_dequant_data.data(), dims_common, beta, + reference_output_float_data.data(), dims_common); // Work with quantized scaling for Softmax, under which 256 represents 1, but // we limit this to 255. for (int i = 0; i < ref_buffer_size; i++) { @@ -57,9 +55,9 @@ void RunSoftmaxFloatReference(const uint8* input_data, } void CheckOutputData(const uint8* test_output, const uint8* reference_output, - const RuntimeShape& shape_common, - const string& check_label, bool be_exacting) { - const int buffer_size = shape_common.FlatSize(); + const Dims<4>& dims_common, const string& check_label, + bool be_exacting) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); // While calculating some metrics in floating point, we work with quantized // scaling. std::vector diff(buffer_size); @@ -93,15 +91,15 @@ void CheckOutputData(const uint8* test_output, const uint8* reference_output, // Runs the Softmax and compares against the float reference implementation and // the quantized reference implementation. -void RunOneSoftmaxTest(const uint8* input_data, - const RuntimeShape& shape_common, int32 input_offset, - const double input_scale, int stride, float beta) { - const int buffer_size = shape_common.FlatSize(); +void RunOneSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common, + int32 input_offset, const double input_scale, int stride, + float beta) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); std::vector optimized_softmax_output(buffer_size); std::vector reference_float_softmax_output(buffer_size); std::vector reference_quant_softmax_output(buffer_size); - RunSoftmaxFloatReference(input_data, shape_common, input_offset, input_scale, + RunSoftmaxFloatReference(input_data, dims_common, input_offset, input_scale, stride, beta, reference_float_softmax_output.data()); int32 input_beta_multiplier; @@ -115,21 +113,21 @@ void RunOneSoftmaxTest(const uint8* input_data, const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, input_beta_left_shift); - optimized_ops::Softmax(input_data, shape_common, input_beta_multiplier, + optimized_ops::Softmax(input_data, dims_common, input_beta_multiplier, input_beta_left_shift, diff_min, - optimized_softmax_output.data(), shape_common); - reference_ops::Softmax(input_data, shape_common, input_beta_multiplier, + optimized_softmax_output.data(), dims_common); + reference_ops::Softmax(input_data, dims_common, input_beta_multiplier, input_beta_left_shift, diff_min, - reference_quant_softmax_output.data(), shape_common); + reference_quant_softmax_output.data(), dims_common); CheckOutputData(optimized_softmax_output.data(), - reference_float_softmax_output.data(), shape_common, + reference_float_softmax_output.data(), dims_common, "Optimized vs float reference", false); CheckOutputData(optimized_softmax_output.data(), - reference_quant_softmax_output.data(), shape_common, + reference_quant_softmax_output.data(), dims_common, "Optimized vs quant reference", true); CheckOutputData(reference_quant_softmax_output.data(), - reference_float_softmax_output.data(), shape_common, + 
reference_float_softmax_output.data(), dims_common, "Quant reference vs float reference", false); } @@ -152,13 +150,13 @@ bool TryOneUniformSoftmax() { const int32 input_offset = UniformRandomInt(-256, 0); const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10); - auto shape_common = - RuntimeShape({batch, input_height, input_width, input_depth}); - const int buffer_size = shape_common.FlatSize(); + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); std::vector input_data(buffer_size); FillRandom(&input_data); - RunOneSoftmaxTest(input_data.data(), shape_common, input_offset, input_scale, + RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, stride, beta); return true; } @@ -190,14 +188,14 @@ bool TryOneSkyscraperSoftmax(bool small_depth) { const int middle_min = UniformRandomInt(0, 255); const int sides_max = UniformRandomInt(0, middle_min); - auto shape_common = - RuntimeShape({batch, input_height, input_width, input_depth}); - const int buffer_size = shape_common.FlatSize(); + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); std::vector input_data(buffer_size); FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min, sides_max); - RunOneSoftmaxTest(input_data.data(), shape_common, input_offset, input_scale, + RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, stride, beta); return true; } diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index 707d2d261a4..64f4881a468 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -294,50 +294,6 @@ inline int RequiredBufferSizeForDims(const Dims<4>& dims) { return FlatSize(dims); } -// Flat size calculation, checking that dimensions match with one or more other -// arrays. 
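// Illustrative sketch, not part of the patch: the tests above size their
// buffers with RequiredBufferSizeForDims and the Dims<N> MatchingFlatSize
// helpers check that two descriptors agree before returning the shared flat
// size. The struct below is a simplified stand-in for tflite::Dims<4> (sizes
// ordered depth, width, height, batch, plus linear strides), not the real type.
#include <cassert>
#include <cstdio>

struct Dims4 {
  int sizes[4];
  int strides[4];
};

// Flat element count, analogous to FlatSize / RequiredBufferSizeForDims.
int FlatSize(const Dims4& dims) {
  return dims.sizes[0] * dims.sizes[1] * dims.sizes[2] * dims.sizes[3];
}

// MatchingFlatSize-style helper: check the descriptors dimension by dimension,
// then return the shared flat size.
int MatchingFlatSize(const Dims4& dims, const Dims4& check_dims) {
  for (int i = 0; i < 4; ++i) assert(dims.sizes[i] == check_dims.sizes[i]);
  return FlatSize(dims);
}

int main() {
  // Strides here are cumulative products of the sizes, one valid dense layout.
  Dims4 d = {{8, 4, 4, 1}, {1, 8, 32, 128}};
  std::printf("flat size = %d\n", MatchingFlatSize(d, d));  // 128
}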
-inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return shape.FlatSize(); -} - -inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return MatchingFlatSize(shape, check_shape_1); -} - -inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return MatchingFlatSize(shape, check_shape_1, check_shape_2); -} - -inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2, - const RuntimeShape& check_shape_3) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return MatchingFlatSize(shape, check_shape_1, check_shape_2, check_shape_3); -} - // Flat size calculation, checking that dimensions match with one or more other // arrays. template @@ -364,7 +320,7 @@ inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, for (int i = 0; i < N; ++i) { TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); } - return MatchingFlatSize(dims, check_dims_1, check_dims_2); + return FlatSize(dims, check_dims_1, check_dims_2); } template @@ -375,7 +331,7 @@ inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, for (int i = 0; i < N; ++i) { TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); } - return MatchingFlatSize(dims, check_dims_1, check_dims_2, check_dims_3); + return FlatSize(dims, check_dims_1, check_dims_2, check_dims_3); } // Data is required to be contiguous, and so many operators can use either the diff --git a/tensorflow/contrib/lite/kernels/log_softmax_test.cc b/tensorflow/contrib/lite/kernels/log_softmax_test.cc index 9a8d35e82cb..62820a2f511 100644 --- a/tensorflow/contrib/lite/kernels/log_softmax_test.cc +++ b/tensorflow/contrib/lite/kernels/log_softmax_test.cc @@ -90,9 +90,10 @@ TEST(LogSoftmaxOpTest, CompareWithTFmini) { m.Invoke(); std::unique_ptr output_buffer(new float[input_size * batch_size]); - auto input_shape = RuntimeShape({batch_size, 1, 1, input_size}); - tflite::reference_ops::LogSoftmax(input_buffer, input_shape, - output_buffer.get(), input_shape); + static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size}, + {1, 0, 0, input_size}}; + tflite::reference_ops::LogSoftmax(input_buffer, input_dims, + output_buffer.get(), input_dims); std::vector expected; expected.insert(expected.end(), output_buffer.get(), diff --git a/tensorflow/contrib/lite/kernels/pooling.cc b/tensorflow/contrib/lite/kernels/pooling.cc index 41771e60bc6..311e9b83997 100644 --- a/tensorflow/contrib/lite/kernels/pooling.cc +++ b/tensorflow/contrib/lite/kernels/pooling.cc @@ -126,13 +126,12 @@ void AverageEvalFloat(TfLiteContext* context, TfLiteNode* node, float activation_min, activation_max; CalculateActivationRangeFloat(params->activation, 
&activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - type::AveragePool(GetTensorData(input), GetTensorShape(input), \ - params->stride_width, params->stride_height, \ - data->padding.width, data->padding.height, \ - params->filter_width, params->filter_height, \ - activation_min, activation_max, \ - GetTensorData(output), GetTensorShape(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + type::AveragePool( \ + GetTensorData(input), GetTensorDims(input), params->stride_width, \ + params->stride_height, data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, activation_min, \ + activation_max, GetTensorData(output), GetTensorDims(output)) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_ops); } else { @@ -149,13 +148,13 @@ void AverageEvalQuantized(TfLiteContext* context, TfLiteNode* node, int32_t activation_max; CalculateActivationRangeUint8(params->activation, output, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - type::AveragePool(GetTensorData(input), GetTensorShape(input), \ - params->stride_width, params->stride_height, \ - data->padding.width, data->padding.height, \ - params->filter_width, params->filter_height, \ - activation_min, activation_max, \ - GetTensorData(output), GetTensorShape(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + type::AveragePool(GetTensorData(input), GetTensorDims(input), \ + params->stride_width, params->stride_height, \ + data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, \ + activation_min, activation_max, \ + GetTensorData(output), GetTensorDims(output)) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_ops); } else { @@ -171,13 +170,12 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, float activation_min, activation_max; CalculateActivationRangeFloat(params->activation, &activation_min, &activation_max); -#define TF_LITE_MAX_POOL(type) \ - type::MaxPool(GetTensorData(input), GetTensorShape(input), \ - params->stride_width, params->stride_height, \ - data->padding.width, data->padding.height, \ - params->filter_width, params->filter_height, activation_min, \ - activation_max, GetTensorData(output), \ - GetTensorShape(output)) +#define TF_LITE_MAX_POOL(type) \ + type::MaxPool( \ + GetTensorData(input), GetTensorDims(input), params->stride_width, \ + params->stride_height, data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, activation_min, \ + activation_max, GetTensorData(output), GetTensorDims(output)) if (kernel_type == kReference) { TF_LITE_MAX_POOL(reference_ops); } else { @@ -195,12 +193,12 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, CalculateActivationRangeUint8(params->activation, output, &activation_min, &activation_max); #define TF_LITE_MAX_POOL(type) \ - type::MaxPool(GetTensorData(input), GetTensorShape(input), \ + type::MaxPool(GetTensorData(input), GetTensorDims(input), \ params->stride_width, params->stride_height, \ data->padding.width, data->padding.height, \ params->filter_width, params->filter_height, activation_min, \ activation_max, GetTensorData(output), \ - GetTensorShape(output)) + GetTensorDims(output)) if (kernel_type == kReference) { TF_LITE_MAX_POOL(reference_ops); } else { @@ -216,13 +214,12 @@ void L2EvalFloat(TfLiteContext* context, TfLiteNode* node, float activation_min, activation_max; CalculateActivationRangeFloat(params->activation, &activation_min, &activation_max); -#define 
TF_LITE_L2_POOL(type) \ - type::L2Pool(GetTensorData(input), GetTensorShape(input), \ - params->stride_width, params->stride_height, \ - data->padding.width, data->padding.height, \ - params->filter_width, params->filter_height, activation_min, \ - activation_max, GetTensorData(output), \ - GetTensorShape(output)) +#define TF_LITE_L2_POOL(type) \ + type::L2Pool( \ + GetTensorData(input), GetTensorDims(input), params->stride_width, \ + params->stride_height, data->padding.width, data->padding.height, \ + params->filter_width, params->filter_height, activation_min, \ + activation_max, GetTensorData(output), GetTensorDims(output)) if (kernel_type == kReference) { TF_LITE_L2_POOL(reference_ops); } else { diff --git a/tensorflow/contrib/lite/kernels/softmax_test.cc b/tensorflow/contrib/lite/kernels/softmax_test.cc index 727822f6bea..6c5338ff0fd 100644 --- a/tensorflow/contrib/lite/kernels/softmax_test.cc +++ b/tensorflow/contrib/lite/kernels/softmax_test.cc @@ -92,9 +92,10 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaEq1) { m.Invoke(); std::unique_ptr output_buffer(new float[input_size * batch_size]); - auto input_shape = RuntimeShape({batch_size, 1, 1, input_size}); - tflite::reference_ops::Softmax(input_buffer, input_shape, beta, - output_buffer.get(), input_shape); + static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size}, + {1, 0, 0, input_size}}; + tflite::reference_ops::Softmax(input_buffer, input_dims, beta, + output_buffer.get(), input_dims); std::vector expected; expected.insert(expected.end(), output_buffer.get(), @@ -119,9 +120,10 @@ TEST(SoftmaxOpTest, CompareWithTFminiBetaNotEq1) { m.Invoke(); std::unique_ptr output_buffer(new float[input_size * batch_size]); - auto input_shape = RuntimeShape({batch_size, 1, 1, input_size}); - tflite::reference_ops::Softmax(input_buffer, input_shape, beta, - output_buffer.get(), input_shape); + static tflite::Dims<4> input_dims = {{input_size, 1, 1, batch_size}, + {1, 0, 0, input_size}}; + tflite::reference_ops::Softmax(input_buffer, input_dims, beta, + output_buffer.get(), input_dims); std::vector expected; expected.insert(expected.end(), output_buffer.get(),
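// Illustrative sketch, not part of the patch: the softmax/log-softmax tests
// above build a Dims<4> by hand as {{input_size, 1, 1, batch_size},
// {1, 0, 0, input_size}}, and the kernels index it with expressions like
// Offset(input_dims, channel, in_x, in_y, batch). The struct and Offset helper
// below are simplified stand-ins for the real tflite types, showing how that
// (batch, input_size) buffer is addressed through the 4-D descriptor.
#include <cstdio>

struct Dims4 {
  int sizes[4];
  int strides[4];
};

// Linear index for (i0, i1, i2, i3) = (channel, x, y, batch).
int Offset(const Dims4& dims, int i0, int i1, int i2, int i3) {
  return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] +
         i3 * dims.strides[3];
}

int main() {
  const int input_size = 5;
  const int batch_size = 2;
  // Same shape the tests construct: a (batch, input_size) buffer viewed as 4-D.
  Dims4 input_dims = {{input_size, 1, 1, batch_size}, {1, 0, 0, input_size}};
  // Element i of batch b lives at i + b * input_size.
  std::printf("%d\n", Offset(input_dims, 3, 0, 0, 1));  // prints 8
}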