diff --git a/tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h b/tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h
index 32b1b67fac0..352f2289a73 100644
--- a/tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h
+++ b/tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h
@@ -365,62 +365,6 @@ inline void FullyConnectedAsGEMV(
 }
 #endif  // USE_NEON
 
-struct GemmlowpOutputPipeline {
-  typedef gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>
-      ColVectorMap;
-  typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
-                     gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
-                     gemmlowp::OutputStageClamp,
-                     gemmlowp::OutputStageSaturatingCastToInt8>
-      Pipeline;
-  static Pipeline MakeExp(const int32* bias_data, int output_rows,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_left_shift, int32 output_activation_min,
-                          int32 output_activation_max) {
-    ColVectorMap bias_vector(bias_data, output_rows);
-    gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
-    bias_addition_stage.bias_vector = bias_vector;
-    gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent quantize_down_stage;
-    quantize_down_stage.result_offset_after_shift = output_offset;
-    quantize_down_stage.result_fixedpoint_multiplier = output_multiplier;
-    quantize_down_stage.result_exponent = output_left_shift;
-    gemmlowp::OutputStageClamp clamp_stage;
-    clamp_stage.min = output_activation_min;
-    clamp_stage.max = output_activation_max;
-    gemmlowp::OutputStageSaturatingCastToInt8 saturating_cast_stage;
-    return std::make_tuple(bias_addition_stage, quantize_down_stage,
-                           clamp_stage, saturating_cast_stage);
-  }
-};
-
-struct GemmlowpOutputPipelineInt8 {
-  typedef gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>
-      ColVectorMap;
-  typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
-                     gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
-                     gemmlowp::OutputStageClamp,
-                     gemmlowp::OutputStageSaturatingCastToInt8>
-      Pipeline;
-  static Pipeline MakeExp(const int32* bias_data, int output_rows,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_left_shift, int32 output_activation_min,
-                          int32 output_activation_max) {
-    ColVectorMap bias_vector(bias_data, output_rows);
-    gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
-    bias_addition_stage.bias_vector = bias_vector;
-    gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent quantize_down_stage;
-    quantize_down_stage.result_offset_after_shift = output_offset;
-    quantize_down_stage.result_fixedpoint_multiplier = output_multiplier;
-    quantize_down_stage.result_exponent = output_left_shift;
-    gemmlowp::OutputStageClamp clamp_stage;
-    clamp_stage.min = output_activation_min;
-    clamp_stage.max = output_activation_max;
-    gemmlowp::OutputStageSaturatingCastToInt8 saturating_cast_stage;
-    return std::make_tuple(bias_addition_stage, quantize_down_stage,
-                           clamp_stage, saturating_cast_stage);
-  }
-};
-
 inline void FullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const int8* input_data, const RuntimeShape& filter_shape,
diff --git a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h
index 6ae6bb3b764..3300c890121 100644
--- a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h
+++ b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h
@@ -598,6 +598,62 @@ void FullyConnected(const float* input_data, const Dims<4>& input_dims,
                     output_data, output_dims);
 }
 
+struct GemmlowpOutputPipeline {
+  typedef gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>
+      ColVectorMap;
+  typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
+                     gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
+                     gemmlowp::OutputStageClamp,
+                     gemmlowp::OutputStageSaturatingCastToUint8>
+      Pipeline;
+  static Pipeline MakeExp(const int32* bias_data, int output_rows,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_left_shift, int32 output_activation_min,
+                          int32 output_activation_max) {
+    ColVectorMap bias_vector(bias_data, output_rows);
+    gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
+    bias_addition_stage.bias_vector = bias_vector;
+    gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent quantize_down_stage;
+    quantize_down_stage.result_offset_after_shift = output_offset;
+    quantize_down_stage.result_fixedpoint_multiplier = output_multiplier;
+    quantize_down_stage.result_exponent = output_left_shift;
+    gemmlowp::OutputStageClamp clamp_stage;
+    clamp_stage.min = output_activation_min;
+    clamp_stage.max = output_activation_max;
+    gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
+    return std::make_tuple(bias_addition_stage, quantize_down_stage,
+                           clamp_stage, saturating_cast_stage);
+  }
+};
+
+struct GemmlowpOutputPipelineInt8 {
+  typedef gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>
+      ColVectorMap;
+  typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
+                     gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
+                     gemmlowp::OutputStageClamp,
+                     gemmlowp::OutputStageSaturatingCastToInt8>
+      Pipeline;
+  static Pipeline MakeExp(const int32* bias_data, int output_rows,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_left_shift, int32 output_activation_min,
+                          int32 output_activation_max) {
+    ColVectorMap bias_vector(bias_data, output_rows);
+    gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
+    bias_addition_stage.bias_vector = bias_vector;
+    gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent quantize_down_stage;
+    quantize_down_stage.result_offset_after_shift = output_offset;
+    quantize_down_stage.result_fixedpoint_multiplier = output_multiplier;
+    quantize_down_stage.result_exponent = output_left_shift;
+    gemmlowp::OutputStageClamp clamp_stage;
+    clamp_stage.min = output_activation_min;
+    clamp_stage.max = output_activation_max;
+    gemmlowp::OutputStageSaturatingCastToInt8 saturating_cast_stage;
+    return std::make_tuple(bias_addition_stage, quantize_down_stage,
+                           clamp_stage, saturating_cast_stage);
+  }
+};
+
 #ifdef USE_NEON
 struct LegacyFullyConnectedAsGEMVWorkerTask : public gemmlowp::Task {
   LegacyFullyConnectedAsGEMVWorkerTask(
diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
index 0bf85acdfbb..a4afcee5e18 100644
--- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
@@ -1148,34 +1148,6 @@ inline void FullyConnectedAsGEMV(
 }
 #endif  // USE_NEON
 
-struct GemmlowpOutputPipeline {
-  typedef gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>
-      ColVectorMap;
-  typedef std::tuple<gemmlowp::OutputStageBiasAddition<ColVectorMap>,
-                     gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent,
-                     gemmlowp::OutputStageClamp,
-                     gemmlowp::OutputStageSaturatingCastToUint8>
-      Pipeline;
-  static Pipeline MakeExp(const int32* bias_data, int output_rows,
-                          int32 output_offset, int32 output_multiplier,
-                          int output_left_shift, int32 output_activation_min,
-                          int32 output_activation_max) {
-    ColVectorMap bias_vector(bias_data, output_rows);
-    gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
-    bias_addition_stage.bias_vector = bias_vector;
-    gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent quantize_down_stage;
-    quantize_down_stage.result_offset_after_shift = output_offset;
-    quantize_down_stage.result_fixedpoint_multiplier = output_multiplier;
-    quantize_down_stage.result_exponent = output_left_shift;
-    gemmlowp::OutputStageClamp clamp_stage;
-    clamp_stage.min = output_activation_min;
-    clamp_stage.max = output_activation_max;
-    gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage;
-    return std::make_tuple(bias_addition_stage, quantize_down_stage,
-                           clamp_stage, saturating_cast_stage);
-  }
-};
-
 inline void FullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
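For context on what this patch moves: the pipeline assembled by `MakeExp` post-processes each raw int32 GEMM accumulator in four stages — bias addition, a fixed-point rescale driven by `result_fixedpoint_multiplier`/`result_exponent`, a clamp to the activation range, and a saturating cast to the 8-bit output type. Below is a minimal standalone sketch (not part of the patch) of how such a pipeline is fed to `gemmlowp::GemmWithOutputPipeline`; the matrix sizes, offsets, and quantization parameters are invented example values.

```cpp
#include <cstdint>
#include <tuple>
#include <vector>

#include "public/gemmlowp.h"

int main() {
  // Toy fully-connected layer: accum_depth = 3, output_depth = 2,
  // batches = 1, i.e. output(2x1) = filter(2x3) * input(3x1).
  const std::vector<std::uint8_t> filter = {1, 2, 3, 4, 5, 6};
  const std::vector<std::uint8_t> input = {7, 8, 9};
  const std::vector<std::int32_t> bias = {100, -100};
  std::vector<std::uint8_t> output(2);

  gemmlowp::MatrixMap<const std::uint8_t, gemmlowp::MapOrder::RowMajor>
      filter_matrix(filter.data(), 2, 3);
  gemmlowp::MatrixMap<const std::uint8_t, gemmlowp::MapOrder::ColMajor>
      input_matrix(input.data(), 3, 1);
  gemmlowp::MatrixMap<std::uint8_t, gemmlowp::MapOrder::ColMajor>
      output_matrix(output.data(), 2, 1);

  // The same four stages GemmlowpOutputPipeline::MakeExp assembles:
  // bias add -> fixed-point rescale -> clamp -> saturating cast to uint8.
  gemmlowp::VectorMap<const std::int32_t, gemmlowp::VectorShape::Col>
      bias_vector(bias.data(), 2);
  gemmlowp::OutputStageBiasAddition<decltype(bias_vector)> bias_stage;
  bias_stage.bias_vector = bias_vector;
  gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent quantize_down_stage;
  quantize_down_stage.result_fixedpoint_multiplier = 1 << 30;  // example value
  quantize_down_stage.result_exponent = -6;                    // example value
  quantize_down_stage.result_offset_after_shift = 128;  // output zero point
  gemmlowp::OutputStageClamp clamp_stage;
  clamp_stage.min = 0;
  clamp_stage.max = 255;
  gemmlowp::OutputStageSaturatingCastToUint8 cast_stage;
  const auto output_pipeline = std::make_tuple(bias_stage, quantize_down_stage,
                                               clamp_stage, cast_stage);

  // Run the quantized GEMM; lhs/rhs offsets are the negated zero points of
  // the filter and input (example values here).
  gemmlowp::GemmContext gemm_context;
  gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
                                   gemmlowp::DefaultL8R8BitDepthParams>(
      &gemm_context, filter_matrix, input_matrix, &output_matrix,
      /*lhs_offset=*/-128, /*rhs_offset=*/-128, output_pipeline);
  return 0;
}
```

Because all three copies of these structs were byte-for-byte identical apart from the final cast stage, keeping only the legacy copy in legacy_optimized_ops.h (where the gemmlowp path still lives) avoids ODR-adjacent duplication in the headers that now route through other GEMM backends.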