Use ruy's newly curated default set of paths.
This: - on all CPU architectures, removes the reference code path, which is never used (the need for a portable non-SIMD fallback is already filled by the 'standard c++' path). - on x86, with --define=tflite_with_ruy=true (non-default), this removes a couple of experimental paths, keeping only AVX2 and AVX512 paths. - on x86 for per-channel-quantized ops where ruy is unconditionally used (no alternative implementation), this enables AVX2 and AVX512 paths. This should fix slowness issues for such per-channel-quantized ops on x86. PiperOrigin-RevId: 307861040 Change-Id: I4914522b4dc57748095efbcd61049bb3193bbd36
This commit is contained in:
parent
e853835634
commit
7eb1c830f7
@@ -78,19 +78,8 @@ struct GemmImplUsingRuy {
|
||||
ruy::MulParams<AccumScalar, DstScalar> ruy_mul_params;
|
||||
MakeRuyMulParams(params, &ruy_mul_params);
|
||||
|
||||
// If Ruy is not selected intentionally (TFLITE_WITH_RUY not defined)
|
||||
// and GEMMLOWP_NEON is absent, we fall back to Ruy for some quantized
|
||||
// kernels. Some Ruy paths are still experimental, so we restrict to reference
|
||||
// code in that case.
|
||||
#if !defined(TFLITE_WITH_RUY) && !defined(GEMMLOWP_NEON)
|
||||
constexpr ruy::Path kRuyPath =
|
||||
ruy::Path::kReference | ruy::Path::kStandardCpp;
|
||||
#else
|
||||
constexpr ruy::Path kRuyPath = ruy::kAllPaths;
|
||||
#endif
|
||||
|
||||
ruy::Mul<kRuyPath>(ruy_lhs, ruy_rhs, ruy_mul_params, context->ruy_context(),
|
||||
&ruy_dst);
|
||||
ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, context->ruy_context(),
|
||||
&ruy_dst);
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user