Fix the kernel layout description for the float NEON kernels.
<RowMajor,4,8> is the correct layout. It is essentially the same layout as the <RowMajor,1,8> that we were declaring, since they are both row-major and differ only in the number of block rows, but it still makes a difference for padding: the kernel expects input packed matrix data to be padded to the next multiple of 4 rows, and by incorrectly describing the kernel layout as <RowMajor,1,8>, we did not honor that. As we reflected the same incorrect layout description in pack.h, we were also using packing code that wrote padded data, but we were not allocating a suitably enlarged buffer, so my guess is that we were overrunning buffers, which would explain the non-deterministic failures. ASan didn't see anything because this is assembly code; we would need Valgrind for that. PiperOrigin-RevId: 247550630
This commit is contained in:
parent
4224db98ef
commit
8a8a109e56
@ -453,8 +453,8 @@ void KernelFloatNeonDotprodInOrder(const KernelParamsFloat<8, 8>& params);
|
||||
template <>
|
||||
struct Kernel<Path::kNeon, float, float, float, BasicSpec<float, float>> {
|
||||
Tuning tuning = Tuning::kAuto;
|
||||
using LhsLayout = FixedKernelLayout<Order::kRowMajor, 1, 8>;
|
||||
using RhsLayout = FixedKernelLayout<Order::kRowMajor, 1, 8>;
|
||||
using LhsLayout = FixedKernelLayout<Order::kRowMajor, 4, 8>;
|
||||
using RhsLayout = FixedKernelLayout<Order::kRowMajor, 4, 8>;
|
||||
explicit Kernel(Tuning tuning_) : tuning(tuning_) {}
|
||||
void Run(const PackedMatrix<float>& lhs, const PackedMatrix<float>& rhs,
|
||||
const BasicSpec<float, float>& spec, int start_row, int start_col,
|
||||
|
@ -262,7 +262,7 @@ void PackFloatNeonInOrder(const float* src_ptr0, const float* src_ptr1,
|
||||
float* packed_ptr, int start_col, int end_col);
|
||||
|
||||
template <>
|
||||
struct PackImpl<Path::kNeon, FixedKernelLayout<Order::kRowMajor, 1, 8>, float,
|
||||
struct PackImpl<Path::kNeon, FixedKernelLayout<Order::kRowMajor, 4, 8>, float,
|
||||
float, float> {
|
||||
static void Run(Tuning tuning, const Matrix<float>& src_matrix,
|
||||
PackedMatrix<float>* packed_matrix, int start_col,
|
||||
|
Loading…
Reference in New Issue
Block a user