cutils: Remove aarch64 buffer zero checking
The revised integer version is 4 times faster than the NEON version on an AppliedMicro Mustang. Even with hand scheduling and additional unrolling, I cannot make any NEON version run as fast as the integer version. Signed-off-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
5e33a87222
commit
2250d3a293
@ -200,21 +200,6 @@ static bool select_accel_fn(const void *buf, size_t len)
|
|||||||
return buffer_zero_int(buf, len);
|
return buffer_zero_int(buf, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(__aarch64__)
|
|
||||||
#include "arm_neon.h"
|
|
||||||
|
|
||||||
/* Bitwise-OR of lanes 0 and 1 of X, each read with vgetq_lane_u64(). */
#define DO_NONZERO(X) (vgetq_lane_u64((X), 0) | vgetq_lane_u64((X), 1))
|
|
||||||
/*
 * NOTE(review): ACCEL_BUFFER_ZERO is defined outside this view.  Presumably
 * it generates buffer_zero_neon() (called by select_accel_fn() below) from
 * the given size, vector type and non-zero test -- confirm against the
 * macro definition.
 */
ACCEL_BUFFER_ZERO(buffer_zero_neon, 128, uint64x2_t, DO_NONZERO)
|
|
||||||
|
|
||||||
static bool select_accel_fn(const void *buf, size_t len)
|
|
||||||
{
|
|
||||||
uintptr_t ibuf = (uintptr_t)buf;
|
|
||||||
if (len % 128 == 0 && ibuf % sizeof(uint64x2_t) == 0) {
|
|
||||||
return buffer_zero_neon(buf, len);
|
|
||||||
}
|
|
||||||
return buffer_zero_int(buf, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define select_accel_fn buffer_zero_int
|
#define select_accel_fn buffer_zero_int
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user