target/arm: Use clmul_16* routines
Use generic routines for 16-bit carry-less multiply.
Remove our local version of pmull_w.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
cf1b2cab83
commit
c6f0dcb1fd
@ -985,14 +985,10 @@ DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL)
|
||||
* Polynomial multiply. We can always do this generating 64 bits
|
||||
* of the result at a time, so we don't need to use DO_2OP_L.
|
||||
*/
|
||||
/* Keeps bits [15:0] and [47:32]: the low 16 bits of each 32-bit half of a 64-bit value. */
#define VMULLPW_MASK 0x0000ffff0000ffffULL
|
||||
/* "Bottom" form: mask both operands down to those 16-bit fields, then pass them to pmull_w(). */
#define DO_VMULLPBW(N, M) pmull_w((N) & VMULLPW_MASK, (M) & VMULLPW_MASK)
|
||||
/*
 * "Top" form: shift both operands right by 16 so the upper 16 bits of each
 * 32-bit half land in the fields that DO_VMULLPBW masks and multiplies.
 */
#define DO_VMULLPTW(N, M) DO_VMULLPBW((N) >> 16, (M) >> 16)
|
||||
|
||||
DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even)
|
||||
DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd)
|
||||
DO_2OP(vmullpbw, 8, uint64_t, DO_VMULLPBW)
|
||||
DO_2OP(vmullptw, 8, uint64_t, DO_VMULLPTW)
|
||||
DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even)
|
||||
DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd)
|
||||
|
||||
/*
|
||||
* Because the computation type is at least twice as large as required,
|
||||
|
@ -2029,19 +2029,6 @@ void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/*
 * 16 x 16 -> 32 polynomial (carry-less) multiply, performed on both
 * 32-bit lanes of the 64-bit operands at once.
 *
 * Each operand is expected to carry its 16-bit input in the low half of
 * each 32-bit lane, with the upper half of the lane clear; the 32-bit
 * product for a lane is returned in the same lane of the result.
 */
uint64_t pmull_w(uint64_t op1, uint64_t op2)
{
    /* Bit 0 of each 32-bit lane. */
    const uint64_t lane_lsb = 0x0000000100000001ull;
    uint64_t acc = 0;
    unsigned remaining = 16;

    while (remaining-- > 0) {
        /*
         * Multiplying the per-lane low bit by 0xffffffff spreads it across
         * the whole lane: all-ones where the current multiplier bit is set,
         * all-zeros otherwise.
         */
        uint64_t select = (op1 & lane_lsb) * 0xffffffff;

        /* XOR is addition without carry, i.e. polynomial accumulation. */
        acc ^= select & op2;

        /* Advance to the next multiplier bit and the next partial product. */
        op2 <<= 1;
        op1 >>= 1;
    }

    return acc;
}
|
||||
|
||||
void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||
{
|
||||
int hi = simd_data(desc);
|
||||
|
@ -219,12 +219,6 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
|
||||
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
|
||||
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
|
||||
|
||||
/*
|
||||
* 16 x 16 -> 32 vector polynomial multiply where the inputs are
|
||||
* in the low 16 bits of each 32-bit element
|
||||
*/
|
||||
uint64_t pmull_w(uint64_t op1, uint64_t op2);
|
||||
|
||||
/**
|
||||
* bfdotadd:
|
||||
* @sum: addend
|
||||
|
Loading…
Reference in New Issue
Block a user