target/arm: Convert PMUL.8 to gvec
The gvec form will be needed for implementing SVE2.

Extend the implementation to operate on uint64_t instead of uint32_t. Use a counted inner loop instead of terminating when op1 goes to zero, looking toward the required implementation for ARMv8.4-DIT.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200216214232.4230-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
87b74e8b6e
commit
a21bb78e58
@ -342,7 +342,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
|
|||||||
DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
|
DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
|
||||||
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
|
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
|
||||||
DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
|
DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
|
||||||
DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
|
|
||||||
DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
|
DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
|
||||||
|
|
||||||
DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
|
DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
|
||||||
@ -696,6 +695,8 @@ DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
|||||||
DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
#ifdef TARGET_AARCH64
|
#ifdef TARGET_AARCH64
|
||||||
#include "helper-a64.h"
|
#include "helper-a64.h"
|
||||||
#include "helper-sve.h"
|
#include "helper-sve.h"
|
||||||
|
@ -1131,28 +1131,6 @@ NEON_VOP(mul_u16, neon_u16, 2)
|
|||||||
|
|
||||||
/* Polynomial multiplication is like integer multiplication except the
|
/* Polynomial multiplication is like integer multiplication except the
|
||||||
partial products are XORed, not added. */
|
partial products are XORed, not added. */
|
||||||
uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
|
|
||||||
{
|
|
||||||
uint32_t mask;
|
|
||||||
uint32_t result;
|
|
||||||
result = 0;
|
|
||||||
while (op1) {
|
|
||||||
mask = 0;
|
|
||||||
if (op1 & 1)
|
|
||||||
mask |= 0xff;
|
|
||||||
if (op1 & (1 << 8))
|
|
||||||
mask |= (0xff << 8);
|
|
||||||
if (op1 & (1 << 16))
|
|
||||||
mask |= (0xff << 16);
|
|
||||||
if (op1 & (1 << 24))
|
|
||||||
mask |= (0xff << 24);
|
|
||||||
result ^= op2 & mask;
|
|
||||||
op1 = (op1 >> 1) & 0x7f7f7f7f;
|
|
||||||
op2 = (op2 << 1) & 0xfefefefe;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
|
uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
|
||||||
{
|
{
|
||||||
uint64_t result = 0;
|
uint64_t result = 0;
|
||||||
|
@ -11169,9 +11169,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
|
|||||||
case 0x13: /* MUL, PMUL */
|
case 0x13: /* MUL, PMUL */
|
||||||
if (!u) { /* MUL */
|
if (!u) { /* MUL */
|
||||||
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
|
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
|
||||||
return;
|
} else { /* PMUL */
|
||||||
|
gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
|
||||||
}
|
}
|
||||||
break;
|
return;
|
||||||
case 0x12: /* MLA, MLS */
|
case 0x12: /* MLA, MLS */
|
||||||
if (u) {
|
if (u) {
|
||||||
gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
|
gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
|
||||||
@ -11301,11 +11302,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
|
|||||||
genfn = fns[size][u];
|
genfn = fns[size][u];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 0x13: /* MUL, PMUL */
|
|
||||||
assert(u); /* PMUL */
|
|
||||||
assert(size == 0);
|
|
||||||
genfn = gen_helper_neon_mul_p8;
|
|
||||||
break;
|
|
||||||
case 0x16: /* SQDMULH, SQRDMULH */
|
case 0x16: /* SQDMULH, SQRDMULH */
|
||||||
{
|
{
|
||||||
static NeonGenTwoOpEnvFn * const fns[2][2] = {
|
static NeonGenTwoOpEnvFn * const fns[2][2] = {
|
||||||
|
@ -5007,16 +5007,17 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
|||||||
|
|
||||||
case NEON_3R_VMUL: /* VMUL */
|
case NEON_3R_VMUL: /* VMUL */
|
||||||
if (u) {
|
if (u) {
|
||||||
/* Polynomial case allows only P8 and is handled below. */
|
/* Polynomial case allows only P8. */
|
||||||
if (size != 0) {
|
if (size != 0) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
|
||||||
|
0, gen_helper_gvec_pmul_b);
|
||||||
} else {
|
} else {
|
||||||
tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
|
tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
|
||||||
vec_size, vec_size);
|
vec_size, vec_size);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
break;
|
return 0;
|
||||||
|
|
||||||
case NEON_3R_VML: /* VMLA, VMLS */
|
case NEON_3R_VML: /* VMLA, VMLS */
|
||||||
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
|
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
|
||||||
@ -5206,10 +5207,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
|||||||
tmp2 = neon_load_reg(rd, pass);
|
tmp2 = neon_load_reg(rd, pass);
|
||||||
gen_neon_add(size, tmp, tmp2);
|
gen_neon_add(size, tmp, tmp2);
|
||||||
break;
|
break;
|
||||||
case NEON_3R_VMUL:
|
|
||||||
/* VMUL.P8; other cases already eliminated. */
|
|
||||||
gen_helper_neon_mul_p8(tmp, tmp, tmp2);
|
|
||||||
break;
|
|
||||||
case NEON_3R_VPMAX:
|
case NEON_3R_VPMAX:
|
||||||
GEN_NEON_INTEGER_OP(pmax);
|
GEN_NEON_INTEGER_OP(pmax);
|
||||||
break;
|
break;
|
||||||
|
@ -1134,3 +1134,33 @@ void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
|
|||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 8x8->8 polynomial multiply.
|
||||||
|
*
|
||||||
|
* Polynomial multiplication is like integer multiplication except the
|
||||||
|
* partial products are XORed, not added.
|
||||||
|
*
|
||||||
|
* TODO: expose this as a generic vector operation, as it is a common
|
||||||
|
* crypto building block.
|
||||||
|
*/
|
||||||
|
void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||||
|
{
|
||||||
|
intptr_t i, j, opr_sz = simd_oprsz(desc);
|
||||||
|
uint64_t *d = vd, *n = vn, *m = vm;
|
||||||
|
|
||||||
|
for (i = 0; i < opr_sz / 8; ++i) {
|
||||||
|
uint64_t nn = n[i];
|
||||||
|
uint64_t mm = m[i];
|
||||||
|
uint64_t rr = 0;
|
||||||
|
|
||||||
|
for (j = 0; j < 8; ++j) {
|
||||||
|
uint64_t mask = (nn & 0x0101010101010101ull) * 0xff;
|
||||||
|
rr ^= mm & mask;
|
||||||
|
mm = (mm << 1) & 0xfefefefefefefefeull;
|
||||||
|
nn >>= 1;
|
||||||
|
}
|
||||||
|
d[i] = rr;
|
||||||
|
}
|
||||||
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user