target/arm: Convert PMULL.64 to gvec
The gvec form will be needed for implementing SVE2. Tested-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200216214232.4230-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
a21bb78e58
commit
b9ed510e46
@ -562,9 +562,6 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
|
||||
DEF_HELPER_2(dc_zva, void, env, i64)
|
||||
|
||||
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG,
|
||||
@ -696,6 +693,7 @@ DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
#ifdef TARGET_AARCH64
|
||||
#include "helper-a64.h"
|
||||
|
@ -2152,33 +2152,3 @@ void HELPER(neon_zip16)(void *vd, void *vm)
|
||||
rm[0] = m0;
|
||||
rd[0] = d0;
|
||||
}
|
||||
|
||||
/* Helper function for 64 bit polynomial multiply case:
|
||||
* perform PolynomialMult(op1, op2) and return either the top or
|
||||
* bottom half of the 128 bit result.
|
||||
*/
|
||||
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
|
||||
{
|
||||
int bitnum;
|
||||
uint64_t res = 0;
|
||||
|
||||
for (bitnum = 0; bitnum < 64; bitnum++) {
|
||||
if (op1 & (1ULL << bitnum)) {
|
||||
res ^= op2 << bitnum;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
|
||||
{
|
||||
int bitnum;
|
||||
uint64_t res = 0;
|
||||
|
||||
/* bit 0 of op1 can't influence the high 64 bits at all */
|
||||
for (bitnum = 1; bitnum < 64; bitnum++) {
|
||||
if (op1 & (1ULL << bitnum)) {
|
||||
res ^= op2 >> (64 - bitnum);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
@ -10657,30 +10657,6 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
|
||||
clear_vec_high(s, is_q, rd);
|
||||
}
|
||||
|
||||
/*
 * Emit code for the 64 x 64 -> 128 bit PMULL.
 *
 * Unlike the other three-reg-diff instructions, this one yields a full
 * 128-bit result from a single operation.  The two 64-bit halves of the
 * product can be computed independently, so one helper call is emitted
 * per half and each result is stored to the matching element of rd.
 *
 * is_q is passed as the 64-bit element index when reading rn and rm,
 * so it selects which half of each source register is multiplied.
 */
static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    TCGv_i64 half = tcg_temp_new_i64();

    /* Fetch the selected 64-bit element of each source operand. */
    read_vec_element(s, lhs, rn, is_q, MO_64);
    read_vec_element(s, rhs, rm, is_q, MO_64);

    /* Low half of the product goes to element 0 of the destination. */
    gen_helper_neon_pmull_64_lo(half, lhs, rhs);
    write_vec_element(s, half, rd, 0, MO_64);

    /* High half of the product goes to element 1; the temp is reused. */
    gen_helper_neon_pmull_64_hi(half, lhs, rhs);
    write_vec_element(s, half, rd, 1, MO_64);

    tcg_temp_free_i64(lhs);
    tcg_temp_free_i64(rhs);
    tcg_temp_free_i64(half);
}
|
||||
|
||||
/* AdvSIMD three different
|
||||
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
|
||||
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
|
||||
@ -10745,7 +10721,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
|
||||
if (!fp_access_check(s)) {
|
||||
return;
|
||||
}
|
||||
handle_pmull_64(s, is_q, rd, rn, rm);
|
||||
/* The Q field specifies lo/hi half input for this insn. */
|
||||
gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
|
||||
gen_helper_gvec_pmull_q);
|
||||
return;
|
||||
}
|
||||
goto is_widening;
|
||||
|
@ -5870,23 +5870,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
* outside the loop below as it only performs a single pass.
|
||||
*/
|
||||
if (op == 14 && size == 2) {
|
||||
TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
|
||||
|
||||
if (!dc_isar_feature(aa32_pmull, s)) {
|
||||
return 1;
|
||||
}
|
||||
tcg_rn = tcg_temp_new_i64();
|
||||
tcg_rm = tcg_temp_new_i64();
|
||||
tcg_rd = tcg_temp_new_i64();
|
||||
neon_load_reg64(tcg_rn, rn);
|
||||
neon_load_reg64(tcg_rm, rm);
|
||||
gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
|
||||
neon_store_reg64(tcg_rd, rd);
|
||||
gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
|
||||
neon_store_reg64(tcg_rd, rd + 1);
|
||||
tcg_temp_free_i64(tcg_rn);
|
||||
tcg_temp_free_i64(tcg_rm);
|
||||
tcg_temp_free_i64(tcg_rd);
|
||||
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
|
||||
0, gen_helper_gvec_pmull_q);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1164,3 +1164,36 @@ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/*
|
||||
* 64x64->128 polynomial multiply.
|
||||
* Because the lanes are not accessed in strict columns,
|
||||
* this probably cannot be turned into a generic helper.
|
||||
*/
|
||||
void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||
{
|
||||
intptr_t i, j, opr_sz = simd_oprsz(desc);
|
||||
intptr_t hi = simd_data(desc);
|
||||
uint64_t *d = vd, *n = vn, *m = vm;
|
||||
|
||||
for (i = 0; i < opr_sz / 8; i += 2) {
|
||||
uint64_t nn = n[i + hi];
|
||||
uint64_t mm = m[i + hi];
|
||||
uint64_t rhi = 0;
|
||||
uint64_t rlo = 0;
|
||||
|
||||
/* Bit 0 can only influence the low 64-bit result. */
|
||||
if (nn & 1) {
|
||||
rlo = mm;
|
||||
}
|
||||
|
||||
for (j = 1; j < 64; ++j) {
|
||||
uint64_t mask = -((nn >> j) & 1);
|
||||
rlo ^= (mm << j) & mask;
|
||||
rhi ^= (mm >> (64 - j)) & mask;
|
||||
}
|
||||
d[i] = rlo;
|
||||
d[i + 1] = rhi;
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user