target/arm: Implement SVE Partition Break Group

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180613015641.5667-14-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2018-06-15 14:57:15 +01:00 committed by Peter Maydell
parent 38cadeba0d
commit 35da316f5e
4 changed files with 391 additions and 0 deletions

View File

@ -658,3 +658,21 @@ DEF_HELPER_FLAGS_5(sve_orn_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_nor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_nand_pppp, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
/*
 * SVE partition break helpers.
 *
 * The brkpa/brkpb entries take four pointer arguments plus an i32
 * descriptor; the remaining entries take three pointers plus the descriptor.
 * Each operation is declared twice: a variant returning void, and an
 * "s" variant (brkpas, brkas_z, brkns, ...) that returns an i32 result.
 */
DEF_HELPER_FLAGS_5(sve_brkpa, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_brkpb, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_brkpas, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_brkpbs, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brka_z, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkb_z, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brka_m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkb_m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkas_z, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkbs_z, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkas_m, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkbs_m, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)

View File

@ -59,6 +59,7 @@
&rri_esz rd rn imm esz
&rrr_esz rd rn rm esz
&rpr_esz rd pg rn esz
&rpr_s rd pg rn s
&rprr_s rd pg rn rm s
&rprr_esz rd pg rn rm esz
&rprrr_esz rd pg rn rm ra esz
@ -78,6 +79,9 @@
@pd_pn ........ esz:2 .. .... ....... rn:4 . rd:4 &rr_esz
@rd_rn ........ esz:2 ...... ...... rn:5 rd:5 &rr_esz
# Two operand with governing predicate, flags setting
@pd_pg_pn_s ........ . s:1 ...... .. pg:4 . rn:4 . rd:4 &rpr_s
# Three operand with unused vector element size
@rd_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 &rrr_esz esz=0
@ -560,6 +564,21 @@ PFIRST 00100101 01 011 000 11000 00 .... 0 .... @pd_pn_e0
# SVE predicate next active
PNEXT 00100101 .. 011 001 11000 10 .... 0 .... @pd_pn
### SVE Partition Break Group
# SVE propagate break from previous partition
# BRKPA and BRKPB differ only in the single fixed bit that precedes the
# final 4-bit field (0 for BRKPA, 1 for BRKPB).
BRKPA 00100101 0. 00 .... 11 .... 0 .... 0 .... @pd_pg_pn_pm_s
BRKPB 00100101 0. 00 .... 11 .... 0 .... 1 .... @pd_pg_pn_pm_s
# SVE partition break condition
# The first bit of the second group selects BRKA (0) or BRKB (1); the
# following '.' is left to the format (the s field of @pd_pg_pn_s).
# The single fixed bit before the final 4-bit field selects the _z (0)
# or _m (1) pattern.
BRKA_z 00100101 0. 01000001 .... 0 .... 0 .... @pd_pg_pn_s
BRKB_z 00100101 1. 01000001 .... 0 .... 0 .... @pd_pg_pn_s
BRKA_m 00100101 0. 01000001 .... 0 .... 1 .... @pd_pg_pn_s
BRKB_m 00100101 1. 01000001 .... 0 .... 1 .... @pd_pg_pn_s
# SVE propagate break to next partition
BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s
### SVE Memory - 32-bit Gather and Unsized Contiguous Group
# SVE load predicate register

View File

@ -2476,3 +2476,251 @@ DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=)
#undef DO_CMP_PPZI_S
#undef DO_CMP_PPZI_D
#undef DO_CMP_PPZI
/*
 * Similar to the ARM LastActive pseudocode function.
 *
 * Scan the governing predicate VG from its highest 64-bit word downwards.
 * At the first non-zero word, test whether VD has the bit that corresponds
 * to the most significant set bit of that guard word.  Returns false when
 * VG is entirely zero over the scanned words.
 */
static bool last_active_pred(void *vd, void *vg, intptr_t oprsz)
{
    intptr_t off = QEMU_ALIGN_UP(oprsz, 8);

    while (off > 0) {
        uint64_t guard;

        off -= 8;
        guard = *(uint64_t *)(vg + off);
        if (guard != 0) {
            /* pow2floor isolates the highest set bit of the guard word. */
            uint64_t top = pow2floor(guard);
            uint64_t data = *(uint64_t *)(vd + off);

            return (data & top) != 0;
        }
    }
    return false;
}
/*
 * Compute one 64-bit word of the break mask and store it in *RETB.
 *
 * The mask is true for all G up to and including (AFTER) or excluding
 * (!AFTER) the first element where both G and N are set.
 *
 * BRK carries the "break already found" state from earlier words: when it
 * is set on entry the mask is all zeros.  Returns the updated state, i.e.
 * true if the break was found in this or an earlier word.
 */
static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g,
                        bool brk, bool after)
{
    uint64_t active = g & n;    /* guard true, pred true */
    uint64_t first;

    if (brk) {
        /* Break was found in an earlier word; nothing survives here. */
        *retb = 0;
        return true;
    }

    if (active == 0) {
        /* For all G, no N are set; break not found. */
        *retb = g;
        return false;
    }

    /* Break somewhere in N: isolate the lowest active true bit. */
    first = active & -active;

    /* Everything below the break bit, plus the bit itself if AFTER. */
    *retb = after ? (first | (first - 1)) : (first - 1);
    return true;
}
/*
 * Compute a zeroing BRK.
 *
 * For each 64-bit word covering OPRSZ bytes, D receives the break mask
 * restricted to G, so bits where G is clear become zero.
 */
static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g,
                          intptr_t oprsz, bool after)
{
    const intptr_t words = DIV_ROUND_UP(oprsz, 8);
    bool found = false;
    intptr_t w;

    for (w = 0; w < words; w++) {
        uint64_t guard = g[w];
        uint64_t mask;

        found = compute_brk(&mask, n[w], guard, found, after);
        d[w] = mask & guard;
    }
}
/*
 * Compute a zeroing BRK and the predicate-test flags of the result.
 *
 * Same per-word computation as compute_brk_z; in addition each result
 * word is folded with its guard word into the returned flags via
 * iter_predtest_fwd, starting from PREDTEST_INIT.
 */
static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g,
                               intptr_t oprsz, bool after)
{
    const intptr_t words = DIV_ROUND_UP(oprsz, 8);
    uint32_t nzcv = PREDTEST_INIT;
    bool found = false;
    intptr_t w;

    for (w = 0; w < words; w++) {
        uint64_t guard = g[w];
        uint64_t mask, result;

        found = compute_brk(&mask, n[w], guard, found, after);
        result = mask & guard;
        d[w] = result;
        nzcv = iter_predtest_fwd(result, guard, nzcv);
    }
    return nzcv;
}
/*
 * Compute a merging BRK.
 *
 * For each 64-bit word covering OPRSZ bytes, bits of D where G is set are
 * replaced by the break mask; bits where G is clear keep their old value.
 */
static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g,
                          intptr_t oprsz, bool after)
{
    const intptr_t words = DIV_ROUND_UP(oprsz, 8);
    bool found = false;
    intptr_t w;

    for (w = 0; w < words; w++) {
        uint64_t guard = g[w];
        uint64_t mask, kept;

        found = compute_brk(&mask, n[w], guard, found, after);
        kept = d[w] & ~guard;           /* inactive bits are preserved */
        d[w] = (mask & guard) | kept;
    }
}
/*
 * Compute a merging BRK and the predicate-test flags of the result.
 *
 * For each 64-bit word covering OPRSZ bytes, bits of D where G is set are
 * replaced by the break mask and bits where G is clear keep their old
 * value.  Each merged word is folded with its guard word into the returned
 * flags via iter_predtest_fwd, starting from PREDTEST_INIT.
 */
static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g,
                               intptr_t oprsz, bool after)
{
    /*
     * Round the byte count up to whole 64-bit words, exactly as
     * compute_brk_z, compute_brks_z and compute_brk_m do.  OPRSZ is not
     * necessarily a multiple of 8; a truncating division would skip the
     * trailing partial word (and for OPRSZ < 8 every word), leaving D
     * unmodified and the flags incomplete.
     */
    const intptr_t words = (oprsz + 7) / 8;
    uint32_t flags = PREDTEST_INIT;
    bool brk = false;
    intptr_t i;

    for (i = 0; i < words; ++i) {
        uint64_t this_b, this_d = d[i], this_g = g[i];

        brk = compute_brk(&this_b, n[i], this_g, brk, after);
        /* Active bits take the break mask; inactive bits are preserved. */
        d[i] = this_d = (this_b & this_g) | (this_d & ~this_g);
        flags = iter_predtest_fwd(this_d, this_g, flags);
    }
    return flags;
}
/*
 * Clear the whole predicate register D and return PREDTEST_INIT.
 * OPRSZ is accepted for symmetry with the callers but is not used.
 */
static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
{
    /*
     * It is quicker to zero the whole predicate than loop on OPRSZ.
     * The compiler should turn this into 4 64-bit integer stores.
     */
    memset(d, 0, sizeof(*d));

    return PREDTEST_INIT;
}
/*
 * BRKPA helper: if the last active element of VN (under VG) is true,
 * compute a zeroing break-after of VM into VD; otherwise zero VD.
 * The descriptor's OPRSZ field holds the predicate size in bytes minus 2.
 */
void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
                       uint32_t pred_desc)
{
    intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    if (!last_active_pred(vn, vg, pred_bytes)) {
        do_zero(vd, pred_bytes);
        return;
    }
    compute_brk_z(vd, vm, vg, pred_bytes, true);
}
/*
 * BRKPAS helper: as sve_brkpa, but also returns the predicate-test flags
 * (PREDTEST_INIT when VD is simply zeroed).
 */
uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
                            uint32_t pred_desc)
{
    intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    if (!last_active_pred(vn, vg, pred_bytes)) {
        return do_zero(vd, pred_bytes);
    }
    return compute_brks_z(vd, vm, vg, pred_bytes, true);
}
/*
 * BRKPB helper: if the last active element of VN (under VG) is true,
 * compute a zeroing break-before of VM into VD; otherwise zero VD.
 */
void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
                       uint32_t pred_desc)
{
    intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    if (!last_active_pred(vn, vg, pred_bytes)) {
        do_zero(vd, pred_bytes);
        return;
    }
    compute_brk_z(vd, vm, vg, pred_bytes, false);
}
/*
 * BRKPBS helper: as sve_brkpb, but also returns the predicate-test flags
 * (PREDTEST_INIT when VD is simply zeroed).
 */
uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
                            uint32_t pred_desc)
{
    intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    if (!last_active_pred(vn, vg, pred_bytes)) {
        return do_zero(vd, pred_bytes);
    }
    return compute_brks_z(vd, vm, vg, pred_bytes, false);
}
/* BRKA, zeroing: break after the first active true element of VN. */
void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    compute_brk_z(vd, vn, vg, pred_bytes, true);
}
/* BRKAS, zeroing: as sve_brka_z, also returning the predicate-test flags. */
uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
    uint32_t flags = compute_brks_z(vd, vn, vg, pred_bytes, true);

    return flags;
}
/* BRKB, zeroing: break before the first active true element of VN. */
void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    compute_brk_z(vd, vn, vg, pred_bytes, false);
}
/* BRKBS, zeroing: as sve_brkb_z, also returning the predicate-test flags. */
uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
    uint32_t flags = compute_brks_z(vd, vn, vg, pred_bytes, false);

    return flags;
}
/* BRKA, merging: break after; inactive elements of VD are preserved. */
void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    compute_brk_m(vd, vn, vg, pred_bytes, true);
}
/* BRKAS, merging: as sve_brka_m, also returning the predicate-test flags. */
uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
    uint32_t flags = compute_brks_m(vd, vn, vg, pred_bytes, true);

    return flags;
}
/* BRKB, merging: break before; inactive elements of VD are preserved. */
void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    compute_brk_m(vd, vn, vg, pred_bytes, false);
}
/* BRKBS, merging: as sve_brkb_m, also returning the predicate-test flags. */
uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    /* The descriptor's OPRSZ field holds the size in bytes minus 2. */
    const intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
    uint32_t flags = compute_brks_m(vd, vn, vg, pred_bytes, false);

    return flags;
}
/*
 * BRKN helper: leave VD untouched when the last active element of VN
 * (under VG) is true; otherwise zero VD.
 */
void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    if (last_active_pred(vn, vg, pred_bytes)) {
        return;
    }
    do_zero(vd, pred_bytes);
}
/*
 * As if PredTest(Ones(PL), D, esz).
 *
 * Every full 64-bit word of D is tested against ESZ_MASK.  If OPRSZ is
 * not a multiple of 8, the final partial word is tested against ESZ_MASK
 * limited to the low (OPRSZ % 8) bytes.
 */
static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
                              uint64_t esz_mask)
{
    const intptr_t tail_bytes = oprsz & 7;
    uint32_t nzcv = PREDTEST_INIT;
    intptr_t w = 0;

    while (w < oprsz / 8) {
        nzcv = iter_predtest_fwd(d->p[w], esz_mask, nzcv);
        w++;
    }

    if (tail_bytes != 0) {
        /* Keep only the bits that belong to the remaining bytes. */
        uint64_t tail_mask = ~(-1ULL << (8 * tail_bytes));

        nzcv = iter_predtest_fwd(d->p[w], esz_mask & tail_mask, nzcv);
    }
    return nzcv;
}
/*
 * BRKNS helper: when the last active element of VN (under VG) is true,
 * VD is left as is and its flags are computed against an all-ones
 * predicate; otherwise VD is zeroed and PREDTEST_INIT is returned.
 */
uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
{
    intptr_t pred_bytes = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;

    if (!last_active_pred(vn, vg, pred_bytes)) {
        return do_zero(vd, pred_bytes);
    }
    return predtest_ones(vd, pred_bytes, -1);
}

View File

@ -2853,6 +2853,112 @@ DO_PPZI(CMPLS, cmpls)
#undef DO_PPZI
/*
*** SVE Partition Break Group
*/
/*
 * Emit a partition-break operation with four predicate operands
 * (rd, rn, rm and the governing pg).
 *
 * FN is called when a->s is clear.  When a->s is set, FN_S is called
 * instead and its i32 result is passed to do_pred_flags.
 * Always returns true, including when sve_access_check fails and
 * nothing further is emitted.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    unsigned pred_bytes;
    TCGv_ptr dst, src_n, src_m, gov;
    TCGv_i32 desc;

    if (!sve_access_check(s)) {
        return true;
    }

    pred_bytes = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    dst = tcg_temp_new_ptr();
    src_n = tcg_temp_new_ptr();
    src_m = tcg_temp_new_ptr();
    gov = tcg_temp_new_ptr();
    /* The helpers add the 2 back when decoding the descriptor. */
    desc = tcg_const_i32(pred_bytes - 2);

    tcg_gen_addi_ptr(dst, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(src_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(src_m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(gov, cpu_env, pred_full_reg_offset(s, a->pg));

    if (!a->s) {
        fn(dst, src_n, src_m, gov, desc);
    } else {
        /* DESC is reused as the destination for the returned flags. */
        fn_s(desc, dst, src_n, src_m, gov, desc);
        do_pred_flags(desc);
    }

    tcg_temp_free_ptr(dst);
    tcg_temp_free_ptr(src_n);
    tcg_temp_free_ptr(src_m);
    tcg_temp_free_ptr(gov);
    tcg_temp_free_i32(desc);
    return true;
}
/*
 * Emit a partition-break operation with three predicate operands
 * (rd, rn and the governing pg).
 *
 * FN is called when a->s is clear.  When a->s is set, FN_S is called
 * instead and its i32 result is passed to do_pred_flags.
 * Always returns true, including when sve_access_check fails and
 * nothing further is emitted.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    unsigned pred_bytes;
    TCGv_ptr dst, src_n, gov;
    TCGv_i32 desc;

    if (!sve_access_check(s)) {
        return true;
    }

    pred_bytes = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    dst = tcg_temp_new_ptr();
    src_n = tcg_temp_new_ptr();
    gov = tcg_temp_new_ptr();
    /* The helpers add the 2 back when decoding the descriptor. */
    desc = tcg_const_i32(pred_bytes - 2);

    tcg_gen_addi_ptr(dst, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(src_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(gov, cpu_env, pred_full_reg_offset(s, a->pg));

    if (!a->s) {
        fn(dst, src_n, gov, desc);
    } else {
        /* DESC is reused as the destination for the returned flags. */
        fn_s(desc, dst, src_n, gov, desc);
        do_pred_flags(desc);
    }

    tcg_temp_free_ptr(dst);
    tcg_temp_free_ptr(src_n);
    tcg_temp_free_ptr(gov);
    tcg_temp_free_i32(desc);
    return true;
}
/* BRKPA: hand the brkpa / brkpas helper pair to do_brk3. */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    gen_helper_gvec_4 *plain = gen_helper_sve_brkpa;
    gen_helper_gvec_flags_4 *flagged = gen_helper_sve_brkpas;

    return do_brk3(s, a, plain, flagged);
}
/* BRKPB: hand the brkpb / brkpbs helper pair to do_brk3. */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    gen_helper_gvec_4 *plain = gen_helper_sve_brkpb;
    gen_helper_gvec_flags_4 *flagged = gen_helper_sve_brkpbs;

    return do_brk3(s, a, plain, flagged);
}
/* BRKA (merging): hand the brka_m / brkas_m helper pair to do_brk2. */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    gen_helper_gvec_3 *plain = gen_helper_sve_brka_m;
    gen_helper_gvec_flags_3 *flagged = gen_helper_sve_brkas_m;

    return do_brk2(s, a, plain, flagged);
}
/* BRKB (merging): hand the brkb_m / brkbs_m helper pair to do_brk2. */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    gen_helper_gvec_3 *plain = gen_helper_sve_brkb_m;
    gen_helper_gvec_flags_3 *flagged = gen_helper_sve_brkbs_m;

    return do_brk2(s, a, plain, flagged);
}
/* BRKA (zeroing): hand the brka_z / brkas_z helper pair to do_brk2. */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    gen_helper_gvec_3 *plain = gen_helper_sve_brka_z;
    gen_helper_gvec_flags_3 *flagged = gen_helper_sve_brkas_z;

    return do_brk2(s, a, plain, flagged);
}
/* BRKB (zeroing): hand the brkb_z / brkbs_z helper pair to do_brk2. */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    gen_helper_gvec_3 *plain = gen_helper_sve_brkb_z;
    gen_helper_gvec_flags_3 *flagged = gen_helper_sve_brkbs_z;

    return do_brk2(s, a, plain, flagged);
}
/* BRKN: hand the brkn / brkns helper pair to do_brk2. */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    gen_helper_gvec_3 *plain = gen_helper_sve_brkn;
    gen_helper_gvec_flags_3 *flagged = gen_helper_sve_brkns;

    return do_brk2(s, a, plain, flagged);
}
/*
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
*/