tcg/s390x: Implement ctpop operation

There is an older form that produces per-byte results,
and a newer form that produces per-register results.

Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-02-24 00:43:18 +00:00
parent bfff851842
commit 29a5ea738a
2 changed files with 38 additions and 2 deletions

View File

@ -206,6 +206,7 @@ typedef enum S390Opcode {
RRFc_LOCR = 0xb9f2,
RRFc_LOCGR = 0xb9e2,
RRFc_POPCNT = 0xb9e1,
RR_AR = 0x1a,
RR_ALR = 0x1e,
@ -1435,6 +1436,32 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}
static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
/* With MIE3, and bit 0 of m4 set, we get the complete result. */
if (HAVE_FACILITY(MISC_INSN_EXT3)) {
if (type == TCG_TYPE_I32) {
tgen_ext32u(s, dest, src);
src = dest;
}
tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
return;
}
/* Without MIE3, each byte gets the count of bits for the byte. */
tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
/* Multiply to sum each byte at the top of the word. */
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
}
}
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@ -2584,6 +2611,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
break;
case INDEX_op_ctpop_i32:
tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
break;
case INDEX_op_ctpop_i64:
tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
break;
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@ -3146,6 +3180,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_extu_i32_i64:
case INDEX_op_extract_i32:
case INDEX_op_extract_i64:
case INDEX_op_ctpop_i32:
case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32:

View File

@ -91,7 +91,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_extract_i32 1
#define TCG_TARGET_HAS_sextract_i32 0
@ -128,7 +128,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 1
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_extract_i64 1
#define TCG_TARGET_HAS_sextract_i64 0