Reorder do_constant_folding_cond test to satisfy valgrind.

Fix value of MAX_OPC_PARAM_IARGS.
 Add opcodes for vector nand, nor, eqv.
 Support vector nand, nor, eqv on PPC and S390X hosts.
 Support AVX512VL, AVX512BW, AVX512DQ, and AVX512VBMI2.
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmIiYXwdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8gPQf9EVo8KZUhF+GxLFYv
 5zYnJ3YSf2XLWu/30IQ5e8yJV/7mYKe7Rp8ibc1k+f4bE3KBVKv5RqLry2stuvEB
 9xMu3hZagNPlJB0aAgCscMZT1CdUg9PTUq/wD7vqBGkEXQhZXFxpuTWdYFqqyoQk
 U68zEUOpk9b2Otk0K68JlnqWqzCBS6mNzSYjE7T+4s7msuedz2txFRNUMeY75DaF
 cNJuOJVRmusQTJGEH8EI5l0xW1XpjK0Lp2yYUUTs1hfn+9ELuhk4DjsxvatKANGD
 2xI9UYosFkQaaAbxzb40KWSGBY8PhTxaz6cruaS07q2ELTP3joRKeifJF2/BhREb
 +pxmgw==
 =3qoH
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20220304' into staging

Reorder do_constant_folding_cond test to satisfy valgrind.
Fix value of MAX_OPC_PARAM_IARGS.
Add opcodes for vector nand, nor, eqv.
Support vector nand, nor, eqv on PPC and S390X hosts.
Support AVX512VL, AVX512BW, AVX512DQ, and AVX512VBMI2.

# gpg: Signature made Fri 04 Mar 2022 18:59:08 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20220304: (21 commits)
  tcg/i386: Implement bitsel for avx512
  tcg/i386: Implement more logical operations for avx512
  tcg/i386: Implement avx512 multiply
  tcg/i386: Implement avx512 min/max/abs
  tcg/i386: Expand scalar rotate with avx512 insns
  tcg/i386: Remove rotls_vec from tcg_target_op_def
  tcg/i386: Expand vector word rotate as avx512vbmi2 shift-double
  tcg/i386: Support avx512vbmi2 vector shift-double instructions
  tcg/i386: Implement avx512 variable rotate
  tcg/i386: Implement avx512 immediate rotate
  tcg/i386: Implement avx512 immediate sari shift
  tcg/i386: Implement avx512 scalar shift
  tcg/i386: Implement avx512 variable shifts
  tcg/i386: Use tcg_can_emit_vec_op in expand_vec_cmp_noinv
  tcg/i386: Add tcg_out_evex_opc
  tcg/i386: Detect AVX512
  tcg/s390x: Implement vector NAND, NOR, EQV
  tcg/ppc: Implement vector NAND, NOR, EQV
  tcg: Add opcodes for vector nand, nor, eqv
  tcg: Set MAX_OPC_PARAM_IARGS to 7
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2022-03-05 14:43:19 +00:00
commit 2acf5e1d0e
17 changed files with 441 additions and 94 deletions

View File

@ -45,12 +45,26 @@
/*
 * Fallback CPUID feature-bit masks.  Each one is defined only when the
 * name is not already provided, so an existing definition always wins.
 */
#ifndef bit_AVX2
#define bit_AVX2 (1 << 5)
#endif
#ifndef bit_BMI2
#define bit_BMI2 (1 << 8)
#endif
#ifndef bit_AVX512F
#define bit_AVX512F (1 << 16)
#endif
#ifndef bit_AVX512DQ
#define bit_AVX512DQ (1 << 17)
#endif
#ifndef bit_AVX512BW
#define bit_AVX512BW (1 << 30)
#endif
/* Bit 31 needs an unsigned constant: (1 << 31) would overflow a signed int. */
#ifndef bit_AVX512VL
#define bit_AVX512VL (1u << 31)
#endif
/* Leaf 7, %ecx */
#ifndef bit_AVX512VBMI2
#define bit_AVX512VBMI2 (1 << 6)
#endif
/* Leaf 0x80000001, %ecx */
#ifndef bit_LZCNT

View File

@ -245,6 +245,9 @@ DEF(or_vec, 1, 2, 0, IMPLVEC)
DEF(xor_vec, 1, 2, 0, IMPLVEC)
DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))

View File

@ -43,7 +43,7 @@
#else
#define MAX_OPC_PARAM_PER_ARG 1
#endif
/* Largest number of input arguments a single op may carry. */
#define MAX_OPC_PARAM_IARGS 7
#define MAX_OPC_PARAM_OARGS 1
#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
@ -183,6 +183,9 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
#define TCG_TARGET_HAS_nand_vec 0
#define TCG_TARGET_HAS_nor_vec 0
#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0

View File

@ -131,6 +131,9 @@ typedef enum {
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec 1
#define TCG_TARGET_HAS_nand_vec 0
#define TCG_TARGET_HAS_nor_vec 0
#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1

View File

@ -130,6 +130,9 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec 1
#define TCG_TARGET_HAS_nand_vec 0
#define TCG_TARGET_HAS_nor_vec 0
#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1

View File

@ -45,6 +45,7 @@ C_O1_I2(r, r, rI)
C_O1_I2(x, x, x)
C_N1_I2(r, r, r)
C_N1_I2(r, r, rW)
C_O1_I3(x, 0, x, x)
C_O1_I3(x, x, x, x)
C_O1_I4(r, r, re, r, 0)
C_O1_I4(r, r, r, ri, ri)

View File

@ -171,6 +171,10 @@ bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;
bool have_avx512bw;
bool have_avx512dq;
bool have_avx512vbmi2;
bool have_avx512vl;
bool have_movbe;
#ifdef CONFIG_CPUID_H
@ -258,6 +262,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define P_SIMDF3 0x20000 /* 0xf3 opcode prefix */
#define P_SIMDF2 0x40000 /* 0xf2 opcode prefix */
#define P_VEXL 0x80000 /* Set VEX.L = 1 */
#define P_EVEX 0x100000 /* Requires EVEX encoding */
#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
@ -308,6 +313,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16)
#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16)
#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16)
#define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
@ -334,15 +340,19 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMAXUB (0xde | P_EXT | P_DATA16)
#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16)
#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16)
#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16)
#define OPC_PMINSW (0xea | P_EXT | P_DATA16)
#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16)
#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMINUB (0xda | P_EXT | P_DATA16)
#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16)
#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16)
#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16)
@ -351,19 +361,21 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16)
#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16)
#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16)
#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_POR (0xeb | P_EXT | P_DATA16)
#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16)
#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16)
#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16)
#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16)
#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16)
#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16)
#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16)
#define OPC_VPSRAQ (0x72 | P_EXT | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16)
#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16)
#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16)
@ -414,11 +426,29 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VZEROUPPER (0x77 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)
@ -622,9 +652,57 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
tcg_out8(s, opc);
}
/*
 * Emit the EVEX prefix for @opc, then pass @opc to tcg_out8().
 *
 * The four prefix bytes are assembled in one 32-bit word and written with
 * a single tcg_out32() call.  @r, @rm and @index contribute only their
 * bit 3, stored inverted in the R, B and X fields; @v is stored inverted
 * in the 4-bit field at bit 19.  The P_* flags carried in @opc select the
 * opcode map (mm), the implied SIMD prefix (pp), W and the length field.
 */
static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
                             int rm, int index)
{
    /* All four prefix bytes at once, starting with R' and V' set. */
    uint32_t prefix = 0x08041062;
    int map, simd;

    tcg_debug_assert(have_avx512vl);

    /* EVEX.mm: an opcode without any escape-map flag cannot be encoded. */
    if (!(opc & (P_EXT3A | P_EXT38 | P_EXT))) {
        g_assert_not_reached();
    }
    map = (opc & P_EXT3A) ? 3 : (opc & P_EXT38) ? 2 : 1;

    /* EVEX.pp: 0x66 -> 1, 0xf3 -> 2, 0xf2 -> 3, no prefix -> 0. */
    simd = (opc & P_DATA16) ? 1
         : (opc & P_SIMDF3) ? 2
         : (opc & P_SIMDF2) ? 3
         : 0;

    /* The fields below are disjoint, so the deposit order is irrelevant. */
    prefix = deposit32(prefix, 8, 2, map);
    prefix = deposit32(prefix, 16, 2, simd);
    prefix = deposit32(prefix, 15, 1, !(r & 8));          /* EVEX.RXB.R */
    prefix = deposit32(prefix, 14, 1, !(index & 8));      /* EVEX.RXB.X */
    prefix = deposit32(prefix, 13, 1, !(rm & 8));         /* EVEX.RXB.B */
    prefix = deposit32(prefix, 19, 4, ~v);
    prefix = deposit32(prefix, 23, 1, !!(opc & P_VEXW));
    prefix = deposit32(prefix, 29, 2, !!(opc & P_VEXL));

    tcg_out32(s, prefix);
    tcg_out8(s, opc);
}
/*
 * Emit a register-to-register vector instruction: the prefix and opcode
 * for @opc, then a ModRM byte with mod = 3 (0xc0) naming @r and @rm.
 *
 * Opcodes flagged P_EVEX go through tcg_out_evex_opc(); all others use
 * tcg_out_vex_opc().  Exactly one prefix/opcode sequence is emitted; an
 * additional unconditional tcg_out_vex_opc() call ahead of this branch
 * would put a second prefix and opcode in front of the instruction.
 */
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    if (opc & P_EVEX) {
        tcg_out_evex_opc(s, opc, r, v, rm, 0);
    } else {
        tcg_out_vex_opc(s, opc, r, v, rm, 0);
    }
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
@ -2746,7 +2824,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
};
static int const mul_insn[4] = {
OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ
};
static int const shift_imm_insn[4] = {
OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
@ -2770,28 +2848,31 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
};
static int const smin_insn[4] = {
OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ
};
static int const smax_insn[4] = {
OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
};
static int const umin_insn[4] = {
OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
};
static int const umax_insn[4] = {
OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
};
static int const rotlv_insn[4] = {
OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
};
static int const rotrv_insn[4] = {
OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ
};
static int const shlv_insn[4] = {
/* TODO: AVX512 adds support for MO_16. */
OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ
};
static int const shrv_insn[4] = {
/* TODO: AVX512 adds support for MO_16. */
OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ
};
static int const sarv_insn[4] = {
/* TODO: AVX512 adds support for MO_16, MO_64. */
OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ
};
static int const shls_insn[4] = {
OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
@ -2800,16 +2881,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
};
static int const sars_insn[4] = {
OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
};
static int const vpshldi_insn[4] = {
OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ
};
static int const vpshldv_insn[4] = {
OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ
};
static int const vpshrdv_insn[4] = {
OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
};
static int const abs_insn[4] = {
/* TODO: AVX512 adds support for MO_64. */
OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ
};
TCGType type = vecl + TCG_TYPE_V64;
int insn, sub;
TCGArg a0, a1, a2;
TCGArg a0, a1, a2, a3;
a0 = args[0];
a1 = args[1];
@ -2867,6 +2956,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sarv_vec:
insn = sarv_insn[vece];
goto gen_simd;
case INDEX_op_rotlv_vec:
insn = rotlv_insn[vece];
goto gen_simd;
case INDEX_op_rotrv_vec:
insn = rotrv_insn[vece];
goto gen_simd;
case INDEX_op_shls_vec:
insn = shls_insn[vece];
goto gen_simd;
@ -2888,6 +2983,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_x86_packus_vec:
insn = packus_insn[vece];
goto gen_simd;
case INDEX_op_x86_vpshldv_vec:
insn = vpshldv_insn[vece];
a1 = a2;
a2 = args[3];
goto gen_simd;
case INDEX_op_x86_vpshrdv_vec:
insn = vpshrdv_insn[vece];
a1 = a2;
a2 = args[3];
goto gen_simd;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_dup2_vec:
/* First merge the two 32-bit inputs to a single 64-bit element. */
@ -2931,17 +3036,30 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_shli_vec:
insn = shift_imm_insn[vece];
sub = 6;
goto gen_shift;
case INDEX_op_shri_vec:
insn = shift_imm_insn[vece];
sub = 2;
goto gen_shift;
case INDEX_op_sari_vec:
tcg_debug_assert(vece != MO_64);
if (vece == MO_64) {
insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX;
} else {
insn = shift_imm_insn[vece];
}
sub = 4;
goto gen_shift;
case INDEX_op_rotli_vec:
insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */
if (vece == MO_64) {
insn |= P_VEXW;
}
sub = 1;
goto gen_shift;
gen_shift:
tcg_debug_assert(vece != MO_8);
insn = shift_imm_insn[vece];
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
@ -2977,7 +3095,51 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
insn = OPC_VPERM2I128;
sub = args[3];
goto gen_simd_imm8;
case INDEX_op_x86_vpshldi_vec:
insn = vpshldi_insn[vece];
sub = args[3];
goto gen_simd_imm8;
case INDEX_op_not_vec:
insn = OPC_VPTERNLOGQ;
a2 = a1;
sub = 0x33; /* !B */
goto gen_simd_imm8;
case INDEX_op_nor_vec:
insn = OPC_VPTERNLOGQ;
sub = 0x11; /* norCB */
goto gen_simd_imm8;
case INDEX_op_nand_vec:
insn = OPC_VPTERNLOGQ;
sub = 0x77; /* nandCB */
goto gen_simd_imm8;
case INDEX_op_eqv_vec:
insn = OPC_VPTERNLOGQ;
sub = 0x99; /* xnorCB */
goto gen_simd_imm8;
case INDEX_op_orc_vec:
insn = OPC_VPTERNLOGQ;
sub = 0xdd; /* orB!C */
goto gen_simd_imm8;
case INDEX_op_bitsel_vec:
insn = OPC_VPTERNLOGQ;
a3 = args[3];
if (a0 == a1) {
a1 = a2;
a2 = a3;
sub = 0xca; /* A?B:C */
} else if (a0 == a2) {
a2 = a3;
sub = 0xe2; /* B?A:C */
} else {
tcg_out_mov(s, type, a0, a3);
sub = 0xb8; /* B?C:A */
}
goto gen_simd_imm8;
gen_simd_imm8:
tcg_debug_assert(insn != OPC_UD2);
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
@ -3196,6 +3358,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_or_vec:
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
case INDEX_op_orc_vec:
case INDEX_op_nand_vec:
case INDEX_op_nor_vec:
case INDEX_op_eqv_vec:
case INDEX_op_ssadd_vec:
case INDEX_op_usadd_vec:
case INDEX_op_sssub_vec:
@ -3207,10 +3373,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
case INDEX_op_rotls_vec:
case INDEX_op_cmp_vec:
case INDEX_op_x86_shufps_vec:
case INDEX_op_x86_blend_vec:
@ -3219,6 +3386,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_x86_vperm2i128_vec:
case INDEX_op_x86_punpckl_vec:
case INDEX_op_x86_punpckh_vec:
case INDEX_op_x86_vpshldi_vec:
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_dup2_vec:
#endif
@ -3226,12 +3394,19 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_abs_vec:
case INDEX_op_dup_vec:
case INDEX_op_not_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
case INDEX_op_rotli_vec:
case INDEX_op_x86_psrldq_vec:
return C_O1_I1(x, x);
case INDEX_op_x86_vpshldv_vec:
case INDEX_op_x86_vpshrdv_vec:
return C_O1_I3(x, 0, x, x);
case INDEX_op_bitsel_vec:
case INDEX_op_x86_vpblendvb_vec:
return C_O1_I3(x, x, x, x);
@ -3249,53 +3424,96 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_or_vec:
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
case INDEX_op_orc_vec:
case INDEX_op_nand_vec:
case INDEX_op_nor_vec:
case INDEX_op_eqv_vec:
case INDEX_op_not_vec:
case INDEX_op_bitsel_vec:
return 1;
case INDEX_op_rotli_vec:
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
return -1;
case INDEX_op_rotli_vec:
return have_avx512vl && vece >= MO_32 ? 1 : -1;
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
/* We must expand the operation for MO_8. */
return vece == MO_8 ? -1 : 1;
case INDEX_op_sari_vec:
/* We must expand the operation for MO_8. */
if (vece == MO_8) {
switch (vece) {
case MO_8:
return -1;
}
/* We can emulate this for MO_64, but it does not pay off
unless we're producing at least 4 values. */
if (vece == MO_64) {
case MO_16:
case MO_32:
return 1;
case MO_64:
if (have_avx512vl) {
return 1;
}
/*
* We can emulate this for MO_64, but it does not pay off
* unless we're producing at least 4 values.
*/
return type >= TCG_TYPE_V256 ? -1 : 0;
}
return 1;
return 0;
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
return vece >= MO_16;
case INDEX_op_sars_vec:
return vece >= MO_16 && vece <= MO_32;
switch (vece) {
case MO_16:
case MO_32:
return 1;
case MO_64:
return have_avx512vl;
}
return 0;
case INDEX_op_rotls_vec:
return vece >= MO_16 ? -1 : 0;
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
return have_avx2 && vece >= MO_32;
switch (vece) {
case MO_16:
return have_avx512bw;
case MO_32:
case MO_64:
return have_avx2;
}
return 0;
case INDEX_op_sarv_vec:
return have_avx2 && vece == MO_32;
switch (vece) {
case MO_16:
return have_avx512bw;
case MO_32:
return have_avx2;
case MO_64:
return have_avx512vl;
}
return 0;
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
return have_avx2 && vece >= MO_32 ? -1 : 0;
switch (vece) {
case MO_16:
return have_avx512vbmi2 ? -1 : 0;
case MO_32:
case MO_64:
return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
}
return 0;
case INDEX_op_mul_vec:
if (vece == MO_8) {
/* We can expand the operation for MO_8. */
switch (vece) {
case MO_8:
return -1;
}
if (vece == MO_64) {
return 0;
case MO_64:
return have_avx512dq;
}
return 1;
@ -3309,7 +3527,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_umin_vec:
case INDEX_op_umax_vec:
case INDEX_op_abs_vec:
return vece <= MO_32;
return vece <= MO_32 || have_avx512vl;
default:
return 0;
@ -3427,6 +3645,12 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
return;
}
if (have_avx512vbmi2) {
vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
return;
}
t = tcg_temp_new_vec(type);
tcg_gen_shli_vec(vece, t, v1, imm);
tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
@ -3434,31 +3658,19 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
tcg_temp_free_vec(t);
}
/*
 * Expand a vector rotate-left of @v1 by the scalar count @lsh into @v0,
 * built from a left shift, a right shift by the negated count, and an OR.
 *
 * NOTE(review): expand_vec_rotls is defined a second time later in this
 * listing (after expand_vec_rotv), with an AVX512 path added.  This copy
 * looks like the superseded version -- confirm that only one is kept.
 */
static void expand_vec_rotls(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
{
TCGv_i32 rsh;
TCGv_vec t;
/* Byte elements are not handled by this expansion. */
tcg_debug_assert(vece != MO_8);
t = tcg_temp_new_vec(type);
rsh = tcg_temp_new_i32();
/* rsh = -lsh masked with (8 << vece) - 1; presumably element bits - 1. */
tcg_gen_neg_i32(rsh, lsh);
tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
/* The left-shifted half goes to the temporary, so v0 is written last. */
tcg_gen_shls_vec(vece, t, v1, lsh);
tcg_gen_shrs_vec(vece, v0, v1, rsh);
tcg_gen_or_vec(vece, v0, v0, t);
tcg_temp_free_vec(t);
tcg_temp_free_i32(rsh);
}
static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
TCGv_vec v1, TCGv_vec sh, bool right)
{
TCGv_vec t = tcg_temp_new_vec(type);
TCGv_vec t;
if (have_avx512vbmi2) {
vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec,
type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1),
tcgv_vec_arg(v1), tcgv_vec_arg(sh));
return;
}
t = tcg_temp_new_vec(type);
tcg_gen_dupi_vec(vece, t, 8 << vece);
tcg_gen_sub_vec(vece, t, t, sh);
if (right) {
@ -3472,6 +3684,35 @@ static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
tcg_temp_free_vec(t);
}
/*
 * Expand a vector rotate-left of @v1 by the scalar count @lsh into @v0.
 *
 * When the host feature matching the element size is present (AVX512VL
 * for MO_32 and wider, AVX512VBMI2 otherwise), the count is broadcast
 * into a vector and a per-element variable rotate is used.  Without it,
 * the rotate is composed from a left shift, a right shift by the negated
 * count, and an OR.
 */
static void expand_vec_rotls(TCGType type, unsigned vece,
                             TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
{
    TCGv_vec tmp = tcg_temp_new_vec(type);
    bool wide, native;

    tcg_debug_assert(vece != MO_8);

    wide = vece >= MO_32;
    native = wide ? have_avx512vl : have_avx512vbmi2;

    if (!native) {
        /* (v1 << lsh) | (v1 >> (-lsh & ((8 << vece) - 1))) */
        TCGv_i32 rsh = tcg_temp_new_i32();

        tcg_gen_neg_i32(rsh, lsh);
        tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
        tcg_gen_shls_vec(vece, tmp, v1, lsh);
        tcg_gen_shrs_vec(vece, v0, v1, rsh);
        tcg_gen_or_vec(vece, v0, v0, tmp);
        tcg_temp_free_i32(rsh);
    } else {
        /* Broadcast the scalar count, then rotate by vector. */
        tcg_gen_dup_i32_vec(vece, tmp, lsh);
        if (wide) {
            tcg_gen_rotlv_vec(vece, v0, v1, tmp);
        } else {
            expand_vec_rotv(type, vece, v0, v1, tmp, false);
        }
    }

    tcg_temp_free_vec(tmp);
}
static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
@ -3567,28 +3808,28 @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
fixup = NEED_SWAP | NEED_INV;
break;
case TCG_COND_LEU:
if (vece <= MO_32) {
if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
fixup = NEED_UMIN;
} else {
fixup = NEED_BIAS | NEED_INV;
}
break;
case TCG_COND_GTU:
if (vece <= MO_32) {
if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
fixup = NEED_UMIN | NEED_INV;
} else {
fixup = NEED_BIAS;
}
break;
case TCG_COND_GEU:
if (vece <= MO_32) {
if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
fixup = NEED_UMAX;
} else {
fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
}
break;
case TCG_COND_LTU:
if (vece <= MO_32) {
if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
fixup = NEED_UMAX | NEED_INV;
} else {
fixup = NEED_BIAS | NEED_SWAP;
@ -3839,12 +4080,12 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
unsigned a, b, c, d, b7 = 0;
unsigned a, b, c, d, b7 = 0, c7 = 0;
unsigned max = __get_cpuid_max(0, 0);
if (max >= 7) {
/* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
__cpuid_count(7, 0, a, b7, c, d);
__cpuid_count(7, 0, a, b7, c7, d);
have_bmi1 = (b7 & bit_BMI) != 0;
have_bmi2 = (b7 & bit_BMI2) != 0;
}
@ -3874,6 +4115,22 @@ static void tcg_target_init(TCGContext *s)
if ((xcrl & 6) == 6) {
have_avx1 = (c & bit_AVX) != 0;
have_avx2 = (b7 & bit_AVX2) != 0;
/*
* There are interesting instructions in AVX512, so long
* as we have AVX512VL, which indicates support for EVEX
* on sizes smaller than 512 bits. We are required to
* check that OPMASK and all extended ZMM state are enabled
* even if we're not using them -- the insns will fault.
*/
if ((xcrl & 0xe0) == 0xe0
&& (b7 & bit_AVX512F)
&& (b7 & bit_AVX512VL)) {
have_avx512vl = true;
have_avx512bw = (b7 & bit_AVX512BW) != 0;
have_avx512dq = (b7 & bit_AVX512DQ) != 0;
have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
}
}
}
}

View File

@ -103,6 +103,10 @@ extern bool have_bmi1;
extern bool have_popcnt;
extern bool have_avx1;
extern bool have_avx2;
extern bool have_avx512bw;
extern bool have_avx512dq;
extern bool have_avx512vbmi2;
extern bool have_avx512vl;
extern bool have_movbe;
/* optional instructions */
@ -184,20 +188,23 @@ extern bool have_movbe;
#define TCG_TARGET_HAS_v256 have_avx2
/*
 * Optional vector operations.  Each macro is defined exactly once.  Those
 * set to have_avx512vl are decided at run time from that flag.
 */
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec have_avx512vl
#define TCG_TARGET_HAS_nand_vec have_avx512vl
#define TCG_TARGET_HAS_nor_vec have_avx512vl
#define TCG_TARGET_HAS_eqv_vec have_avx512vl
#define TCG_TARGET_HAS_not_vec have_avx512vl
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_roti_vec have_avx512vl
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec have_avx512vl
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 1
#define TCG_TARGET_HAS_shv_vec have_avx2
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
#define TCG_TARGET_HAS_cmpsel_vec -1
#define TCG_TARGET_deposit_i32_valid(ofs, len) \

View File

@ -33,3 +33,6 @@ DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC)
DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC)
DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC)

View File

@ -359,13 +359,13 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
CASE_OP_32_64_VEC(orc):
return x | ~y;
CASE_OP_32_64(eqv):
CASE_OP_32_64_VEC(eqv):
return ~(x ^ y);
CASE_OP_32_64(nand):
CASE_OP_32_64_VEC(nand):
return ~(x & y);
CASE_OP_32_64(nor):
CASE_OP_32_64_VEC(nor):
return ~(x | y);
case INDEX_op_clz_i32:
@ -552,10 +552,10 @@ static bool do_constant_folding_cond_eq(TCGCond c)
static int do_constant_folding_cond(TCGType type, TCGArg x,
TCGArg y, TCGCond c)
{
uint64_t xv = arg_info(x)->val;
uint64_t yv = arg_info(y)->val;
if (arg_is_const(x) && arg_is_const(y)) {
uint64_t xv = arg_info(x)->val;
uint64_t yv = arg_info(y)->val;
switch (type) {
case TCG_TYPE_I32:
return do_constant_folding_cond_32(xv, yv, c);
@ -567,7 +567,7 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
}
} else if (args_are_copies(x, y)) {
return do_constant_folding_cond_eq(c);
} else if (arg_is_const(y) && yv == 0) {
} else if (arg_is_const(y) && arg_info(y)->val == 0) {
switch (c) {
case TCG_COND_LTU:
return 0;
@ -2119,7 +2119,7 @@ void tcg_optimize(TCGContext *s)
case INDEX_op_dup2_vec:
done = fold_dup2(&ctx, op);
break;
CASE_OP_32_64(eqv):
CASE_OP_32_64_VEC(eqv):
done = fold_eqv(&ctx, op);
break;
CASE_OP_32_64(extract):
@ -2170,13 +2170,13 @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64(mulu2):
done = fold_multiply2(&ctx, op);
break;
CASE_OP_32_64(nand):
CASE_OP_32_64_VEC(nand):
done = fold_nand(&ctx, op);
break;
CASE_OP_32_64(neg):
done = fold_neg(&ctx, op);
break;
CASE_OP_32_64(nor):
CASE_OP_32_64_VEC(nor):
done = fold_nor(&ctx, op);
break;
CASE_OP_32_64_VEC(not):

View File

@ -3122,6 +3122,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
case INDEX_op_not_vec:
case INDEX_op_nor_vec:
case INDEX_op_eqv_vec:
case INDEX_op_nand_vec:
return 1;
case INDEX_op_orc_vec:
return have_isa_2_07;
@ -3400,6 +3403,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_orc_vec:
insn = VORC;
break;
case INDEX_op_nand_vec:
insn = VNAND;
break;
case INDEX_op_nor_vec:
insn = VNOR;
break;
case INDEX_op_eqv_vec:
insn = VEQV;
break;
case INDEX_op_cmp_vec:
switch (args[3]) {
@ -3787,6 +3799,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
case INDEX_op_orc_vec:
case INDEX_op_nor_vec:
case INDEX_op_eqv_vec:
case INDEX_op_nand_vec:
case INDEX_op_cmp_vec:
case INDEX_op_ssadd_vec:
case INDEX_op_sssub_vec:

View File

@ -162,6 +162,9 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
#define TCG_TARGET_HAS_nand_vec have_isa_2_07
#define TCG_TARGET_HAS_nor_vec 1
#define TCG_TARGET_HAS_eqv_vec have_isa_2_07
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
#define TCG_TARGET_HAS_abs_vec 0

View File

@ -290,7 +290,9 @@ typedef enum S390Opcode {
VRRc_VMXL = 0xe7fd,
VRRc_VN = 0xe768,
VRRc_VNC = 0xe769,
VRRc_VNN = 0xe76e,
VRRc_VNO = 0xe76b,
VRRc_VNX = 0xe76c,
VRRc_VO = 0xe76a,
VRRc_VOC = 0xe76f,
VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
@ -2805,6 +2807,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_xor_vec:
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
break;
case INDEX_op_nand_vec:
tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
break;
case INDEX_op_nor_vec:
tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
break;
case INDEX_op_eqv_vec:
tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
break;
case INDEX_op_shli_vec:
tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
@ -2901,7 +2912,10 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_and_vec:
case INDEX_op_andc_vec:
case INDEX_op_bitsel_vec:
case INDEX_op_eqv_vec:
case INDEX_op_nand_vec:
case INDEX_op_neg_vec:
case INDEX_op_nor_vec:
case INDEX_op_not_vec:
case INDEX_op_or_vec:
case INDEX_op_orc_vec:
@ -3246,6 +3260,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_or_vec:
case INDEX_op_orc_vec:
case INDEX_op_xor_vec:
case INDEX_op_nand_vec:
case INDEX_op_nor_vec:
case INDEX_op_eqv_vec:
case INDEX_op_cmp_vec:
case INDEX_op_mul_vec:
case INDEX_op_rotlv_vec:

View File

@ -145,6 +145,9 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1)
#define TCG_TARGET_HAS_nor_vec 1
#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1)
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1

View File

@ -371,23 +371,32 @@ void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
/*
 * Emit r = ~(a & b).
 *
 * Uses the nand_vec opcode when TCG_TARGET_HAS_nand_vec is nonzero and
 * otherwise expands to and_vec followed by not_vec.  @r is written only
 * inside the chosen branch, so it may alias @a or @b.  @vece is not
 * forwarded; 0 is passed as the element size to the emitted ops.
 */
void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nand_vec) {
        vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
    } else {
        tcg_gen_and_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}
/*
 * Emit r = ~(a | b).
 *
 * Uses the nor_vec opcode when TCG_TARGET_HAS_nor_vec is nonzero and
 * otherwise expands to or_vec followed by not_vec.  @r is written only
 * inside the chosen branch, so it may alias @a or @b.  @vece is not
 * forwarded; 0 is passed as the element size to the emitted ops.
 */
void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nor_vec) {
        vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
    } else {
        tcg_gen_or_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}
/*
 * Emit r = ~(a ^ b).
 *
 * Uses the eqv_vec opcode when TCG_TARGET_HAS_eqv_vec is nonzero and
 * otherwise expands to xor_vec followed by not_vec.  @r is written only
 * inside the chosen branch, so it may alias @a or @b.  @vece is not
 * forwarded; 0 is passed as the element size to the emitted ops.
 */
void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_eqv_vec) {
        vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
    } else {
        tcg_gen_xor_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)

View File

@ -1407,6 +1407,12 @@ bool tcg_op_supported(TCGOpcode op)
return have_vec && TCG_TARGET_HAS_andc_vec;
case INDEX_op_orc_vec:
return have_vec && TCG_TARGET_HAS_orc_vec;
case INDEX_op_nand_vec:
return have_vec && TCG_TARGET_HAS_nand_vec;
case INDEX_op_nor_vec:
return have_vec && TCG_TARGET_HAS_nor_vec;
case INDEX_op_eqv_vec:
return have_vec && TCG_TARGET_HAS_eqv_vec;
case INDEX_op_mul_vec:
return have_vec && TCG_TARGET_HAS_mul_vec;
case INDEX_op_shli_vec:

View File

@ -197,7 +197,7 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R0,
};
/*
 * Build-time guard: stop the build if the supported number of input
 * arguments no longer matches the value this file expects.
 */
#if MAX_OPC_PARAM_IARGS != 7
# error Fix needed, number of supported input arguments changed!
#endif