Finalize implementation of the "Vector Facility" for s390x TCG. Add it
to the QEMU CPU model, so it is enabled as default.
 Also:
 - One fix (and one workaround) for the STFLE instruction
 - Fix the alignment of vector registers (and change the data type)
 - Properly generate ELF_HWCAP for s390x for linux-user
 - Use a gvec helper for VECTOR SELECT
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAlz6X1kRHGRhdmlkQHJl
 ZGhhdC5jb20ACgkQTd4Q9wD/g1ph4A//a7lpgvjtlEZiD9snPdrIZHI/QiBWh0Nc
 y7diavq2eAEcmXHHaUquBtRPogtKlw9K9Z9NCncad1l5rmp+HZ/CItCvcMi8QFW+
 DUhEeyyv3A1INbqHfN2CFkRXz1wVURopIO2fRYSrBKe6bKkXjBa/DmKRnq73VLpX
 9C9CYPmyATl2kTTd0n7R0DFe0HK3Ayd3jmsjq9AIz2vayV1R8+pISOWo8XSlK6PH
 sOi7PpJlk3WTYIWgqnG6kwREOKhxejjFUpk4iUsKiKfWbv9A1wKh4LUcLuT/gaHP
 9rEX7ofccAkmsd5Wg/xcbHV6Q9kgwF7Yx4ret55mK+ZAcYcoyyIBu/Gl45MDk40L
 2+est/70Q2pDeMLLWtEdXGKNrIJzocQ2NwG7NvEnPJhNRat/zU3MxUUuX1fuMuUz
 xzSVVvqwmn/T3AwjhO0+b+O6IktALD+JJeyXYB3j+HJd+PclEdNLtFLfWpYq3ACZ
 DzbZf0E+lxz6RWjCMukC6ZXA/pq9p5Z9Q8Xf4dBRyJa7Qnvv0+0wCJddRddBDnSI
 MRpuqRiiWY/J7ebJqZw864270J3vOCnurZx6h1xHmxCsybVCBoLhXs6Fg+3zQBcX
 pLZnIwt/6Zb7ozBb6ByeTjiPcyFVUPpFxkNULM25JOSeDIqzWcd1qNi8CkRChNAp
 KBSjPvrB1m8=
 =KdI+
 -----END PGP SIGNATURE-----

Merge tag 's390x-tcg-2019-06-07' into s390-next-staging

Finalize implementation of the "Vector Facility" for s390x TCG. Add it
to the QEMU CPU model, so it is enabled as default.
Also:
- One fix (and one workaround) for the STFLE instruction
- Fix the alignment of vector registers (and change the data type)
- Properly generate ELF_HWCAP for s390x for linux-user
- Use a gvec helper for VECTOR SELECT

# gpg: Signature made Fri 07 Jun 2019 02:58:01 PM CEST
# gpg:                using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg:                issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [full]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]

* tag 's390x-tcg-2019-06-07': (33 commits)
  linux-user: elf: ELF_HWCAP for s390x
  s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT
  s390x: Bump the "qemu" CPU model up to a stripped-down z13
  s390x/tcg: We support the Vector Facility
  s390x/tcg: Allow linux-user to use vector instructions
  s390x/tcg: Implement VECTOR FP TEST DATA CLASS IMMEDIATE
  s390x/tcg: Implement VECTOR FP SUBTRACT
  s390x/tcg: Implement VECTOR FP SQUARE ROOT
  s390x/tcg: Implement VECTOR FP PERFORM SIGN OPERATION
  s390x/tcg: Implement VECTOR FP MULTIPLY AND (ADD|SUBTRACT)
  s390x/tcg: Implement VECTOR FP MULTIPLY
  s390x/tcg: Implement VECTOR LOAD ROUNDED
  s390x/tcg: Implement VECTOR LOAD LENGTHENED
  s390x/tcg: Implement VECTOR LOAD FP INTEGER
  s390x/tcg: Implement VECTOR FP DIVIDE
  s390x/tcg: Implement VECTOR FP CONVERT TO LOGICAL 64-BIT
  s390x/tcg: Implement VECTOR FP CONVERT TO FIXED 64-BIT
  s390x/tcg: Implement VECTOR FP CONVERT FROM LOGICAL 64-BIT
  s390x/tcg: Implement VECTOR FP CONVERT FROM FIXED 64-BIT
  s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH OR EQUAL)
  ...

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
This commit is contained in:
Cornelia Huck 2019-06-07 16:06:09 +02:00
commit c984c4e8e3
26 changed files with 1930 additions and 139 deletions

View File

@ -669,7 +669,9 @@ DEFINE_CCW_MACHINE(4_1, "4.1", true);
/*
 * Instance options for the 4.0 compat machine: apply the 4.1 instance
 * options first, then override the "qemu" CPU model using the V4_0
 * feature list.
 */
static void ccw_machine_4_0_instance_options(MachineState *machine)
{
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_0 };
ccw_machine_4_1_instance_options(machine);
/* NOTE(review): arguments look like (CPU type, generation, EC GA level, features) - confirm */
s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat);
}
static void ccw_machine_4_0_class_options(MachineClass *mc)

View File

@ -598,6 +598,7 @@ typedef struct {
#define HWCAP_S390_ETF3EH 256
#define HWCAP_S390_HIGH_GPRS 512
#define HWCAP_S390_TE 1024
#define HWCAP_S390_VXRS 2048
/* M68K specific definitions. */
/* We use the top 24 bits to encode information about the

View File

@ -1308,6 +1308,34 @@ static inline void init_thread(struct target_pt_regs *regs,
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_S390
#include "elf.h"
/* ELF_HWCAP is computed at runtime from the CPU model's features. */
#define ELF_HWCAP get_elf_hwcap()
/*
 * OR _hwcap into the local variable "hwcap" of the enclosing function when
 * s390_has_feat(_feat) reports the feature as available.
 */
#define GET_FEATURE(_feat, _hwcap) \
do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0)
/*
 * Build the HWCAP_S390_* bit mask used as ELF_HWCAP.
 *
 * Starts from a fixed base set of bits and adds one bit per CPU feature
 * that s390_has_feat() reports. Returns the resulting mask.
 */
static uint32_t get_elf_hwcap(void)
{
/*
* Let's assume we always have esan3 and zarch.
* 31-bit processes can use 64-bit registers (high gprs).
*/
uint32_t hwcap = HWCAP_S390_ESAN3 | HWCAP_S390_ZARCH | HWCAP_S390_HIGH_GPRS;
GET_FEATURE(S390_FEAT_STFLE, HWCAP_S390_STFLE);
GET_FEATURE(S390_FEAT_MSA, HWCAP_S390_MSA);
GET_FEATURE(S390_FEAT_LONG_DISPLACEMENT, HWCAP_S390_LDISP);
GET_FEATURE(S390_FEAT_EXTENDED_IMMEDIATE, HWCAP_S390_EIMM);
/* ETF3EH is only reported when both features are present. */
if (s390_has_feat(S390_FEAT_EXTENDED_TRANSLATION_3) &&
s390_has_feat(S390_FEAT_ETF3_ENH)) {
hwcap |= HWCAP_S390_ETF3EH;
}
GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS);
return hwcap;
}
static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop)
{
regs->psw.addr = infop->entry;

View File

@ -123,7 +123,7 @@ static void save_sigregs(CPUS390XState *env, target_sigregs *sregs)
*/
//save_fp_regs(&current->thread.fp_regs); FIXME
for (i = 0; i < 16; i++) {
__put_user(get_freg(env, i)->ll, &sregs->fpregs.fprs[i]);
__put_user(*get_freg(env, i), &sregs->fpregs.fprs[i]);
}
}
@ -254,7 +254,7 @@ restore_sigregs(CPUS390XState *env, target_sigregs *sc)
__get_user(env->aregs[i], &sc->regs.acrs[i]);
}
for (i = 0; i < 16; i++) {
__get_user(get_freg(env, i)->ll, &sc->fpregs.fprs[i]);
__get_user(*get_freg(env, i), &sc->fpregs.fprs[i]);
}
return err;

View File

@ -1,7 +1,8 @@
obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o
obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o vec_string_helper.o
obj-$(CONFIG_TCG) += vec_fpu_helper.o
obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
obj-$(CONFIG_SOFTMMU) += sigp.o
obj-$(CONFIG_KVM) += kvm.o

View File

@ -104,7 +104,7 @@ static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id)
note->hdr.n_type = cpu_to_be32(NT_FPREGSET);
note->contents.fpregset.fpc = cpu_to_be32(cpu->env.fpc);
for (i = 0; i <= 15; i++) {
note->contents.fpregset.fprs[i] = cpu_to_be64(get_freg(cs, i)->ll);
note->contents.fpregset.fprs[i] = cpu_to_be64(*get_freg(cs, i));
}
}
@ -114,7 +114,7 @@ static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu, int id)
note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_LOW);
for (i = 0; i <= 15; i++) {
note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1].ll);
note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1]);
}
}
@ -127,8 +127,8 @@ static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu, int id)
note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_HIGH);
for (i = 0; i <= 15; i++) {
temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0].ll);
temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1].ll);
temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0]);
temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1]);
}
}

View File

@ -145,6 +145,9 @@ static void s390_cpu_full_reset(CPUState *s)
#if defined(CONFIG_USER_ONLY)
/* user mode should always be allowed to use the full FPU */
env->cregs[0] |= CR0_AFP;
if (s390_has_feat(S390_FEAT_VECTOR)) {
env->cregs[0] |= CR0_VECTOR;
}
#endif
/* architectured initial value for Breaking-Event-Address register */

View File

@ -66,7 +66,7 @@ struct CPUS390XState {
* The floating point registers are part of the vector registers.
* vregs[0][0] -> vregs[15][0] are 16 floating point registers
*/
CPU_DoubleU vregs[32][2]; /* vector registers */
uint64_t vregs[32][2] QEMU_ALIGNED(16); /* vector registers */
uint32_t aregs[16]; /* access registers */
uint8_t riccb[64]; /* runtime instrumentation control */
uint64_t gscb[4]; /* guarded storage control */
@ -153,7 +153,7 @@ struct CPUS390XState {
};
/*
 * Return a pointer to element [nr][0] of the vector register array, i.e.
 * the doubleword that overlays floating point register "nr".
 * No range check is performed on nr here.
 */
static inline CPU_DoubleU *get_freg(CPUS390XState *cs, int nr)
static inline uint64_t *get_freg(CPUS390XState *cs, int nr)
{
return &cs->vregs[nr][0];
}
@ -215,6 +215,7 @@ extern const struct VMStateDescription vmstate_s390_cpu;
#define PGM_SPECIAL_OP 0x0013
#define PGM_OPERAND 0x0015
#define PGM_TRACE_TABLE 0x0016
#define PGM_VECTOR_PROCESSING 0x001b
#define PGM_SPACE_SWITCH 0x001c
#define PGM_HFP_SQRT 0x001d
#define PGM_PC_TRANS_SPEC 0x001f

View File

@ -86,8 +86,8 @@ static S390CPUDef s390_cpu_defs[] = {
CPUDEF_INIT(0x8562, 15, 1, 47, 0x08000000U, "gen15b", "IBM 8562 GA1"),
};
#define QEMU_MAX_CPU_TYPE 0x2827
#define QEMU_MAX_CPU_GEN 12
#define QEMU_MAX_CPU_TYPE 0x2964
#define QEMU_MAX_CPU_GEN 13
#define QEMU_MAX_CPU_EC_GA 2
static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
static S390FeatBitmap qemu_max_cpu_feat;

View File

@ -62,6 +62,21 @@ void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
tcg_s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, ra);
}
/*
 * Raise a vector-processing program interrupt carrying the code "vxc".
 *
 * @env: CPU state
 * @vxc: vector exception code; asserted to fit in 8 bits
 * @ra:  passed through to tcg_s390_program_interrupt()
 *
 * Outside of CONFIG_USER_ONLY builds the code is also written to the
 * lowcore's data_exc_code field. The code is deposited into env->fpc and
 * PGM_VECTOR_PROCESSING is then raised; declared QEMU_NORETURN.
 */
void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
uintptr_t ra)
{
g_assert(vxc <= 0xff);
#if !defined(CONFIG_USER_ONLY)
/* Always store the VXC into the lowcore, without AFP it is undefined */
stl_phys(CPU(s390_env_get_cpu(env))->as,
env->psa + offsetof(LowCore, data_exc_code), vxc);
#endif
/* Always store the VXC into the FPC, without AFP it is undefined */
/* NOTE(review): presumably an 8-bit field at bit offset 8 of the FPC - confirm */
env->fpc = deposit32(env->fpc, 8, 8, vxc);
tcg_s390_program_interrupt(env, PGM_VECTOR_PROCESSING, ILEN_AUTO, ra);
}
void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc)
{
tcg_s390_data_exception(env, dxc, GETPC());
@ -390,8 +405,8 @@ static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao)
}
for (i = 0; i < 32; i++) {
sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0].ll);
sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1].ll);
sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0]);
sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]);
}
cpu_physical_memory_unmap(sa, len, 1, len);
@ -429,7 +444,7 @@ static void do_mchk_interrupt(CPUS390XState *env)
lowcore->ar_access_id = 1;
for (i = 0; i < 16; i++) {
lowcore->floating_pt_save_area[i] = cpu_to_be64(get_freg(env, i)->ll);
lowcore->floating_pt_save_area[i] = cpu_to_be64(*get_freg(env, i));
lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]);
lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]);
lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]);

View File

@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr)
}
}
static inline int float_comp_to_cc(CPUS390XState *env, int float_compare)
int float_comp_to_cc(CPUS390XState *env, int float_compare)
{
S390CPU *cpu = s390_env_get_cpu(env);
@ -746,7 +746,7 @@ static inline uint16_t dcmask(int bit, bool neg)
}
#define DEF_FLOAT_DCMASK(_TYPE) \
static uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \
uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \
{ \
const bool neg = _TYPE##_is_neg(f1); \
\

View File

@ -116,7 +116,7 @@ static int cpu_read_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
case S390_FPC_REGNUM:
return gdb_get_reg32(mem_buf, env->fpc);
case S390_F0_REGNUM ... S390_F15_REGNUM:
return gdb_get_reg64(mem_buf, get_freg(env, n - S390_F0_REGNUM)->ll);
return gdb_get_reg64(mem_buf, *get_freg(env, n - S390_F0_REGNUM));
default:
return 0;
}
@ -129,7 +129,7 @@ static int cpu_write_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
env->fpc = ldl_p(mem_buf);
return 4;
case S390_F0_REGNUM ... S390_F15_REGNUM:
get_freg(env, n - S390_F0_REGNUM)->ll = ldtul_p(mem_buf);
*get_freg(env, n - S390_F0_REGNUM) = ldtul_p(mem_buf);
return 8;
default:
return 0;
@ -150,11 +150,11 @@ static int cpu_read_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
switch (n) {
case S390_V0L_REGNUM ... S390_V15L_REGNUM:
ret = gdb_get_reg64(mem_buf, env->vregs[n][1].ll);
ret = gdb_get_reg64(mem_buf, env->vregs[n][1]);
break;
case S390_V16_REGNUM ... S390_V31_REGNUM:
ret = gdb_get_reg64(mem_buf, env->vregs[n][0].ll);
ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1].ll);
ret = gdb_get_reg64(mem_buf, env->vregs[n][0]);
ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1]);
break;
default:
ret = 0;
@ -167,11 +167,11 @@ static int cpu_write_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
{
switch (n) {
case S390_V0L_REGNUM ... S390_V15L_REGNUM:
env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
env->vregs[n][1] = ldtul_p(mem_buf + 8);
return 8;
case S390_V16_REGNUM ... S390_V31_REGNUM:
env->vregs[n][0].ll = ldtul_p(mem_buf);
env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
env->vregs[n][0] = ldtul_p(mem_buf);
env->vregs[n][1] = ldtul_p(mem_buf + 8);
return 16;
default:
return 0;

View File

@ -689,7 +689,7 @@ static uint16_t qemu_V3_1[] = {
S390_FEAT_MSA_EXT_4,
};
static uint16_t qemu_LATEST[] = {
static uint16_t qemu_V4_0[] = {
/*
* Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not
* implemented yet).
@ -698,10 +698,13 @@ static uint16_t qemu_LATEST[] = {
S390_FEAT_ZPCI,
};
static uint16_t qemu_LATEST[] = {
S390_FEAT_STFLE_53,
S390_FEAT_VECTOR,
};
/* add all new definitions before this point */
static uint16_t qemu_MAX[] = {
/* z13+ features */
S390_FEAT_STFLE_53,
/* generates a dependency warning, leave it out for now */
S390_FEAT_MSA_EXT_5,
};
@ -820,6 +823,7 @@ static FeatGroupDefSpec FeatGroupDef[] = {
static FeatGroupDefSpec QemuFeatDef[] = {
QEMU_FEAT_INITIALIZER(V2_11),
QEMU_FEAT_INITIALIZER(V3_1),
QEMU_FEAT_INITIALIZER(V4_0),
QEMU_FEAT_INITIALIZER(LATEST),
QEMU_FEAT_INITIALIZER(MAX),
};

View File

@ -249,7 +249,7 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch)
cpu_physical_memory_write(offsetof(LowCore, ar_access_id), &ar_id, 1);
}
for (i = 0; i < 16; ++i) {
sa->fprs[i] = cpu_to_be64(get_freg(&cpu->env, i)->ll);
sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i));
}
for (i = 0; i < 16; ++i) {
sa->grs[i] = cpu_to_be64(cpu->env.regs[i]);
@ -299,8 +299,8 @@ int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len)
if (s390_has_feat(S390_FEAT_VECTOR)) {
for (i = 0; i < 32; i++) {
sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0].ll);
sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1].ll);
sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]);
sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]);
}
}
if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) {
@ -341,13 +341,13 @@ void s390_cpu_dump_state(CPUState *cs, FILE *f, int flags)
if (s390_has_feat(S390_FEAT_VECTOR)) {
for (i = 0; i < 32; i++) {
qemu_fprintf(f, "V%02d=%016" PRIx64 "%016" PRIx64 "%c",
i, env->vregs[i][0].ll, env->vregs[i][1].ll,
i, env->vregs[i][0], env->vregs[i][1],
i % 2 ? '\n' : ' ');
}
} else {
for (i = 0; i < 16; i++) {
qemu_fprintf(f, "F%02d=%016" PRIx64 "%c",
i, get_freg(env, i)->ll,
i, *get_freg(env, i),
(i % 4) == 3 ? '\n' : ' ');
}
}

View File

@ -211,6 +211,90 @@ DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
/* === Vector String Instructions === */
DEF_HELPER_FLAGS_4(gvec_vfae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vfae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfee8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vfee16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_3(gvec_vistr8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vistr16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
/* === Vector Floating-Point Instructions === */
DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfce64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfce64s_cc, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfch64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfch64s_cc, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_vfs64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
#ifndef CONFIG_USER_ONLY
DEF_HELPER_3(servc, i32, env, i64, i64)
DEF_HELPER_4(diag, void, env, i32, i32, i32)

View File

@ -1191,6 +1191,64 @@
/* VECTOR TEST UNDER MASK */
F(0xe7d8, VTM, VRR_a, V, 0, 0, 0, 0, vtm, 0, IF_VEC)
/* === Vector String Instructions === */
/* VECTOR FIND ANY ELEMENT EQUAL */
F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC)
/* VECTOR FIND ELEMENT EQUAL */
F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC)
/* VECTOR FIND ELEMENT NOT EQUAL */
F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC)
/* VECTOR ISOLATE STRING */
F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC)
/* VECTOR STRING RANGE COMPARE */
F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC)
/* === Vector Floating-Point Instructions === */
/* VECTOR FP ADD */
F(0xe7e3, VFA, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
/* VECTOR FP COMPARE SCALAR */
F(0xe7cb, WFC, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC)
/* VECTOR FP COMPARE AND SIGNAL SCALAR */
F(0xe7ca, WFK, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC)
/* VECTOR FP COMPARE EQUAL */
F(0xe7e8, VFCE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC)
/* VECTOR FP COMPARE HIGH */
F(0xe7eb, VFCH, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC)
/* VECTOR FP COMPARE HIGH OR EQUAL */
F(0xe7ea, VFCHE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC)
/* VECTOR FP CONVERT FROM FIXED 64-BIT */
F(0xe7c3, VCDG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
/* VECTOR FP CONVERT FROM LOGICAL 64-BIT */
F(0xe7c1, VCDLG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
/* VECTOR FP CONVERT TO FIXED 64-BIT */
F(0xe7c2, VCGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
/* VECTOR FP CONVERT TO LOGICAL 64-BIT */
F(0xe7c0, VCLGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
/* VECTOR FP DIVIDE */
F(0xe7e5, VFD, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
/* VECTOR LOAD FP INTEGER */
F(0xe7c7, VFI, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
/* VECTOR LOAD LENGTHENED */
F(0xe7c4, VFLL, VRR_a, V, 0, 0, 0, 0, vfll, 0, IF_VEC)
/* VECTOR LOAD ROUNDED */
F(0xe7c5, VFLR, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
/* VECTOR FP MULTIPLY */
F(0xe7e7, VFM, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
/* VECTOR FP MULTIPLY AND ADD */
F(0xe78f, VFMA, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC)
/* VECTOR FP MULTIPLY AND SUBTRACT */
F(0xe78e, VFMS, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC)
/* VECTOR FP PERFORM SIGN OPERATION */
F(0xe7cc, VFPSO, VRR_a, V, 0, 0, 0, 0, vfpso, 0, IF_VEC)
/* VECTOR FP SQUARE ROOT */
F(0xe7ce, VFSQ, VRR_a, V, 0, 0, 0, 0, vfsq, 0, IF_VEC)
/* VECTOR FP SUBTRACT */
F(0xe7e2, VFS, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
/* VECTOR FP TEST DATA CLASS IMMEDIATE */
F(0xe74a, VFTCI, VRI_e, V, 0, 0, 0, 0, vftci, 0, IF_VEC)
#ifndef CONFIG_USER_ONLY
/* COMPARE AND SWAP AND PURGE */
E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)

View File

@ -285,6 +285,10 @@ uint32_t set_cc_nz_f128(float128 v);
uint8_t s390_softfloat_exc_to_ieee(unsigned int exc);
int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3);
void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode);
int float_comp_to_cc(CPUS390XState *env, int float_compare);
uint16_t float32_dcmask(CPUS390XState *env, float32 f1);
uint16_t float64_dcmask(CPUS390XState *env, float64 f1);
uint16_t float128_dcmask(CPUS390XState *env, float128 f1);
/* gdbstub.c */

View File

@ -418,21 +418,21 @@ int kvm_arch_put_registers(CPUState *cs, int level)
if (can_sync_regs(cs, KVM_SYNC_VRS)) {
for (i = 0; i < 32; i++) {
cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0].ll;
cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1].ll;
cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0];
cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1];
}
cs->kvm_run->s.regs.fpc = env->fpc;
cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_VRS;
} else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
for (i = 0; i < 16; i++) {
cs->kvm_run->s.regs.fprs[i] = get_freg(env, i)->ll;
cs->kvm_run->s.regs.fprs[i] = *get_freg(env, i);
}
cs->kvm_run->s.regs.fpc = env->fpc;
cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_FPRS;
} else {
/* Floating point */
for (i = 0; i < 16; i++) {
fpu.fprs[i] = get_freg(env, i)->ll;
fpu.fprs[i] = *get_freg(env, i);
}
fpu.fpc = env->fpc;
@ -586,13 +586,13 @@ int kvm_arch_get_registers(CPUState *cs)
/* Floating point and vector registers */
if (can_sync_regs(cs, KVM_SYNC_VRS)) {
for (i = 0; i < 32; i++) {
env->vregs[i][0].ll = cs->kvm_run->s.regs.vrs[i][0];
env->vregs[i][1].ll = cs->kvm_run->s.regs.vrs[i][1];
env->vregs[i][0] = cs->kvm_run->s.regs.vrs[i][0];
env->vregs[i][1] = cs->kvm_run->s.regs.vrs[i][1];
}
env->fpc = cs->kvm_run->s.regs.fpc;
} else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
for (i = 0; i < 16; i++) {
get_freg(env, i)->ll = cs->kvm_run->s.regs.fprs[i];
*get_freg(env, i) = cs->kvm_run->s.regs.fprs[i];
}
env->fpc = cs->kvm_run->s.regs.fpc;
} else {
@ -601,7 +601,7 @@ int kvm_arch_get_registers(CPUState *cs)
return r;
}
for (i = 0; i < 16; i++) {
get_freg(env, i)->ll = fpu.fprs[i];
*get_freg(env, i) = fpu.fprs[i];
}
env->fpc = fpu.fpc;
}

View File

@ -66,22 +66,22 @@ static const VMStateDescription vmstate_fpu = {
.minimum_version_id = 1,
.needed = fpu_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[2][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[3][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[4][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[5][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[6][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[7][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[8][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[9][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[10][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[11][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[12][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[13][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[14][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[15][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[0][0], S390CPU),
VMSTATE_UINT64(env.vregs[1][0], S390CPU),
VMSTATE_UINT64(env.vregs[2][0], S390CPU),
VMSTATE_UINT64(env.vregs[3][0], S390CPU),
VMSTATE_UINT64(env.vregs[4][0], S390CPU),
VMSTATE_UINT64(env.vregs[5][0], S390CPU),
VMSTATE_UINT64(env.vregs[6][0], S390CPU),
VMSTATE_UINT64(env.vregs[7][0], S390CPU),
VMSTATE_UINT64(env.vregs[8][0], S390CPU),
VMSTATE_UINT64(env.vregs[9][0], S390CPU),
VMSTATE_UINT64(env.vregs[10][0], S390CPU),
VMSTATE_UINT64(env.vregs[11][0], S390CPU),
VMSTATE_UINT64(env.vregs[12][0], S390CPU),
VMSTATE_UINT64(env.vregs[13][0], S390CPU),
VMSTATE_UINT64(env.vregs[14][0], S390CPU),
VMSTATE_UINT64(env.vregs[15][0], S390CPU),
VMSTATE_UINT32(env.fpc, S390CPU),
VMSTATE_END_OF_LIST()
}
@ -99,54 +99,54 @@ static const VMStateDescription vmstate_vregs = {
.needed = vregs_needed,
.fields = (VMStateField[]) {
/* vregs[0][0] -> vregs[15][0] and fregs are overlays */
VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[17][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[18][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[19][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[20][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[21][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[22][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[23][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[24][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[25][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[26][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[27][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[28][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[29][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[30][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[31][0].ll, S390CPU),
VMSTATE_UINT64(env.vregs[0][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[1][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[2][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[3][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[4][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[5][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[6][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[7][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[8][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[9][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[10][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[11][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[12][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[13][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[14][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[15][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[16][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[17][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[18][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[19][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[20][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[21][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[22][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[23][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[24][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[25][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[26][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[27][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[28][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[29][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[30][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[31][1].ll, S390CPU),
VMSTATE_UINT64(env.vregs[16][0], S390CPU),
VMSTATE_UINT64(env.vregs[17][0], S390CPU),
VMSTATE_UINT64(env.vregs[18][0], S390CPU),
VMSTATE_UINT64(env.vregs[19][0], S390CPU),
VMSTATE_UINT64(env.vregs[20][0], S390CPU),
VMSTATE_UINT64(env.vregs[21][0], S390CPU),
VMSTATE_UINT64(env.vregs[22][0], S390CPU),
VMSTATE_UINT64(env.vregs[23][0], S390CPU),
VMSTATE_UINT64(env.vregs[24][0], S390CPU),
VMSTATE_UINT64(env.vregs[25][0], S390CPU),
VMSTATE_UINT64(env.vregs[26][0], S390CPU),
VMSTATE_UINT64(env.vregs[27][0], S390CPU),
VMSTATE_UINT64(env.vregs[28][0], S390CPU),
VMSTATE_UINT64(env.vregs[29][0], S390CPU),
VMSTATE_UINT64(env.vregs[30][0], S390CPU),
VMSTATE_UINT64(env.vregs[31][0], S390CPU),
VMSTATE_UINT64(env.vregs[0][1], S390CPU),
VMSTATE_UINT64(env.vregs[1][1], S390CPU),
VMSTATE_UINT64(env.vregs[2][1], S390CPU),
VMSTATE_UINT64(env.vregs[3][1], S390CPU),
VMSTATE_UINT64(env.vregs[4][1], S390CPU),
VMSTATE_UINT64(env.vregs[5][1], S390CPU),
VMSTATE_UINT64(env.vregs[6][1], S390CPU),
VMSTATE_UINT64(env.vregs[7][1], S390CPU),
VMSTATE_UINT64(env.vregs[8][1], S390CPU),
VMSTATE_UINT64(env.vregs[9][1], S390CPU),
VMSTATE_UINT64(env.vregs[10][1], S390CPU),
VMSTATE_UINT64(env.vregs[11][1], S390CPU),
VMSTATE_UINT64(env.vregs[12][1], S390CPU),
VMSTATE_UINT64(env.vregs[13][1], S390CPU),
VMSTATE_UINT64(env.vregs[14][1], S390CPU),
VMSTATE_UINT64(env.vregs[15][1], S390CPU),
VMSTATE_UINT64(env.vregs[16][1], S390CPU),
VMSTATE_UINT64(env.vregs[17][1], S390CPU),
VMSTATE_UINT64(env.vregs[18][1], S390CPU),
VMSTATE_UINT64(env.vregs[19][1], S390CPU),
VMSTATE_UINT64(env.vregs[20][1], S390CPU),
VMSTATE_UINT64(env.vregs[21][1], S390CPU),
VMSTATE_UINT64(env.vregs[22][1], S390CPU),
VMSTATE_UINT64(env.vregs[23][1], S390CPU),
VMSTATE_UINT64(env.vregs[24][1], S390CPU),
VMSTATE_UINT64(env.vregs[25][1], S390CPU),
VMSTATE_UINT64(env.vregs[26][1], S390CPU),
VMSTATE_UINT64(env.vregs[27][1], S390CPU),
VMSTATE_UINT64(env.vregs[28][1], S390CPU),
VMSTATE_UINT64(env.vregs[29][1], S390CPU),
VMSTATE_UINT64(env.vregs[30][1], S390CPU),
VMSTATE_UINT64(env.vregs[31][1], S390CPU),
VMSTATE_END_OF_LIST()
}
};

View File

@ -669,7 +669,7 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
{
const uintptr_t ra = GETPC();
const int count_bytes = ((env->regs[0] & 0xff) + 1) * 8;
const int max_bytes = ROUND_UP(used_stfl_bytes, 8);
int max_bytes;
int i;
if (addr & 0x7) {
@ -677,7 +677,14 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
}
prepare_stfl();
for (i = 0; i < count_bytes; ++i) {
max_bytes = ROUND_UP(used_stfl_bytes, 8);
/*
* The PoP says that doublewords beyond the highest-numbered facility
* bit may or may not be stored. However, existing hardware appears to
* not store the words, and existing software depends on that.
*/
for (i = 0; i < MIN(count_bytes, max_bytes); ++i) {
cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra);
}

View File

@ -18,5 +18,7 @@ void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
int ilen, uintptr_t ra);
/*
 * Declared QEMU_NORETURN: control does not return to the caller.
 * NOTE(review): dxc is presumably the data-exception code and ra the host
 * return address of the calling helper -- confirm against the definition.
 */
void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
uintptr_t ra);
/*
 * Declared QEMU_NORETURN: control does not return to the caller.
 * NOTE(review): vxc is presumably the vector-exception code and ra the host
 * return address of the calling helper -- confirm against the definition.
 */
void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
uintptr_t ra);
#endif /* TCG_S390X_H */

View File

@ -149,7 +149,7 @@ void s390x_translate_init(void)
/*
 * Byte offset of vector register @reg within CPUS390XState.
 * @reg must be below 32 (asserted).
 */
static inline int vec_full_reg_offset(uint8_t reg)
{
g_assert(reg < 32);
return offsetof(CPUS390XState, vregs[reg][0].d);
/*
 * NOTE(review): the return below is unreachable as written. The two returns
 * look like the old/new pair of a diff hunk -- confirm which one is intended.
 */
return offsetof(CPUS390XState, vregs[reg][0]);
}
static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp es)

View File

@ -52,6 +52,11 @@
/* Element-size code for 64-bit elements; aliases MO_64 */
#define ES_64 MO_64
/* NOTE(review): presumably the 128-bit element-size code -- confirm */
#define ES_128 4
/* Floating-Point Format */
/* NOTE(review): presumably short = 32 bit, long = 64 bit, ext = 128 bit -- confirm */
#define FPF_SHORT 2
#define FPF_LONG 3
#define FPF_EXT 4
static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
@ -188,6 +193,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
/*
 * Call tcg_gen_gvec_2s() with the full-register offsets of v1 and v2;
 * both size arguments are 16, c and gen are passed through.
 */
#define gen_gvec_2s(v1, v2, c, gen) \
tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
16, 16, c, gen)
/*
 * Call tcg_gen_gvec_2_ool() with the full-register offsets of v1 and v2;
 * both size arguments are 16, data and fn are passed through.
 */
#define gen_gvec_2_ool(v1, v2, data, fn) \
tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
16, 16, data, fn)
/*
 * Call tcg_gen_gvec_2i_ool() with the full-register offsets of v1 and v2;
 * c is passed before the two size arguments (both 16), then data and fn.
 */
#define gen_gvec_2i_ool(v1, v2, c, data, fn) \
tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
c, 16, 16, data, fn)
@ -214,6 +222,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
16, 16, data, fn)
/*
 * Call tcg_gen_gvec_4_ptr() with the full-register offsets of v1..v4;
 * ptr is passed before the two size arguments (both 16), then data and fn.
 */
#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
ptr, 16, 16, data, fn)
/*
 * Call tcg_gen_gvec_dup_i64() for element size es on the full register v1;
 * both size arguments are 16, c is the value passed through.
 */
#define gen_gvec_dup_i64(es, v1, c) \
tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
@ -233,6 +245,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
/*
 * Call tcg_gen_gvec_<fn>() (name built by token pasting) for element size es
 * with the full-register offsets of v1..v3; both size arguments are 16.
 */
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), 16, 16)
/*
 * Four-operand variant of gen_gvec_fn_3: call tcg_gen_gvec_<fn>() for element
 * size es with the full-register offsets of v1..v4; both size arguments are 16.
 */
#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
/*
* Helper to carry out a 128 bit vector computation using 2 i64 values per
@ -903,40 +918,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
/*
 * Emit a 64-bit bitwise select: d = (a & c) | (b & ~c).
 *
 * The b & ~c term is computed into a temporary before d is written; the
 * temporary is freed before returning.
 */
static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
TCGv_i64 t = tcg_temp_new_i64();
/* bit in c not set -> copy bit from b */
tcg_gen_andc_i64(t, b, c);
/* bit in c set -> copy bit from a */
tcg_gen_and_i64(d, a, c);
/* merge the results */
tcg_gen_or_i64(d, d, t);
tcg_temp_free_i64(t);
}
/*
 * Host-vector counterpart of gen_sel_i64: d = (a & c) | (b & ~c).
 *
 * The b & ~c term is computed into a temporary (allocated to match d) before
 * d is written; the temporary is freed before returning.
 */
static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
TCGv_vec c)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
/* bit in c not set -> copy bit from b */
tcg_gen_andc_vec(vece, t, b, c);
/* bit in c set -> copy bit from a */
tcg_gen_and_vec(vece, d, a, c);
/* merge the results */
tcg_gen_or_vec(vece, d, d, t);
tcg_temp_free_vec(t);
}
/*
 * Translate the instruction dispatched to op_vsel using the v1..v4 fields.
 * Always returns DISAS_NEXT.
 *
 * NOTE(review): the body contains both a gen_gvec_4() call driven by a local
 * GVecGen4 table and a gen_gvec_fn_4(bitsel, ...) call. This looks like the
 * before/after lines of a diff hunk -- confirm which form is intended.
 */
static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
static const GVecGen4 gvec_op = {
.fni8 = gen_sel_i64,
.fniv = gen_sel_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
/* operand order passed to bitsel: v1, v4, v2, v3 */
gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1),
get_field(s->fields, v4), get_field(s->fields, v2),
get_field(s->fields, v3));
return DISAS_NEXT;
}
@ -2353,3 +2339,460 @@ static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
set_cc_static(s);
return DISAS_NEXT;
}
/*
 * Translate the instruction dispatched to op_vfae.
 *
 * The m4 field selects the element size; values above ES_32 raise a
 * specification exception. The m5 field is handed to the helper as its data
 * argument. If bit 0 of m5 is set, the helper variant that also receives
 * cpu_env is emitted and set_cc_static() is called afterwards.
 */
static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
{
    static gen_helper_gvec_3 * const plain_fns[3] = {
        gen_helper_gvec_vfae8,
        gen_helper_gvec_vfae16,
        gen_helper_gvec_vfae32,
    };
    static gen_helper_gvec_3_ptr * const cc_fns[3] = {
        gen_helper_gvec_vfae_cc8,
        gen_helper_gvec_vfae_cc16,
        gen_helper_gvec_vfae_cc32,
    };
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t flags = get_field(s->fields, m5);

    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (!extract32(flags, 0, 1)) {
        /* no condition code requested */
        gen_gvec_3_ool(v1, v2, v3, flags, plain_fns[es]);
        return DISAS_NEXT;
    }

    gen_gvec_3_ptr(v1, v2, v3, cpu_env, flags, cc_fns[es]);
    set_cc_static(s);
    return DISAS_NEXT;
}
/*
 * Translate the instruction dispatched to op_vfee.
 *
 * A specification exception is raised if the element size (m4) exceeds ES_32
 * or if m5 has any bit set outside its two low bits. m5 is handed to the
 * helper as its data argument; with bit 0 of m5 set, the cpu_env-taking
 * helper is emitted and set_cc_static() is called afterwards.
 */
static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
{
    static gen_helper_gvec_3 * const plain_fns[3] = {
        gen_helper_gvec_vfee8,
        gen_helper_gvec_vfee16,
        gen_helper_gvec_vfee32,
    };
    static gen_helper_gvec_3_ptr * const cc_fns[3] = {
        gen_helper_gvec_vfee_cc8,
        gen_helper_gvec_vfee_cc16,
        gen_helper_gvec_vfee_cc32,
    };
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t flags = get_field(s->fields, m5);

    if (es > ES_32 || (flags & ~0x3) != 0) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (!extract32(flags, 0, 1)) {
        /* no condition code requested */
        gen_gvec_3_ool(v1, v2, v3, flags, plain_fns[es]);
        return DISAS_NEXT;
    }

    gen_gvec_3_ptr(v1, v2, v3, cpu_env, flags, cc_fns[es]);
    set_cc_static(s);
    return DISAS_NEXT;
}
/*
 * Translate the instruction dispatched to op_vfene.
 *
 * A specification exception is raised if the element size (m4) exceeds ES_32
 * or if m5 has any bit set outside its two low bits. m5 is handed to the
 * helper as its data argument; with bit 0 of m5 set, the cpu_env-taking
 * helper is emitted and set_cc_static() is called afterwards.
 */
static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
{
    static gen_helper_gvec_3 * const plain_fns[3] = {
        gen_helper_gvec_vfene8,
        gen_helper_gvec_vfene16,
        gen_helper_gvec_vfene32,
    };
    static gen_helper_gvec_3_ptr * const cc_fns[3] = {
        gen_helper_gvec_vfene_cc8,
        gen_helper_gvec_vfene_cc16,
        gen_helper_gvec_vfene_cc32,
    };
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t flags = get_field(s->fields, m5);

    if (es > ES_32 || (flags & ~0x3) != 0) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (!extract32(flags, 0, 1)) {
        /* no condition code requested */
        gen_gvec_3_ool(v1, v2, v3, flags, plain_fns[es]);
        return DISAS_NEXT;
    }

    gen_gvec_3_ptr(v1, v2, v3, cpu_env, flags, cc_fns[es]);
    set_cc_static(s);
    return DISAS_NEXT;
}
/*
 * Translate the instruction dispatched to op_vistr.
 *
 * A specification exception is raised if the element size (m4) exceeds ES_32
 * or if m5 has any bit set other than bit 0. The helpers always get 0 as
 * their data argument; with bit 0 of m5 set, the cpu_env-taking helper is
 * emitted and set_cc_static() is called afterwards.
 */
static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
{
    static gen_helper_gvec_2 * const plain_fns[3] = {
        gen_helper_gvec_vistr8,
        gen_helper_gvec_vistr16,
        gen_helper_gvec_vistr32,
    };
    static gen_helper_gvec_2_ptr * const cc_fns[3] = {
        gen_helper_gvec_vistr_cc8,
        gen_helper_gvec_vistr_cc16,
        gen_helper_gvec_vistr_cc32,
    };
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t flags = get_field(s->fields, m5);

    if (es > ES_32 || (flags & ~0x1) != 0) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (!extract32(flags, 0, 1)) {
        /* no condition code requested */
        gen_gvec_2_ool(v1, v2, 0, plain_fns[es]);
        return DISAS_NEXT;
    }

    gen_gvec_2_ptr(v1, v2, cpu_env, 0, cc_fns[es]);
    set_cc_static(s);
    return DISAS_NEXT;
}
/*
 * Translate the instruction dispatched to op_vstrc.
 *
 * The m5 field selects the element size; values above ES_32 raise a
 * specification exception. Two bits of m6 pick the helper flavour:
 *   bit 0 -> cpu_env-taking "_cc" helper, followed by set_cc_static()
 *   bit 2 -> "_rt" helper
 * m6 itself is handed to the helper as its data argument.
 */
static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
{
    static gen_helper_gvec_4 * const plain_fns[3] = {
        gen_helper_gvec_vstrc8,
        gen_helper_gvec_vstrc16,
        gen_helper_gvec_vstrc32,
    };
    static gen_helper_gvec_4 * const rt_fns[3] = {
        gen_helper_gvec_vstrc_rt8,
        gen_helper_gvec_vstrc_rt16,
        gen_helper_gvec_vstrc_rt32,
    };
    static gen_helper_gvec_4_ptr * const cc_fns[3] = {
        gen_helper_gvec_vstrc_cc8,
        gen_helper_gvec_vstrc_cc16,
        gen_helper_gvec_vstrc_cc32,
    };
    static gen_helper_gvec_4_ptr * const cc_rt_fns[3] = {
        gen_helper_gvec_vstrc_cc_rt8,
        gen_helper_gvec_vstrc_cc_rt16,
        gen_helper_gvec_vstrc_cc_rt32,
    };
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t v4 = get_field(s->fields, v4);
    const uint8_t es = get_field(s->fields, m5);
    const uint8_t m6 = get_field(s->fields, m6);
    const bool want_cc = extract32(m6, 0, 1);
    const bool want_rt = extract32(m6, 2, 1);

    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (want_cc) {
        gen_helper_gvec_4_ptr *fn = want_rt ? cc_rt_fns[es] : cc_fns[es];

        gen_gvec_4_ptr(v1, v2, v3, v4, cpu_env, m6, fn);
        set_cc_static(s);
    } else {
        gen_helper_gvec_4 *fn = want_rt ? rt_fns[es] : plain_fns[es];

        gen_gvec_4_ool(v1, v2, v3, v4, m6, fn);
    }
    return DISAS_NEXT;
}
/*
 * Shared translator for the four opcodes (op2 = 0xe3, 0xe5, 0xe7, 0xe2) that
 * map to the vfa64/vfd64/vfm64/vfs64 helpers.
 *
 * Only FPF_LONG is accepted in m4 and the three low bits of m5 must be zero,
 * otherwise a specification exception is raised. Bit 3 of m5 selects the
 * "s" helper variant.
 */
static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t fpf = get_field(s->fields, m4);
    const uint8_t m5 = get_field(s->fields, m5);
    gen_helper_gvec_3_ptr *full_fn, *single_fn;

    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0xe3:
        full_fn = gen_helper_gvec_vfa64;
        single_fn = gen_helper_gvec_vfa64s;
        break;
    case 0xe5:
        full_fn = gen_helper_gvec_vfd64;
        single_fn = gen_helper_gvec_vfd64s;
        break;
    case 0xe7:
        full_fn = gen_helper_gvec_vfm64;
        single_fn = gen_helper_gvec_vfm64s;
        break;
    case 0xe2:
        full_fn = gen_helper_gvec_vfs64;
        single_fn = gen_helper_gvec_vfs64s;
        break;
    default:
        g_assert_not_reached();
    }

    if (extract32(m5, 3, 1)) {
        gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, single_fn);
    } else {
        gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, full_fn);
    }
    return DISAS_NEXT;
}
/*
 * Translator shared by the wfc64 and wfk64 helpers.
 *
 * m3 must be FPF_LONG and m4 must be zero, otherwise a specification
 * exception is raised. op2 == 0xcb selects the wfc64 helper, any other op2
 * selects wfk64. set_cc_static() is called after the helper is emitted.
 */
static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t fpf = get_field(s->fields, m3);
    const uint8_t m4 = get_field(s->fields, m4);
    gen_helper_gvec_2_ptr *fn;

    if (fpf != FPF_LONG || m4) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    fn = s->fields->op2 == 0xcb ? gen_helper_gvec_wfc64
                                : gen_helper_gvec_wfk64;
    gen_gvec_2_ptr(v1, v2, cpu_env, 0, fn);
    set_cc_static(s);
    return DISAS_NEXT;
}
/*
 * Shared translator for the three opcodes (op2 = 0xe8, 0xeb, 0xea) that map
 * to the vfce64/vfch64/vfche64 helper families.
 *
 * A specification exception is raised unless m4 is FPF_LONG, the three low
 * bits of m5 are zero and bits 1-3 of m6 are zero. Bit 3 of m5 selects the
 * "s" helper variant; bit 0 of m6 selects the "_cc" variant, in which case
 * set_cc_static() is called after the helper is emitted.
 */
static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
{
    /* indexed as [operation][cs][se] */
    static gen_helper_gvec_3_ptr * const fns[3][2][2] = {
        {
            { gen_helper_gvec_vfce64, gen_helper_gvec_vfce64s },
            { gen_helper_gvec_vfce64_cc, gen_helper_gvec_vfce64s_cc },
        },
        {
            { gen_helper_gvec_vfch64, gen_helper_gvec_vfch64s },
            { gen_helper_gvec_vfch64_cc, gen_helper_gvec_vfch64s_cc },
        },
        {
            { gen_helper_gvec_vfche64, gen_helper_gvec_vfche64s },
            { gen_helper_gvec_vfche64_cc, gen_helper_gvec_vfche64s_cc },
        },
    };
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t fpf = get_field(s->fields, m4);
    const uint8_t m5 = get_field(s->fields, m5);
    const uint8_t m6 = get_field(s->fields, m6);
    const bool se = extract32(m5, 3, 1);
    const bool cs = extract32(m6, 0, 1);
    int op;

    if (fpf != FPF_LONG || extract32(m5, 0, 3) || extract32(m6, 1, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0xe8:
        op = 0;
        break;
    case 0xeb:
        op = 1;
        break;
    case 0xea:
        op = 2;
        break;
    default:
        g_assert_not_reached();
    }

    gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, fns[op][cs][se]);
    if (cs) {
        set_cc_static(s);
    }
    return DISAS_NEXT;
}
/*
 * Shared translator for the six opcodes (op2 = 0xc3, 0xc1, 0xc2, 0xc0, 0xc7,
 * 0xc5) that map to the vcdg64/vcdlg64/vcgd64/vclgd64/vfi64/vflr64 helpers.
 *
 * A specification exception is raised unless m3 is FPF_LONG, the two low
 * bits of m4 are zero and m5 (the rounding mode, erm) is at most 7 and not 2.
 * Bit 3 of m4 selects the "s" helper variant. The helper's data argument is
 * m4 with erm deposited into bits 4-7.
 */
static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t fpf = get_field(s->fields, m3);
    const uint8_t m4 = get_field(s->fields, m4);
    const uint8_t erm = get_field(s->fields, m5);
    gen_helper_gvec_2_ptr *full_fn, *single_fn, *fn;

    if (fpf != FPF_LONG || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0xc3:
        full_fn = gen_helper_gvec_vcdg64;
        single_fn = gen_helper_gvec_vcdg64s;
        break;
    case 0xc1:
        full_fn = gen_helper_gvec_vcdlg64;
        single_fn = gen_helper_gvec_vcdlg64s;
        break;
    case 0xc2:
        full_fn = gen_helper_gvec_vcgd64;
        single_fn = gen_helper_gvec_vcgd64s;
        break;
    case 0xc0:
        full_fn = gen_helper_gvec_vclgd64;
        single_fn = gen_helper_gvec_vclgd64s;
        break;
    case 0xc7:
        full_fn = gen_helper_gvec_vfi64;
        single_fn = gen_helper_gvec_vfi64s;
        break;
    case 0xc5:
        full_fn = gen_helper_gvec_vflr64;
        single_fn = gen_helper_gvec_vflr64s;
        break;
    default:
        g_assert_not_reached();
    }

    fn = extract32(m4, 3, 1) ? single_fn : full_fn;
    gen_gvec_2_ptr(v1, v2, cpu_env, deposit32(m4, 4, 4, erm), fn);
    return DISAS_NEXT;
}
/*
 * Translator for the vfll32 helpers.
 *
 * m3 must be FPF_SHORT and the three low bits of m4 must be zero, otherwise a
 * specification exception is raised. Bit 3 of m4 selects the "s" helper.
 */
static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t fpf = get_field(s->fields, m3);
    const uint8_t m4 = get_field(s->fields, m4);
    gen_helper_gvec_2_ptr *fn;

    if (fpf != FPF_SHORT || extract32(m4, 0, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    fn = extract32(m4, 3, 1) ? gen_helper_gvec_vfll32s
                             : gen_helper_gvec_vfll32;
    gen_gvec_2_ptr(v1, v2, cpu_env, 0, fn);
    return DISAS_NEXT;
}
/*
 * Translator shared by the vfma64 and vfms64 helpers.
 *
 * m6 must be FPF_LONG and the three low bits of m5 must be zero, otherwise a
 * specification exception is raised. op2 == 0x8f selects vfma64, any other
 * op2 selects vfms64; bit 3 of m5 selects the "s" helper variant.
 */
static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t v4 = get_field(s->fields, v4);
    const uint8_t m5 = get_field(s->fields, m5);
    const uint8_t fpf = get_field(s->fields, m6);
    gen_helper_gvec_4_ptr *fn;

    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 3, 1)) {
        fn = s->fields->op2 == 0x8f ? gen_helper_gvec_vfma64s
                                    : gen_helper_gvec_vfms64s;
    } else {
        fn = s->fields->op2 == 0x8f ? gen_helper_gvec_vfma64
                                    : gen_helper_gvec_vfms64;
    }
    gen_gvec_4_ptr(v1, v2, v3, v4, cpu_env, 0, fn);
    return DISAS_NEXT;
}
/*
 * Translate the sign operation dispatched to op_vfpso, done inline on the
 * sign bit (bit 63) of 64-bit elements.
 *
 * m3 must be FPF_LONG, the three low bits of m4 must be zero and m5 must be
 * at most 2, otherwise a specification exception is raised.
 *
 * m5 selects the operation:
 *   0 -> invert the sign bit
 *   1 -> set the sign bit
 *   2 -> clear the sign bit
 *
 * With bit 3 of m4 set only element 0 of v2 is processed and written to
 * element 0 of v1; otherwise the operation is applied to the whole vector.
 */
static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t fpf = get_field(s->fields, m3);
    const uint8_t m4 = get_field(s->fields, m4);
    const uint8_t m5 = get_field(s->fields, m5);
    const uint64_t sign_bit = 1ull << 63;
    TCGv_i64 elem;

    if (fpf != FPF_LONG || extract32(m4, 0, 3) || m5 > 2) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (!extract32(m4, 3, 1)) {
        /* all elements */
        if (m5 == 0) {
            gen_gvec_fn_2i(xori, ES_64, v1, v2, sign_bit);
        } else if (m5 == 1) {
            gen_gvec_fn_2i(ori, ES_64, v1, v2, sign_bit);
        } else {
            gen_gvec_fn_2i(andi, ES_64, v1, v2, ~sign_bit);
        }
        return DISAS_NEXT;
    }

    /* element 0 only */
    elem = tcg_temp_new_i64();
    read_vec_element_i64(elem, v2, 0, ES_64);
    if (m5 == 0) {
        tcg_gen_xori_i64(elem, elem, sign_bit);
    } else if (m5 == 1) {
        tcg_gen_ori_i64(elem, elem, sign_bit);
    } else {
        tcg_gen_andi_i64(elem, elem, ~sign_bit);
    }
    write_vec_element_i64(elem, v1, 0, ES_64);
    tcg_temp_free_i64(elem);
    return DISAS_NEXT;
}
/*
 * Translator for the vfsq64 helpers.
 *
 * m3 must be FPF_LONG and the three low bits of m4 must be zero, otherwise a
 * specification exception is raised. Bit 3 of m4 selects the "s" helper.
 */
static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t fpf = get_field(s->fields, m3);
    const uint8_t m4 = get_field(s->fields, m4);
    gen_helper_gvec_2_ptr *fn;

    if (fpf != FPF_LONG || extract32(m4, 0, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    fn = extract32(m4, 3, 1) ? gen_helper_gvec_vfsq64s
                             : gen_helper_gvec_vfsq64;
    gen_gvec_2_ptr(v1, v2, cpu_env, 0, fn);
    return DISAS_NEXT;
}
/*
 * Translator for the vftci64 helpers.
 *
 * m4 must be FPF_LONG and the three low bits of m5 must be zero, otherwise a
 * specification exception is raised. Bit 3 of m5 selects the "s" helper. The
 * i3 field is handed to the helper as its data argument, and set_cc_static()
 * is called after the helper is emitted.
 */
static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint16_t i3 = get_field(s->fields, i3);
    const uint8_t fpf = get_field(s->fields, m4);
    const uint8_t m5 = get_field(s->fields, m5);
    gen_helper_gvec_2_ptr *fn;

    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    fn = extract32(m5, 3, 1) ? gen_helper_gvec_vftci64s
                             : gen_helper_gvec_vftci64;
    gen_gvec_2_ptr(v1, v2, cpu_env, i3, fn);
    set_cc_static(s);
    return DISAS_NEXT;
}

View File

@ -12,6 +12,8 @@
#ifndef S390X_VEC_H
#define S390X_VEC_H
#include "tcg/tcg.h"
typedef union S390Vector {
uint64_t doubleword[2];
uint32_t word[4];
@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
return v->doubleword[enr];
}
/*
 * Read element @enr of @v, dispatching on the element size @es (MO_8, MO_16,
 * MO_32 or MO_64) to the matching fixed-size accessor. Any other @es value
 * trips g_assert_not_reached().
 */
static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
                                             uint8_t es)
{
    if (es == MO_8) {
        return s390_vec_read_element8(v, enr);
    }
    if (es == MO_16) {
        return s390_vec_read_element16(v, enr);
    }
    if (es == MO_32) {
        return s390_vec_read_element32(v, enr);
    }
    if (es == MO_64) {
        return s390_vec_read_element64(v, enr);
    }
    g_assert_not_reached();
}
static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr,
uint8_t data)
{
@ -98,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
v->doubleword[enr] = data;
}
/*
 * Store @data into element @enr of @v, dispatching on the element size @es
 * (MO_8, MO_16, MO_32 or MO_64) to the matching fixed-size accessor. Any
 * other @es value trips g_assert_not_reached().
 */
static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
                                          uint8_t es, uint64_t data)
{
    if (es == MO_8) {
        s390_vec_write_element8(v, enr, data);
        return;
    }
    if (es == MO_16) {
        s390_vec_write_element16(v, enr, data);
        return;
    }
    if (es == MO_32) {
        s390_vec_write_element32(v, enr, data);
        return;
    }
    if (es == MO_64) {
        s390_vec_write_element64(v, enr, data);
        return;
    }
    g_assert_not_reached();
}
#endif /* S390X_VEC_H */

View File

@ -0,0 +1,625 @@
/*
* QEMU TCG support -- s390x vector floating point instruction support
*
* Copyright (C) 2019 Red Hat Inc
*
* Authors:
* David Hildenbrand <david@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "internal.h"
#include "vec.h"
#include "tcg_s390x.h"
#include "tcg/tcg-gvec-desc.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
/*
 * Exception-type codes that check_ieee_exc() places in the low four bits of
 * its return value; the element number goes into the bits above (enr << 4).
 */
#define VIC_INVALID 0x1
#define VIC_DIVBYZERO 0x2
#define VIC_OVERFLOW 0x3
#define VIC_UNDERFLOW 0x4
#define VIC_INEXACT 0x5
/*
 * Collect the softfloat exceptions raised for element @enr.
 *
 * The pending softfloat flags are read and cleared, converted with
 * s390_softfloat_exc_to_ieee() and OR-ed into *@vec_exc. The converted flags
 * are then masked with bits 24-31 of env->fpc to find the trapping ones.
 *
 * Returns 0 if nothing traps. Otherwise returns (enr << 4) | VIC_* for the
 * highest-priority trapping exception: invalid, division by zero, overflow,
 * underflow, inexact. An inexact-only trap is not reported when @XxC is set.
 */
static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
                              uint8_t *vec_exc)
{
    const unsigned raised = env->fpu_status.float_exception_flags;
    uint8_t ieee_exc, trapping, vic;

    if (!raised) {
        return 0;
    }
    /* consume the softfloat flags */
    env->fpu_status.float_exception_flags = 0;

    ieee_exc = s390_softfloat_exc_to_ieee(raised);
    /* accumulate into the vector-wide exception bits */
    *vec_exc |= ieee_exc;

    trapping = ieee_exc & (env->fpc >> 24);
    if (!trapping) {
        return 0;
    }

    if (trapping & S390_IEEE_MASK_INVALID) {
        vic = VIC_INVALID;
    } else if (trapping & S390_IEEE_MASK_DIVBYZERO) {
        vic = VIC_DIVBYZERO;
    } else if (trapping & S390_IEEE_MASK_OVERFLOW) {
        vic = VIC_OVERFLOW;
    } else if (trapping & S390_IEEE_MASK_UNDERFLOW) {
        vic = VIC_UNDERFLOW;
    } else if (XxC) {
        /* inexact trap suppressed */
        return 0;
    } else {
        /* inexact has the lowest priority */
        g_assert(trapping & S390_IEEE_MASK_INEXACT);
        vic = VIC_INEXACT;
    }
    return enr << 4 | vic;
}
/*
 * Act on the outcome of the per-element check_ieee_exc() calls.
 *
 * A non-zero @vxc is passed to tcg_s390_vector_exception(), so env->fpc is
 * left untouched in that case. Otherwise the accumulated exception bits in
 * @vec_exc are OR-ed into env->fpc at bit position 16.
 */
static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
                            uintptr_t retaddr)
{
    if (vxc != 0) {
        /* trap: flags are not recorded, the instruction is suppressed */
        tcg_s390_vector_exception(env, vxc, retaddr);
    }
    if (vec_exc == 0) {
        return;
    }
    /* record the exceptions of all elements combined */
    env->fpc |= vec_exc << 16;
}
typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s);
/*
 * Apply the unary operation @fn to the 64-bit elements of @v2 and store the
 * result in @v1.
 *
 * With @s set only element 0 is processed; processing also stops at the first
 * element for which check_ieee_exc() reports a trap. The rounding mode is
 * swapped to @erm for the duration of the loop and restored before
 * exceptions are handled. The result is built in a zero-initialised
 * temporary, so unprocessed elements of @v1 end up zero.
 */
static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
                    bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
                    uintptr_t retaddr)
{
    const int nr_elems = s ? 1 : 2;
    uint8_t vxc = 0, vec_exc = 0;
    S390Vector result = {};
    int saved_mode, enr;

    saved_mode = s390_swap_bfp_rounding_mode(env, erm);
    for (enr = 0; enr < nr_elems; enr++) {
        const uint64_t in = s390_vec_read_element64(v2, enr);

        s390_vec_write_element64(&result, enr, fn(in, &env->fpu_status));
        vxc = check_ieee_exc(env, enr, XxC, &vec_exc);
        if (vxc) {
            break;
        }
    }
    s390_restore_bfp_rounding_mode(env, saved_mode);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;
}
typedef uint64_t (*vop64_3_fn)(uint64_t a, uint64_t b, float_status *s);
/*
 * Apply the binary operation @fn element-wise to the 64-bit elements of @v2
 * and @v3 and store the result in @v1.
 *
 * With @s set only element 0 is processed; processing also stops at the first
 * element for which check_ieee_exc() reports a trap. The result is built in a
 * zero-initialised temporary, so unprocessed elements of @v1 end up zero.
 */
static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                    CPUS390XState *env, bool s, vop64_3_fn fn,
                    uintptr_t retaddr)
{
    const int nr_elems = s ? 1 : 2;
    uint8_t vxc = 0, vec_exc = 0;
    S390Vector result = {};
    int enr;

    for (enr = 0; enr < nr_elems; enr++) {
        const uint64_t lhs = s390_vec_read_element64(v2, enr);
        const uint64_t rhs = s390_vec_read_element64(v3, enr);

        s390_vec_write_element64(&result, enr,
                                 fn(lhs, rhs, &env->fpu_status));
        vxc = check_ieee_exc(env, enr, false, &vec_exc);
        if (vxc) {
            break;
        }
    }
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;
}
/* Per-element operation: float64 addition a + b. */
static uint64_t vfa64(uint64_t a, uint64_t b, float_status *s)
{
    return float64_add(a, b, s);
}
/* Addition over both 64-bit elements, via vop64_3(). */
void HELPER(gvec_vfa64)(void *v1, const void *v2, const void *v3,
                        CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vop64_3(v1, v2, v3, env, false, vfa64, ra);
}
/* Addition on element 0 only, via vop64_3(). */
void HELPER(gvec_vfa64s)(void *v1, const void *v2, const void *v3,
                         CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vop64_3(v1, v2, v3, env, true, vfa64, ra);
}
/*
 * Compare element 0 of @v1 with element 0 of @v2 as float64 values.
 *
 * float64_compare() is used when @signal is set, float64_compare_quiet()
 * otherwise. IEEE exceptions are checked and handled before the comparison
 * result is converted with float_comp_to_cc() and returned.
 */
static int wfc64(const S390Vector *v1, const S390Vector *v2,
                 CPUS390XState *env, bool signal, uintptr_t retaddr)
{
    const float64 lhs = s390_vec_read_element64(v1, 0);
    const float64 rhs = s390_vec_read_element64(v2, 0);
    uint8_t vec_exc = 0;
    uint8_t vxc;
    int cmp;

    cmp = signal ? float64_compare(lhs, rhs, &env->fpu_status)
                 : float64_compare_quiet(lhs, rhs, &env->fpu_status);
    vxc = check_ieee_exc(env, 0, false, &vec_exc);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    return float_comp_to_cc(env, cmp);
}
/* Quiet compare of element 0; the result is stored in env->cc_op. */
void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env,
                        uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = wfc64(v1, v2, env, false, ra);
}
/* Compare of element 0 with signal set; the result is stored in env->cc_op. */
void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env,
                        uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = wfc64(v1, v2, env, true, ra);
}
typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status);
/*
 * Element-wise comparison of the 64-bit elements of @v2 and @v3.
 *
 * @fn is called with the operands swapped, i.e. fn(v3[i], v2[i]). Elements
 * for which it returns non-zero are set to all ones in @v1, all other
 * elements are zero. With @s set only element 0 is compared; processing also
 * stops at the first element that traps.
 *
 * Returns 0 if every compared element matched, 1 if only some matched and 3
 * if none matched.
 */
static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
{
    const int nr_elems = s ? 1 : 2;
    uint8_t vxc = 0, vec_exc = 0;
    S390Vector result = {};
    int matches = 0;
    int enr;

    for (enr = 0; enr < nr_elems; enr++) {
        const float64 lhs = s390_vec_read_element64(v2, enr);
        const float64 rhs = s390_vec_read_element64(v3, enr);

        /* operands are swapped so the existing softfloat predicates fit */
        if (fn(rhs, lhs, &env->fpu_status)) {
            matches++;
            s390_vec_write_element64(&result, enr, -1ull);
        }
        vxc = check_ieee_exc(env, enr, false, &vec_exc);
        if (vxc) {
            break;
        }
    }
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;

    if (!matches) {
        return 3;
    }
    if (s || matches == 2) {
        return 0;
    }
    return 1;
}
/* vfc64() with float64_eq_quiet over both elements; result code discarded. */
void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3,
                         CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfc64(v1, v2, v3, env, false, float64_eq_quiet, ra);
}
/* vfc64() with float64_eq_quiet on element 0 only; result code discarded. */
void HELPER(gvec_vfce64s)(void *v1, const void *v2, const void *v3,
                          CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfc64(v1, v2, v3, env, true, float64_eq_quiet, ra);
}
/* As gvec_vfce64, additionally storing the result code in env->cc_op. */
void HELPER(gvec_vfce64_cc)(void *v1, const void *v2, const void *v3,
                            CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = vfc64(v1, v2, v3, env, false, float64_eq_quiet, ra);
}
/* As gvec_vfce64s, additionally storing the result code in env->cc_op. */
void HELPER(gvec_vfce64s_cc)(void *v1, const void *v2, const void *v3,
                             CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = vfc64(v1, v2, v3, env, true, float64_eq_quiet, ra);
}
/* vfc64() with float64_lt_quiet over both elements; result code discarded. */
void HELPER(gvec_vfch64)(void *v1, const void *v2, const void *v3,
                         CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfc64(v1, v2, v3, env, false, float64_lt_quiet, ra);
}
/* vfc64() with float64_lt_quiet on element 0 only; result code discarded. */
void HELPER(gvec_vfch64s)(void *v1, const void *v2, const void *v3,
                          CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfc64(v1, v2, v3, env, true, float64_lt_quiet, ra);
}
/* As gvec_vfch64, additionally storing the result code in env->cc_op. */
void HELPER(gvec_vfch64_cc)(void *v1, const void *v2, const void *v3,
                            CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = vfc64(v1, v2, v3, env, false, float64_lt_quiet, ra);
}
/* As gvec_vfch64s, additionally storing the result code in env->cc_op. */
void HELPER(gvec_vfch64s_cc)(void *v1, const void *v2, const void *v3,
                             CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = vfc64(v1, v2, v3, env, true, float64_lt_quiet, ra);
}
/* vfc64() with float64_le_quiet over both elements; result code discarded. */
void HELPER(gvec_vfche64)(void *v1, const void *v2, const void *v3,
                          CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfc64(v1, v2, v3, env, false, float64_le_quiet, ra);
}
/* vfc64() with float64_le_quiet on element 0 only; result code discarded. */
void HELPER(gvec_vfche64s)(void *v1, const void *v2, const void *v3,
                           CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfc64(v1, v2, v3, env, true, float64_le_quiet, ra);
}
/* As gvec_vfche64, additionally storing the result code in env->cc_op. */
void HELPER(gvec_vfche64_cc)(void *v1, const void *v2, const void *v3,
                             CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = vfc64(v1, v2, v3, env, false, float64_le_quiet, ra);
}
/* As gvec_vfche64s, additionally storing the result code in env->cc_op. */
void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3,
                              CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, ra);
}
/* Per-element operation: convert a via int64_to_float64(). */
static uint64_t vcdg64(uint64_t a, float_status *s)
{
    return int64_to_float64(a, s);
}
/*
 * Conversion over both elements. The descriptor data carries the rounding
 * mode in bits 4-7 and the XxC flag in bit 2.
 */
void HELPER(gvec_vcdg64)(void *v1, const void *v2, CPUS390XState *env,
                         uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, false, XxC, erm, vcdg64, GETPC());
}
/* Same as gvec_vcdg64, but on element 0 only. */
void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC());
}
/* Per-element operation: convert a via uint64_to_float64(). */
static uint64_t vcdlg64(uint64_t a, float_status *s)
{
    return uint64_to_float64(a, s);
}
/*
 * Conversion over both elements. The descriptor data carries the rounding
 * mode in bits 4-7 and the XxC flag in bit 2.
 */
void HELPER(gvec_vcdlg64)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, false, XxC, erm, vcdlg64, GETPC());
}
/* Same as gvec_vcdlg64, but on element 0 only. */
void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env,
                           uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC());
}
/* Per-element operation: convert a via float64_to_int64(). */
static uint64_t vcgd64(uint64_t a, float_status *s)
{
    return float64_to_int64(a, s);
}
/*
 * Conversion over both elements. The descriptor data carries the rounding
 * mode in bits 4-7 and the XxC flag in bit 2.
 */
void HELPER(gvec_vcgd64)(void *v1, const void *v2, CPUS390XState *env,
                         uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, false, XxC, erm, vcgd64, GETPC());
}
/* Same as gvec_vcgd64, but on element 0 only. */
void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC());
}
/* Per-element operation: convert a via float64_to_uint64(). */
static uint64_t vclgd64(uint64_t a, float_status *s)
{
    return float64_to_uint64(a, s);
}
/*
 * Conversion over both elements. The descriptor data carries the rounding
 * mode in bits 4-7 and the XxC flag in bit 2.
 */
void HELPER(gvec_vclgd64)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, false, XxC, erm, vclgd64, GETPC());
}
/* Same as gvec_vclgd64, but on element 0 only. */
void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env,
                           uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC());
}
/* Per-element operation: float64 division a / b. */
static uint64_t vfd64(uint64_t a, uint64_t b, float_status *s)
{
    return float64_div(a, b, s);
}
/* Division over both 64-bit elements, via vop64_3(). */
void HELPER(gvec_vfd64)(void *v1, const void *v2, const void *v3,
                        CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vop64_3(v1, v2, v3, env, false, vfd64, ra);
}
/* Division on element 0 only, via vop64_3(). */
void HELPER(gvec_vfd64s)(void *v1, const void *v2, const void *v3,
                         CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vop64_3(v1, v2, v3, env, true, vfd64, ra);
}
/* Per-element operation: float64_round_to_int() on a. */
static uint64_t vfi64(uint64_t a, float_status *s)
{
    return float64_round_to_int(a, s);
}
/*
 * Round both elements. The descriptor data carries the rounding mode in
 * bits 4-7 and the XxC flag in bit 2.
 */
void HELPER(gvec_vfi64)(void *v1, const void *v2, CPUS390XState *env,
                        uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, false, XxC, erm, vfi64, GETPC());
}
/* Same as gvec_vfi64, but on element 0 only. */
void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env,
                         uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC());
}
/*
 * Convert the even-numbered 32-bit elements of @v2 (elements 0 and 2) to
 * float64 and store them as 64-bit elements 0 and 1 of @v1.
 *
 * With @s set only the first conversion is done; processing also stops at
 * the first element that traps. The element number reported to
 * check_ieee_exc() is that of the 32-bit source element.
 */
static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
                   bool s, uintptr_t retaddr)
{
    const int nr_elems = s ? 1 : 2;
    uint8_t vxc = 0, vec_exc = 0;
    S390Vector result = {};
    int enr;

    for (enr = 0; enr < nr_elems; enr++) {
        const int src_enr = enr * 2;
        const float32 in = s390_vec_read_element32(v2, src_enr);
        const uint64_t out = float32_to_float64(in, &env->fpu_status);

        s390_vec_write_element64(&result, enr, out);
        vxc = check_ieee_exc(env, src_enr, false, &vec_exc);
        if (vxc) {
            break;
        }
    }
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;
}
/* Conversion of both even source elements. */
void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
                         uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfll32(v1, v2, env, false, ra);
}
/* Conversion of source element 0 only. */
void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfll32(v1, v2, env, true, ra);
}
/*
 * Convert the 64-bit elements of @v2 to float32 and store them in the
 * even-numbered 32-bit elements of @v1 (elements 0 and 2).
 *
 * With @s set only element 0 is converted; processing also stops at the first
 * element that traps. The rounding mode is swapped to @erm for the loop and
 * restored before exceptions are handled. The element number reported to
 * check_ieee_exc() is that of the 64-bit source element.
 */
static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
                   bool s, bool XxC, uint8_t erm, uintptr_t retaddr)
{
    const int nr_elems = s ? 1 : 2;
    uint8_t vxc = 0, vec_exc = 0;
    S390Vector result = {};
    int saved_mode, enr;

    saved_mode = s390_swap_bfp_rounding_mode(env, erm);
    for (enr = 0; enr < nr_elems; enr++) {
        const float64 in = s390_vec_read_element64(v2, enr);
        const uint32_t out = float64_to_float32(in, &env->fpu_status);

        s390_vec_write_element32(&result, enr * 2, out);
        vxc = check_ieee_exc(env, enr, XxC, &vec_exc);
        if (vxc) {
            break;
        }
    }
    s390_restore_bfp_rounding_mode(env, saved_mode);
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;
}
/*
 * Conversion of both elements. The descriptor data carries the rounding mode
 * in bits 4-7 and the XxC flag in bit 2.
 */
void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
                         uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vflr64(v1, v2, env, false, XxC, erm, GETPC());
}
/* Same as gvec_vflr64, but on element 0 only. */
void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uint32_t data = simd_data(desc);
    const bool XxC = extract32(data, 2, 1);
    const uint8_t erm = extract32(data, 4, 4);

    vflr64(v1, v2, env, true, XxC, erm, GETPC());
}
/* Per-element operation: float64 multiplication a * b. */
static uint64_t vfm64(uint64_t a, uint64_t b, float_status *s)
{
    return float64_mul(a, b, s);
}
/* Multiplication over both 64-bit elements, via vop64_3(). */
void HELPER(gvec_vfm64)(void *v1, const void *v2, const void *v3,
                        CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vop64_3(v1, v2, v3, env, false, vfm64, ra);
}
/* Multiplication on element 0 only, via vop64_3(). */
void HELPER(gvec_vfm64s)(void *v1, const void *v2, const void *v3,
                         CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vop64_3(v1, v2, v3, env, true, vfm64, ra);
}
/*
 * Element-wise float64_muladd() of the 64-bit elements of @v2, @v3 and @v4,
 * with @flags passed through to softfloat; the result is stored in @v1.
 *
 * With @s set only element 0 is processed; processing also stops at the first
 * element that traps. The result is built in a zero-initialised temporary.
 */
static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
                   uintptr_t retaddr)
{
    const int nr_elems = s ? 1 : 2;
    uint8_t vxc = 0, vec_exc = 0;
    S390Vector result = {};
    int enr;

    for (enr = 0; enr < nr_elems; enr++) {
        const uint64_t x = s390_vec_read_element64(v2, enr);
        const uint64_t y = s390_vec_read_element64(v3, enr);
        const uint64_t z = s390_vec_read_element64(v4, enr);
        const uint64_t out = float64_muladd(x, y, z, flags, &env->fpu_status);

        s390_vec_write_element64(&result, enr, out);
        vxc = check_ieee_exc(env, enr, false, &vec_exc);
        if (vxc) {
            break;
        }
    }
    handle_ieee_exc(env, vxc, vec_exc, retaddr);
    *v1 = result;
}
/* Multiply-add over both elements (muladd flags 0). */
void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3,
                         const void *v4, CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfma64(v1, v2, v3, v4, env, false, 0, ra);
}
/* Multiply-add on element 0 only (muladd flags 0). */
void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3,
                          const void *v4, CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfma64(v1, v2, v3, v4, env, true, 0, ra);
}
/* Both elements, with float_muladd_negate_c as the muladd flag. */
void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3,
                         const void *v4, CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, ra);
}
/* Element 0 only, with float_muladd_negate_c as the muladd flag. */
void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3,
                          const void *v4, CPUS390XState *env, uint32_t desc)
{
    const uintptr_t ra = GETPC();

    vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, ra);
}
static uint64_t vfsq64(uint64_t a, float_status *s)
{
return float64_sqrt(a, s);
}
/*
 * gvec helper: runs vfsq64 over v2 through vop64_2(), targeting v1.
 * The three arguments after env (false, false, 0) presumably correspond to
 * the single-element flag, XxC and erm -- confirm against vop64_2().
 * desc is not consulted.
 */
void HELPER(gvec_vfsq64)(void *v1, const void *v2, CPUS390XState *env,
                         uint32_t desc)
{
    const uintptr_t retaddr = GETPC();

    vop64_2(v1, v2, env, false, false, 0, vfsq64, retaddr);
}
/*
 * gvec helper: same as gvec_vfsq64 but passes 'true' as the first flag of
 * vop64_2() (presumably "single element only" -- confirm against vop64_2()).
 * desc is not consulted.
 */
void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uintptr_t retaddr = GETPC();

    vop64_2(v1, v2, env, true, false, 0, vfsq64, retaddr);
}
/*
 * Per-element operation used by the subtract helpers: forwards both operands
 * and the float_status to float64_sub() (a first, b second) and returns its
 * result.
 */
static uint64_t vfs64(uint64_t a, uint64_t b, float_status *s)
{
    const uint64_t difference = float64_sub(a, b, s);

    return difference;
}
/*
 * gvec helper: runs vfs64 over v2 and v3 through vop64_3(), targeting v1.
 * The 'false' argument is presumably the single-element flag (the "s"
 * variant of this helper passes true) -- confirm against vop64_3().
 * desc is not consulted.
 */
void HELPER(gvec_vfs64)(void *v1, const void *v2, const void *v3,
                        CPUS390XState *env, uint32_t desc)
{
    const uintptr_t retaddr = GETPC();

    vop64_3(v1, v2, v3, env, false, vfs64, retaddr);
}
/*
 * gvec helper: same as gvec_vfs64 but passes 'true' as the flag argument of
 * vop64_3() (presumably "single element only" -- confirm against vop64_3()).
 * desc is not consulted.
 */
void HELPER(gvec_vfs64s)(void *v1, const void *v2, const void *v3,
                         CPUS390XState *env, uint32_t desc)
{
    const uintptr_t retaddr = GETPC();

    vop64_3(v1, v2, v3, env, true, vfs64, retaddr);
}
/*
 * Tests the 64-bit elements of v2 against the class mask @i3.
 *
 * For every processed element, float64_dcmask() of the element is ANDed with
 * @i3; a non-zero result stores all ones into the same element of v1,
 * otherwise zero is stored. With @s set only element 0 is processed (and
 * element 1 of v1 is not written).
 *
 * Returns:
 *   0 - every processed element matched (one with @s, two otherwise)
 *   1 - exactly one of the two elements matched (only possible without @s)
 *   3 - no element matched
 */
static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
                   bool s, uint16_t i3)
{
    const int nr_elems = s ? 1 : 2;
    int hits = 0;
    int idx;

    for (idx = 0; idx < nr_elems; idx++) {
        const float64 val = s390_vec_read_element64(v2, idx);
        const bool hit = (float64_dcmask(env, val) & i3) != 0;

        s390_vec_write_element64(v1, idx, hit ? -1ull : 0);
        hits += hit;
    }

    if (hits == 0) {
        return 3;
    }
    if (s || hits == 2) {
        return 0;
    }
    return 1;
}
/*
 * gvec helper: runs vftci64() over both elements (s = false) with the class
 * mask taken from simd_data(desc) and stores the returned value (0, 1 or 3)
 * in env->cc_op.
 */
void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
                          uint32_t desc)
{
    const uint16_t class_mask = simd_data(desc);
    const int cc = vftci64(v1, v2, env, false, class_mask);

    env->cc_op = cc;
}
/*
 * gvec helper: runs vftci64() over element 0 only (s = true) with the class
 * mask taken from simd_data(desc) and stores the returned value (0 or 3)
 * in env->cc_op.
 */
void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env,
                           uint32_t desc)
{
    const uint16_t class_mask = simd_data(desc);
    const int cc = vftci64(v1, v2, env, true, class_mask);

    env->cc_op = cc;
}

View File

@@ -0,0 +1,473 @@
/*
* QEMU TCG support -- s390x vector string instruction support
*
* Copyright (C) 2019 Red Hat Inc
*
* Authors:
* David Hildenbrand <david@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "internal.h"
#include "vec.h"
#include "tcg/tcg.h"
#include "tcg/tcg-gvec-desc.h"
#include "exec/helper-proto.h"
/*
 * Flags zero elements of @a.
 *
 * @mask must have every bit of each element set except the element's MSB
 * (e.g. 0x7f7f... for bytes). The result has the MSB set in each element
 * of @a that is zero, and every other bit cleared.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    /* MSB of a lane becomes 1 here iff any of its lower bits is set */
    const uint64_t low_bits_seen = (a & mask) + mask;
    /* fold in the original MSBs and saturate all non-MSB bits */
    const uint64_t nonzero_lanes = low_bits_seen | a | mask;

    return ~nonzero_lanes;
}
/*
 * Flags non-zero elements of @a.
 *
 * @mask must have every bit of each element set except the element's MSB
 * (e.g. 0x7f7f... for bytes). The result has the MSB set in each element
 * of @a that is not zero, and every other bit cleared.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    const uint64_t msb_only = ~mask;
    /* MSB of a lane becomes 1 here iff any of its lower bits is set */
    const uint64_t low_bits_seen = (a & mask) + mask;

    return (low_bits_seen | a) & msb_only;
}
/*
 * Converts a pair of per-element match bitmaps (@c0 first, then @c1) into
 * the byte offset of the first flagged bit: the number of leading zero bits
 * divided by eight, with @c1 offset by 64 bits.
 *
 * Returns 16 when neither word has a bit set; this relies on clz64(0)
 * yielding 64 -- confirm against the clz64() definition.
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    int leading_bits;

    if (c0) {
        leading_bits = clz64(c0);
    } else {
        leading_bits = clz64(c1) + 64;
    }
    return leading_bits >> 3;
}
/*
 * Returns the width in bits of one element of size @es, where an element
 * is (1 << es) bytes wide.
 */
static uint8_t get_element_bits(uint8_t es)
{
    const int elem_bytes = 1 << es;

    return elem_bytes * BITS_PER_BYTE;
}
/*
 * Returns a mask with the lowest get_element_bits(es) bits set, i.e. the
 * bits of a single right-aligned element.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    const int unused_bits = 64 - get_element_bits(es);

    return -1ull >> unused_bits;
}
/*
 * Returns the mask of a single right-aligned element with its MSB cleared,
 * i.e. the lowest get_element_bits(es) - 1 bits set.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    const int cleared_bits = 65 - get_element_bits(es);

    return -1ull >> cleared_bits;
}
/*
 * Returns the single-element "all bits but the MSB" mask for element size
 * @es, replicated by dup_const().
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    const uint64_t one_element = get_single_element_lsbs_mask(es);

    return dup_const(es, one_element);
}
/*
 * Searches every element of v2 for equality with any element of v3.
 *
 * @in: invert the per-element "equal to some v3 element" result
 * @rt: if set, write a per-element mask to v1 (all ones for flagged
 *      elements, zero otherwise); if clear, write the byte index
 *      MIN(first flagged element, first zero element) to 64-bit element 0
 *      of v1 and zero to element 1
 * @zs: additionally search v2 for its first zero element
 * @es: element size (element width is 1 << es bytes)
 *
 * Returns:
 *   3 - no match
 *   1 - matching elements, no match for zero
 *   2 - matching elements before match for zero
 *   0 - match for zero
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const int elem_bits = get_element_bits(es);
    const uint64_t src0 = s390_vec_read_element64(v2, 0);
    const uint64_t src1 = s390_vec_read_element64(v2, 1);
    const uint64_t set0 = s390_vec_read_element64(v3, 0);
    const uint64_t set1 = s390_vec_read_element64(v3, 1);
    uint64_t hit0 = 0, hit1 = 0;
    uint64_t first_zero = 16;
    uint64_t first_equal;
    int rot;

    /*
     * Rotate both halves of v3 through every element position so that each
     * element of v2 gets compared against every element of v3.
     */
    for (rot = 0; rot < 64; rot += elem_bits) {
        const uint64_t rot0 = rol64(set0, rot);
        const uint64_t rot1 = rol64(set1, rot);

        hit0 |= zero_search(src0 ^ rot0, lsbs) | zero_search(src0 ^ rot1, lsbs);
        hit1 |= zero_search(src1 ^ rot0, lsbs) | zero_search(src1 ^ rot1, lsbs);
    }

    /* only the element MSBs carry information, so invert just those */
    if (in) {
        hit0 = ~(hit0 | lsbs);
        hit1 = ~(hit1 | lsbs);
    }
    first_equal = match_index(hit0, hit1);

    if (zs) {
        first_zero = match_index(zero_search(src0, lsbs),
                                 zero_search(src1, lsbs));
    }

    if (rt) {
        /* widen each MSB flag into a full-element mask */
        const uint64_t elem_mask = get_single_element_mask(es);

        s390_vec_write_element64(v1, 0, (hit0 >> (elem_bits - 1)) * elem_mask);
        s390_vec_write_element64(v1, 1, (hit1 >> (elem_bits - 1)) * elem_mask);
    } else {
        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (first_zero == 16) {
        /* no match at all, or matching elements without a match for zero */
        return first_equal == 16 ? 3 : 1;
    }
    /* matching elements before the match for zero, or match for zero */
    return first_equal < first_zero ? 2 : 0;
}
/*
 * Instantiates gvec_vfae8/16/32: helpers that decode the in (bit 3),
 * rt (bit 2) and zs (bit 1) flags from simd_data(desc) and run vfae() for
 * the given element size, discarding the returned condition code.
 */
#define DEF_VFAE_HELPER(BITS)                                                  \
void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
                             uint32_t desc)                                    \
{                                                                              \
    const uint32_t data = simd_data(desc);                                     \
    const bool flag_in = extract32(data, 3, 1);                                \
    const bool flag_rt = extract32(data, 2, 1);                                \
    const bool flag_zs = extract32(data, 1, 1);                                \
                                                                               \
    (void)vfae(v1, v2, v3, flag_in, flag_rt, flag_zs, MO_##BITS);              \
}
DEF_VFAE_HELPER(8)
DEF_VFAE_HELPER(16)
DEF_VFAE_HELPER(32)
/*
 * Instantiates gvec_vfae_cc8/16/32: like the gvec_vfae helpers, but the
 * value returned by vfae() is stored in env->cc_op.
 */
#define DEF_VFAE_CC_HELPER(BITS)                                               \
void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
                                CPUS390XState *env, uint32_t desc)             \
{                                                                              \
    const uint32_t data = simd_data(desc);                                     \
    const bool flag_in = extract32(data, 3, 1);                                \
    const bool flag_rt = extract32(data, 2, 1);                                \
    const bool flag_zs = extract32(data, 1, 1);                                \
    const int cc = vfae(v1, v2, v3, flag_in, flag_rt, flag_zs, MO_##BITS);     \
                                                                               \
    env->cc_op = cc;                                                           \
}
DEF_VFAE_CC_HELPER(8)
DEF_VFAE_CC_HELPER(16)
DEF_VFAE_CC_HELPER(32)
/*
 * Finds the first element position at which v2 and v3 hold equal elements.
 *
 * @zs: additionally search v2 for its first zero element
 * @es: element size (element width is 1 << es bytes)
 *
 * Writes the byte index MIN(first equal element, first zero element) to
 * 64-bit element 0 of v1 (16 if neither exists) and zero to element 1.
 *
 * Returns:
 *   3 - no match
 *   1 - matching elements, no match for zero
 *   2 - matching elements before match for zero
 *   0 - match for zero
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
    /* equal elements XOR to zero, so look for zero elements in the XOR */
    const uint64_t first_equal = match_index(zero_search(lhs0 ^ rhs0, lsbs),
                                             zero_search(lhs1 ^ rhs1, lsbs));
    uint64_t first_zero = 16;

    if (zs) {
        first_zero = match_index(zero_search(lhs0, lsbs),
                                 zero_search(lhs1, lsbs));
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);

    if (first_zero == 16) {
        /* no match at all, or matching elements without a match for zero */
        return first_equal == 16 ? 3 : 1;
    }
    /* matching elements before the match for zero, or match for zero */
    return first_equal < first_zero ? 2 : 0;
}
/*
 * Instantiates gvec_vfee8/16/32: helpers that decode the zs flag (bit 1 of
 * simd_data(desc)) and run vfee() for the given element size, discarding
 * the returned condition code.
 */
#define DEF_VFEE_HELPER(BITS)                                                  \
void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
                             uint32_t desc)                                    \
{                                                                              \
    const bool flag_zs = extract32(simd_data(desc), 1, 1);                     \
                                                                               \
    (void)vfee(v1, v2, v3, flag_zs, MO_##BITS);                                \
}
DEF_VFEE_HELPER(8)
DEF_VFEE_HELPER(16)
DEF_VFEE_HELPER(32)
/*
 * Instantiates gvec_vfee_cc8/16/32: like the gvec_vfee helpers, but the
 * value returned by vfee() is stored in env->cc_op.
 */
#define DEF_VFEE_CC_HELPER(BITS)                                               \
void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
                                CPUS390XState *env, uint32_t desc)             \
{                                                                              \
    const bool flag_zs = extract32(simd_data(desc), 1, 1);                     \
    const int cc = vfee(v1, v2, v3, flag_zs, MO_##BITS);                       \
                                                                               \
    env->cc_op = cc;                                                           \
}
DEF_VFEE_CC_HELPER(8)
DEF_VFEE_CC_HELPER(16)
DEF_VFEE_CC_HELPER(32)
/*
 * Finds the first element position at which v2 and v3 hold different
 * elements.
 *
 * @zs: additionally search v2 for its first zero element
 * @es: element size (element width is 1 << es bytes)
 *
 * Writes the byte index MIN(first unequal element, first zero element) to
 * 64-bit element 0 of v1 (16 if neither exists) and zero to element 1.
 *
 * Returns:
 *   3 - neither an unequal nor a zero element was found
 *   0 - a zero element was found before the first unequal element
 *   1 - otherwise, and the v2 element at the first unequal position is
 *       smaller than the v3 element
 *   2 - otherwise
 */
static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
    /* differing elements XOR to non-zero */
    const uint64_t first_inequal = match_index(nonzero_search(lhs0 ^ rhs0, lsbs),
                                               nonzero_search(lhs1 ^ rhs1, lsbs));
    uint64_t first_zero = 16;
    bool smaller = false;

    /* remember which operand holds the smaller element at that position */
    if (first_inequal < 16) {
        /* byte offset -> element number */
        const uint8_t enr = first_inequal >> es;
        const uint32_t lhs = s390_vec_read_element(v2, enr, es);
        const uint32_t rhs = s390_vec_read_element(v3, enr, es);

        smaller = lhs < rhs;
    }

    if (zs) {
        first_zero = match_index(zero_search(lhs0, lsbs),
                                 zero_search(lhs1, lsbs));
    }

    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    s390_vec_write_element64(v1, 1, 0);

    if (first_zero == 16 && first_inequal == 16) {
        return 3;
    }
    if (first_zero < first_inequal) {
        return 0;
    }
    return smaller ? 1 : 2;
}
#define DEF_VFENE_HELPER(BITS) \
void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \
uint32_t desc) \
{ \
const bool zs = extract32(simd_data(desc), 1, 1); \
\
vfene(v1, v2, v3, zs, MO_##BITS); \
}
DEF_VFENE_HELPER(8)
DEF_VFENE_HELPER(16)
DEF_VFENE_HELPER(32)
/*
 * Instantiates gvec_vfene_cc8/16/32: like the gvec_vfene helpers, but the
 * value returned by vfene() is stored in env->cc_op.
 */
#define DEF_VFENE_CC_HELPER(BITS)                                              \
void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
                                 CPUS390XState *env, uint32_t desc)            \
{                                                                              \
    const bool flag_zs = extract32(simd_data(desc), 1, 1);                     \
    const int cc = vfene(v1, v2, v3, flag_zs, MO_##BITS);                      \
                                                                               \
    env->cc_op = cc;                                                           \
}
DEF_VFENE_CC_HELPER(8)
DEF_VFENE_CC_HELPER(16)
DEF_VFENE_CC_HELPER(32)
/*
 * Copies v2 to v1, clearing everything from the first zero element onwards.
 *
 * If 64-bit element 0 of v2 contains a zero element, all bits from that
 * element's MSB to the end of element 0 are cleared and element 1 is set to
 * zero. Otherwise element 0 is copied unchanged and the same truncation is
 * applied to element 1, if it contains a zero element.
 *
 * @es: element size (element width is 1 << es bytes)
 *
 * Returns 0 if a zero element was found, 3 otherwise.
 */
static int vistr(void *v1, const void *v2, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const uint64_t in0 = s390_vec_read_element64(v2, 0);
    const uint64_t in1 = s390_vec_read_element64(v2, 1);
    const uint64_t zeros0 = zero_search(in0, lsbs);
    uint64_t out0 = in0;
    uint64_t out1 = in1;
    int cc = 3;

    if (zeros0) {
        /* keep only the bits in front of the first zero element */
        out0 &= ~(-1ull >> clz64(zeros0));
        out1 = 0;
        cc = 0;
    } else {
        const uint64_t zeros1 = zero_search(in1, lsbs);

        if (zeros1) {
            out1 &= ~(-1ull >> clz64(zeros1));
            cc = 0;
        }
    }

    s390_vec_write_element64(v1, 0, out0);
    s390_vec_write_element64(v1, 1, out1);
    return cc;
}
/*
 * Instantiates gvec_vistr8/16/32: helpers that run vistr() for the given
 * element size and discard the returned condition code. desc is unused.
 */
#define DEF_VISTR_HELPER(BITS)                                                 \
void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
{                                                                              \
    (void)vistr(v1, v2, MO_##BITS);                                            \
}
DEF_VISTR_HELPER(8)
DEF_VISTR_HELPER(16)
DEF_VISTR_HELPER(32)
/*
 * Instantiates gvec_vistr_cc8/16/32: like the gvec_vistr helpers, but the
 * value returned by vistr() is stored in env->cc_op. desc is unused.
 */
#define DEF_VISTR_CC_HELPER(BITS)                                              \
void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
                                 uint32_t desc)                                \
{                                                                              \
    const int cc = vistr(v1, v2, MO_##BITS);                                   \
                                                                               \
    env->cc_op = cc;                                                           \
}
DEF_VISTR_CC_HELPER(8)
DEF_VISTR_CC_HELPER(16)
DEF_VISTR_CC_HELPER(32)
/*
 * Evaluates one comparison of @data against the limit @l, as selected by the
 * control byte @c:
 *   bit 7 of @c - result when data == l
 *   bit 6 of @c - result when data <  l
 *   bit 5 of @c - result when data >  l
 */
static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
{
    if (data == l) {
        return extract32(c, 7, 1);
    }
    return extract32(c, data < l ? 6 : 5, 1);
}
/*
 * Compares every element of v2 against pairs of limits taken from v3, using
 * control bytes taken from v4.
 *
 * An element of v2 matches if, for at least one even index j, both
 * element_compare(data, v3[j], c1) and element_compare(data, v3[j + 1], c2)
 * are true, where c1/c2 are the first bytes of elements j and j + 1 of v4.
 *
 * @in: invert the per-element match result
 * @rt: if set, write a per-element mask to v1 (all ones for matching
 *      elements, zero otherwise); if clear, write the byte index
 *      MIN(first match, first zero element) to 64-bit element 0 of v1 and
 *      zero to element 1
 * @zs: additionally search v2 for its first zero element
 * @es: element size (element width is 1 << es bytes)
 *
 * Returns:
 *   3 - no match
 *   1 - matching elements, no match for zero
 *   2 - matching elements before match for zero
 *   0 - match for zero
 */
static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
bool in, bool rt, bool zs, uint8_t es)
{
const uint64_t mask = get_element_lsbs_mask(es);
uint64_t a0 = s390_vec_read_element64(v2, 0);
uint64_t a1 = s390_vec_read_element64(v2, 1);
/* byte indexes; 16 means "not found" */
int first_zero = 16, first_match = 16;
/* built up separately and copied to v1 only at the end */
S390Vector rt_result = {};
uint64_t z0, z1;
int i, j;
if (zs) {
z0 = zero_search(a0, mask);
z1 = zero_search(a1, mask);
first_zero = match_index(z0, z1);
}
/* walk over all elements of v2 */
for (i = 0; i < 16 / (1 << es); i++) {
const uint32_t data = s390_vec_read_element(v2, i, es);
/* byte offset of element i within the vector */
const int cur_byte = i * (1 << es);
bool any_match = false;
/* if we don't need a bit vector, we can stop early */
if (cur_byte == first_zero && !rt) {
break;
}
/* walk over the limit pairs (j, j + 1) of v3 */
for (j = 0; j < 16 / (1 << es); j += 2) {
const uint32_t l1 = s390_vec_read_element(v3, j, es);
const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
/* we are only interested in the highest byte of each element */
const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
/* both comparisons of a pair have to hold */
if (element_compare(data, l1, c1) &&
element_compare(data, l2, c2)) {
any_match = true;
break;
}
}
/* invert the result if requested */
any_match = in ^ any_match;
if (any_match) {
/* indicate bit vector if requested */
if (rt) {
const uint64_t val = -1ull;
first_match = MIN(cur_byte, first_match);
s390_vec_write_element(&rt_result, i, es, val);
} else {
/* stop on the first match */
first_match = cur_byte;
break;
}
}
}
if (rt) {
*(S390Vector *)v1 = rt_result;
} else {
s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
s390_vec_write_element64(v1, 1, 0);
}
if (first_zero == 16 && first_match == 16) {
return 3; /* no match */
} else if (first_zero == 16) {
return 1; /* matching elements, no match for zero */
} else if (first_match < first_zero) {
return 2; /* matching elements before match for zero */
}
return 0; /* match for zero */
}
/*
 * Instantiates gvec_vstrc8/16/32: helpers that decode the in (bit 3) and
 * zs (bit 1) flags from simd_data(desc) and run vstrc() with rt = false for
 * the given element size, discarding the returned condition code.
 */
#define DEF_VSTRC_HELPER(BITS)                                                 \
void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
                              const void *v4, uint32_t desc)                   \
{                                                                              \
    const uint32_t data = simd_data(desc);                                     \
    const bool flag_in = extract32(data, 3, 1);                                \
    const bool flag_zs = extract32(data, 1, 1);                                \
                                                                               \
    (void)vstrc(v1, v2, v3, v4, flag_in, false, flag_zs, MO_##BITS);           \
}
DEF_VSTRC_HELPER(8)
DEF_VSTRC_HELPER(16)
DEF_VSTRC_HELPER(32)
/*
 * Instantiates gvec_vstrc_rt8/16/32: helpers that decode the in (bit 3) and
 * zs (bit 1) flags from simd_data(desc) and run vstrc() with rt = true for
 * the given element size, discarding the returned condition code.
 */
#define DEF_VSTRC_RT_HELPER(BITS)                                              \
void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
                                 const void *v4, uint32_t desc)                \
{                                                                              \
    const uint32_t data = simd_data(desc);                                     \
    const bool flag_in = extract32(data, 3, 1);                                \
    const bool flag_zs = extract32(data, 1, 1);                                \
                                                                               \
    (void)vstrc(v1, v2, v3, v4, flag_in, true, flag_zs, MO_##BITS);            \
}
DEF_VSTRC_RT_HELPER(8)
DEF_VSTRC_RT_HELPER(16)
DEF_VSTRC_RT_HELPER(32)
/*
 * Instantiates gvec_vstrc_cc8/16/32: like the gvec_vstrc helpers
 * (rt = false), but the value returned by vstrc() is stored in env->cc_op.
 */
#define DEF_VSTRC_CC_HELPER(BITS)                                              \
void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
                                 const void *v4, CPUS390XState *env,           \
                                 uint32_t desc)                                \
{                                                                              \
    const uint32_t data = simd_data(desc);                                     \
    const bool flag_in = extract32(data, 3, 1);                                \
    const bool flag_zs = extract32(data, 1, 1);                                \
    const int cc = vstrc(v1, v2, v3, v4, flag_in, false, flag_zs, MO_##BITS);  \
                                                                               \
    env->cc_op = cc;                                                           \
}
DEF_VSTRC_CC_HELPER(8)
DEF_VSTRC_CC_HELPER(16)
DEF_VSTRC_CC_HELPER(32)
/*
 * Instantiates gvec_vstrc_cc_rt8/16/32: like the gvec_vstrc_rt helpers
 * (rt = true), but the value returned by vstrc() is stored in env->cc_op.
 */
#define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
                                    const void *v4, CPUS390XState *env,        \
                                    uint32_t desc)                             \
{                                                                              \
    const uint32_t data = simd_data(desc);                                     \
    const bool flag_in = extract32(data, 3, 1);                                \
    const bool flag_zs = extract32(data, 1, 1);                                \
    const int cc = vstrc(v1, v2, v3, v4, flag_in, true, flag_zs, MO_##BITS);   \
                                                                               \
    env->cc_op = cc;                                                           \
}
DEF_VSTRC_CC_RT_HELPER(8)
DEF_VSTRC_CC_RT_HELPER(16)
DEF_VSTRC_CC_RT_HELPER(32)