From 9c49c83e4b23d31676633a1189faa6e70b489c01 Mon Sep 17 00:00:00 2001 From: Eden Mikitas Date: Tue, 2 Jun 2020 13:44:34 +0100 Subject: [PATCH 01/29] hw/ssi/imx_spi: changed while statement to prevent underflow The while statement in question only checked if tx_burst is not 0. tx_burst is a signed int, which is assigned the value put by the guest driver in ECSPI_CONREG. The burst length can be anywhere between 1 and 4096, and since tx_burst is always decremented by 8 it could possibly underflow, causing an infinite loop. Signed-off-by: Eden Mikitas Reviewed-by: Alistair Francis Signed-off-by: Peter Maydell --- hw/ssi/imx_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c index 2dd9a631e1..6fef5c7958 100644 --- a/hw/ssi/imx_spi.c +++ b/hw/ssi/imx_spi.c @@ -182,7 +182,7 @@ static void imx_spi_flush_txfifo(IMXSPIState *s) rx = 0; - while (tx_burst) { + while (tx_burst > 0) { uint8_t byte = tx & 0xff; DPRINTF("writing 0x%02x\n", (uint32_t)byte); From 6d686145c86c9712db1547f66bebb7131979c61b Mon Sep 17 00:00:00 2001 From: Eden Mikitas Date: Tue, 2 Jun 2020 13:44:34 +0100 Subject: [PATCH 02/29] hw/ssi/imx_spi: Removed unnecessary cast of rx data received from slave When inserting the value retrieved (rx) from the spi slave, rx is pushed to rx_fifo after being cast to uint8_t. rx_fifo is a fifo32, and the rx register the driver uses is also 32 bit. This zeroes the 24 most significant bits of rx. This proved problematic with devices that expect to use the whole 32 bits of the rx register. 
Signed-off-by: Eden Mikitas Reviewed-by: Alistair Francis Signed-off-by: Peter Maydell --- hw/ssi/imx_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c index 6fef5c7958..43b2f14dd2 100644 --- a/hw/ssi/imx_spi.c +++ b/hw/ssi/imx_spi.c @@ -206,7 +206,7 @@ static void imx_spi_flush_txfifo(IMXSPIState *s) if (fifo32_is_full(&s->rx_fifo)) { s->regs[ECSPI_STATREG] |= ECSPI_STATREG_RO; } else { - fifo32_push(&s->rx_fifo, (uint8_t)rx); + fifo32_push(&s->rx_fifo, rx); } if (s->burst_length <= 0) { From 3a37f23979ac8179dd297b45cd23020a610002ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 2 Jun 2020 13:44:34 +0100 Subject: [PATCH 03/29] hw/input/pxa2xx_keypad: Replace hw_error() by qemu_log_mask() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hw_error() calls exit(). This is a bit overkill when we can log the accesses as unimplemented or guest error. When fuzzing the devices, we don't want the whole process to exit. Replace some hw_error() calls by qemu_log_mask() (missed in commit 5a0001ec7e). 
Signed-off-by: Philippe Mathieu-Daudé Message-id: 20200525114123.21317-2-f4bug@amsat.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/input/pxa2xx_keypad.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hw/input/pxa2xx_keypad.c b/hw/input/pxa2xx_keypad.c index 31862a7d16..62aa6f6b15 100644 --- a/hw/input/pxa2xx_keypad.c +++ b/hw/input/pxa2xx_keypad.c @@ -12,7 +12,7 @@ */ #include "qemu/osdep.h" -#include "hw/hw.h" +#include "qemu/log.h" #include "hw/irq.h" #include "migration/vmstate.h" #include "hw/arm/pxa.h" @@ -233,7 +233,9 @@ static uint64_t pxa2xx_keypad_read(void *opaque, hwaddr offset, return s->kpkdi; break; default: - hw_error("%s: Bad offset " REG_FMT "\n", __func__, offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } return 0; @@ -280,7 +282,9 @@ static void pxa2xx_keypad_write(void *opaque, hwaddr offset, break; default: - hw_error("%s: Bad offset " REG_FMT "\n", __func__, offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } } From fc417e5b5784eec92163ad36140ab029c6661b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 2 Jun 2020 13:44:35 +0100 Subject: [PATCH 04/29] hw/arm/pxa2xx: Replace printf() call by qemu_log_mask() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace printf() calls by qemu_log_mask(), which is disabled by default. This avoids flooding the terminal when fuzzing the device. 
Signed-off-by: Philippe Mathieu-Daudé Message-id: 20200525114123.21317-3-f4bug@amsat.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/pxa2xx.c | 66 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index 336c9bad4a..e649f8930c 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -26,6 +26,7 @@ #include "sysemu/blockdev.h" #include "sysemu/qtest.h" #include "qemu/cutils.h" +#include "qemu/log.h" static struct { hwaddr io_base; @@ -112,7 +113,9 @@ static uint64_t pxa2xx_pm_read(void *opaque, hwaddr addr, return s->pm_regs[addr >> 2]; default: fail: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -143,8 +146,9 @@ static void pxa2xx_pm_write(void *opaque, hwaddr addr, s->pm_regs[addr >> 2] = value; break; } - - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -185,7 +189,9 @@ static uint64_t pxa2xx_cm_read(void *opaque, hwaddr addr, return s->cm_regs[CCCR >> 2] | (3 << 28); default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -210,7 +216,9 @@ static void pxa2xx_cm_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -415,7 +423,9 @@ static uint64_t pxa2xx_mm_read(void *opaque, hwaddr addr, return s->mm_regs[addr >> 2]; /* fall through */ default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } 
return 0; @@ -434,7 +444,9 @@ static void pxa2xx_mm_write(void *opaque, hwaddr addr, } default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -641,7 +653,9 @@ static uint64_t pxa2xx_ssp_read(void *opaque, hwaddr addr, case SSACD: return s->ssacd; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -733,7 +747,9 @@ static void pxa2xx_ssp_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } } @@ -995,7 +1011,9 @@ static uint64_t pxa2xx_rtc_read(void *opaque, hwaddr addr, else return s->last_swcr; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1101,7 +1119,9 @@ static void pxa2xx_rtc_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } @@ -1354,7 +1374,9 @@ static uint64_t pxa2xx_i2c_read(void *opaque, hwaddr addr, s->ibmr = 0; return s->ibmr; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1427,7 +1449,9 @@ static void pxa2xx_i2c_write(void *opaque, hwaddr addr, break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } @@ -1628,7 +1652,9 @@ static uint64_t pxa2xx_i2s_read(void *opaque, hwaddr addr, } return 
0; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1685,7 +1711,9 @@ static void pxa2xx_i2s_write(void *opaque, hwaddr addr, } break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } @@ -1870,7 +1898,9 @@ static uint64_t pxa2xx_fir_read(void *opaque, hwaddr addr, case ICFOR: return s->rx_len; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, addr); break; } return 0; @@ -1922,7 +1952,9 @@ static void pxa2xx_fir_write(void *opaque, hwaddr addr, case ICFOR: break; default: - printf("%s: Bad register " REG_FMT "\n", __func__, addr); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, addr); } } From a04b68e1d4c4f0cd5cd7542697b1b230b84532f5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 May 2020 14:28:26 -0700 Subject: [PATCH 05/29] target/arm: Convert aes and sm4 to gvec helpers With this conversion, we will be able to use the same helpers with sve. In particular, pass 3 vector parameters for the 3-operand operations; for advsimd the destination register is also an input. This also fixes a bug in which we failed to clear the high bits of the SVE register after an AdvSIMD operation. 
Signed-off-by: Richard Henderson Message-id: 20200514212831.31248-2-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/crypto_helper.c | 72 +++++++++++++++++++++++++++----------- target/arm/helper.h | 6 ++-- target/arm/translate-a64.c | 55 ++++++++++++++++++----------- target/arm/translate.c | 27 +++++++------- target/arm/vec_helper.c | 12 +------ target/arm/vec_internal.h | 33 +++++++++++++++++ 6 files changed, 138 insertions(+), 67 deletions(-) create mode 100644 target/arm/vec_internal.h diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index f800266727..6bd5a3d2d0 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -13,7 +13,9 @@ #include "cpu.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "crypto/aes.h" +#include "vec_internal.h" union CRYPTO_STATE { uint8_t bytes[16]; @@ -29,18 +31,15 @@ union CRYPTO_STATE { #define CR_ST_WORD(state, i) (state.words[i]) #endif -void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) +static void do_crypto_aese(uint64_t *rd, uint64_t *rn, + uint64_t *rm, bool decrypt) { static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; - union CRYPTO_STATE st = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; int i; - assert(decrypt < 2); - /* xor state vector with round key */ rk.l[0] ^= st.l[0]; rk.l[1] ^= st.l[1]; @@ -54,7 +53,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } -void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) +void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aese(vd + i, vn + i, vm + i, decrypt); + 
} + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) { static uint32_t const mc[][256] = { { /* MixColumns lookup table */ @@ -190,13 +200,9 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; int i; - assert(decrypt < 2); - for (i = 0; i < 16; i += 4) { CR_ST_WORD(st, i >> 2) = mc[decrypt][CR_ST_BYTE(st, i)] ^ @@ -209,6 +215,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aesmc(vd + i, vm + i, decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + /* * SHA-1 logical functions */ @@ -638,12 +655,10 @@ static uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; -void HELPER(crypto_sm4e)(void *vd, void *vn) +static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; - union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; uint32_t t, i; for (i = 0; i < 4; i++) { @@ -665,11 +680,18 @@ void HELPER(crypto_sm4e)(void *vd, void *vn) rd[1] = d.l[1]; } -void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) +void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4e(vd + i, vn + i, vm + i); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = 
vm; union CRYPTO_STATE d; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; @@ -693,3 +715,13 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) rd[0] = d.l[0]; rd[1] = d.l[1]; } + +void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4ekey(vd + i, vn + i, vm + i); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/target/arm/helper.h b/target/arm/helper.h index 49336dc432..42759f82aa 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -510,7 +510,7 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -531,8 +531,8 @@ DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 874f3eb4f9..b3f4223006 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c 
@@ -571,6 +571,15 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, is_q ? 16 : 8, vec_full_reg_size(s)); } +/* Expand a 2-operand operation using an out-of-line helper. */ +static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, + int rn, int data, gen_helper_gvec_2 *fn) +{ + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + is_q ? 16 : 8, vec_full_reg_size(s), data, fn); +} + /* Expand a 3-operand operation using an out-of-line helper. */ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, int data, gen_helper_gvec_3 *fn) @@ -13403,9 +13412,8 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); int decrypt; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - TCGv_i32 tcg_decrypt; - CryptoThreeOpIntFn *genfn; + gen_helper_gvec_2 *genfn2 = NULL; + gen_helper_gvec_3 *genfn3 = NULL; if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); @@ -13415,19 +13423,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) switch (opcode) { case 0x4: /* AESE */ decrypt = 0; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x6: /* AESMC */ decrypt = 0; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; case 0x5: /* AESD */ decrypt = 1; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x7: /* AESIMC */ decrypt = 1; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; default: unallocated_encoding(s); @@ -13437,16 +13445,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_decrypt = tcg_const_i32(decrypt); - - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - 
tcg_temp_free_i32(tcg_decrypt); + if (genfn2) { + gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); + } else { + gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); + } } /* Crypto three-reg SHA @@ -13595,7 +13598,8 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn; + CryptoThreeOpFn *genfn = NULL; + gen_helper_gvec_3 *oolfn = NULL; if (o == 0) { switch (opcode) { @@ -13630,7 +13634,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4ekey; + oolfn = gen_helper_crypto_sm4ekey; break; default: unallocated_encoding(s); @@ -13647,6 +13651,11 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) return; } + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); + return; + } + if (genfn) { TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; @@ -13699,6 +13708,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; CryptoTwoOpFn *genfn; + gen_helper_gvec_3 *oolfn = NULL; switch (opcode) { case 0: /* SHA512SU0 */ @@ -13707,7 +13717,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4e; + oolfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -13723,6 +13733,11 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, oolfn); + return; + } + tcg_rd_ptr = vec_full_reg_ptr(s, rd); tcg_rn_ptr = vec_full_reg_ptr(s, rn); diff --git a/target/arm/translate.c b/target/arm/translate.c index c8296116d4..74c1b5be42 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -6350,22 +6350,23 @@ static 
int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { return 1; } - ptr1 = vfp_reg_ptr(true, rd); - ptr2 = vfp_reg_ptr(true, rm); - - /* Bit 6 is the lowest opcode bit; it distinguishes between - * encryption (AESE/AESMC) and decryption (AESD/AESIMC) - */ - tmp3 = tcg_const_i32(extract32(insn, 6, 1)); - + /* + * Bit 6 is the lowest opcode bit; it distinguishes + * between encryption (AESE/AESMC) and decryption + * (AESD/AESIMC). + */ if (op == NEON_2RM_AESE) { - gen_helper_crypto_aese(ptr1, ptr2, tmp3); + tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd), + vfp_reg_offset(true, rd), + vfp_reg_offset(true, rm), + 16, 16, extract32(insn, 6, 1), + gen_helper_crypto_aese); } else { - gen_helper_crypto_aesmc(ptr1, ptr2, tmp3); + tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd), + vfp_reg_offset(true, rm), + 16, 16, extract32(insn, 6, 1), + gen_helper_crypto_aesmc); } - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_i32(tmp3); break; case NEON_2RM_SHA1H: if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 50a499299f..7d76412ee0 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -22,7 +22,7 @@ #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" - +#include "vec_internal.h" /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. 
*/ @@ -36,16 +36,6 @@ #define H4(x) (x) #endif -static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) -{ - uint64_t *d = vd + opr_sz; - uintptr_t i; - - for (i = opr_sz; i < max_sz; i += 8) { - *d++ = 0; - } -} - /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, int16_t src3, uint32_t *sat) diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h new file mode 100644 index 0000000000..00a8277765 --- /dev/null +++ b/target/arm/vec_internal.h @@ -0,0 +1,33 @@ +/* + * ARM AdvSIMD / SVE Vector Helpers + * + * Copyright (c) 2020 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef TARGET_ARM_VEC_INTERNALS_H +#define TARGET_ARM_VEC_INTERNALS_H + +static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) +{ + uint64_t *d = vd + opr_sz; + uintptr_t i; + + for (i = opr_sz; i < max_sz; i += 8) { + *d++ = 0; + } +} + +#endif /* TARGET_ARM_VEC_INTERNALS_H */ From 1738860d7e60dec5dbeba17f8b44d31aae3accac Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 May 2020 14:28:27 -0700 Subject: [PATCH 06/29] target/arm: Convert rax1 to gvec helpers With this conversion, we will be able to use the same helpers with sve. This also fixes a bug in which we failed to clear the high bits of the SVE register after an AdvSIMD operation. 
Signed-off-by: Richard Henderson Message-id: 20200514212831.31248-3-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/crypto_helper.c | 11 +++++++ target/arm/helper.h | 2 ++ target/arm/translate-a64.c | 59 ++++++++++++++++++++------------------ target/arm/translate-a64.h | 3 ++ 4 files changed, 47 insertions(+), 28 deletions(-) diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 6bd5a3d2d0..372d8350e4 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -725,3 +725,14 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) } clear_tail(vd, opr_sz, simd_maxsz(desc)); } + +void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] = n[i] ^ rol64(m[i], 1); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/target/arm/helper.h b/target/arm/helper.h index 42759f82aa..6c4eb9befb 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -534,6 +534,8 @@ DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b3f4223006..45c797f8fc 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13584,6 +13584,32 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(tcg_rn_ptr); } +static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_rotli_i64(d, m, 1); + tcg_gen_xor_i64(d, d, n); +} + 
+static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(vece, d, m, 1); + tcg_gen_xor_vec(vece, d, d, n); +} + +void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); +} + /* Crypto three-reg SHA512 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 * +-----------------------+------+---+---+-----+--------+------+------+ @@ -13600,6 +13626,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) bool feature; CryptoThreeOpFn *genfn = NULL; gen_helper_gvec_3 *oolfn = NULL; + GVecGen3Fn *gvecfn = NULL; if (o == 0) { switch (opcode) { @@ -13617,7 +13644,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); - genfn = NULL; + gvecfn = gen_gvec_rax1; break; default: g_assert_not_reached(); @@ -13653,10 +13680,9 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) if (oolfn) { gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); - return; - } - - if (genfn) { + } else if (gvecfn) { + gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); + } else { TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; tcg_rd_ptr = vec_full_reg_ptr(s, rd); @@ -13668,29 +13694,6 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(tcg_rd_ptr); tcg_temp_free_ptr(tcg_rn_ptr); tcg_temp_free_ptr(tcg_rm_ptr); - } else { - TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(); - tcg_op2 = tcg_temp_new_i64(); - tcg_res[0] = tcg_temp_new_i64(); - tcg_res[1] = tcg_temp_new_i64(); - - for (pass = 0; pass < 2; pass++) { - 
read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1); - tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res[0]); - tcg_temp_free_i64(tcg_res[1]); } } diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index f02fbb63a4..da0f59a2ce 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -115,4 +115,7 @@ static inline int vec_full_reg_size(DisasContext *s) bool disas_sve(DisasContext *, uint32_t); +void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + #endif /* TARGET_ARM_TRANSLATE_A64_H */ From aaffebd6d3135b8aed7e61932af53b004d261579 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 May 2020 14:28:28 -0700 Subject: [PATCH 07/29] target/arm: Convert sha512 and sm3 to gvec helpers Do not yet convert the helpers to loop over opr_sz, but the descriptor allows the vector tail to be cleared. Which fixes an existing bug vs SVE. 
Signed-off-by: Richard Henderson Message-id: 20200514212831.31248-4-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/crypto_helper.c | 37 +++++++++++++++++++++++----- target/arm/helper.h | 15 +++++++----- target/arm/translate-a64.c | 50 ++++++++++++-------------------------- 3 files changed, 55 insertions(+), 47 deletions(-) diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 372d8350e4..637e4c00bb 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -31,6 +31,19 @@ union CRYPTO_STATE { #define CR_ST_WORD(state, i) (state.words[i]) #endif +/* + * The caller has not been converted to full gvec, and so only + * modifies the low 16 bytes of the vector register. + */ +static void clear_tail_16(void *vd, uint32_t desc) +{ + int opr_sz = simd_oprsz(desc); + int max_sz = simd_maxsz(desc); + + assert(opr_sz == 16); + clear_tail(vd, opr_sz, max_sz); +} + static void do_crypto_aese(uint64_t *rd, uint64_t *rn, uint64_t *rm, bool decrypt) { @@ -470,7 +483,7 @@ static uint64_t s1_512(uint64_t x) return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); } -void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -483,9 +496,11 @@ void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -498,9 +513,11 @@ void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su0)(void *vd, void *vn) +void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -512,9 +529,11 @@ void HELPER(crypto_sha512su0)(void *vd, void *vn) rd[0] = 
d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -522,9 +541,11 @@ void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) rd[0] += s1_512(rn[0]) + rm[0]; rd[1] += s1_512(rn[1]) + rm[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -548,9 +569,11 @@ void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -568,6 +591,8 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, diff --git a/target/arm/helper.h b/target/arm/helper.h index 6c4eb9befb..784dc29ce2 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -522,14 +522,17 @@ DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, 
TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) -DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 45c797f8fc..2d24cfbe2f 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13624,7 +13624,6 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn = NULL; gen_helper_gvec_3 *oolfn = NULL; GVecGen3Fn *gvecfn = NULL; @@ -13632,15 +13631,15 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA512H */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h; + oolfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h2; + oolfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su1; + oolfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); @@ -13653,11 +13652,11 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SM3PARTW1 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw1; + oolfn = gen_helper_crypto_sm3partw1; 
break; case 1: /* SM3PARTW2 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw2; + oolfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); @@ -13680,20 +13679,8 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) if (oolfn) { gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); - } else if (gvecfn) { - gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } else { - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_ptr(tcg_rm_ptr); + gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } } @@ -13708,19 +13695,14 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 10, 2); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; - CryptoTwoOpFn *genfn; - gen_helper_gvec_3 *oolfn = NULL; switch (opcode) { case 0: /* SHA512SU0 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - oolfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -13736,18 +13718,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (oolfn) { - gen_gvec_op3_ool(s, true, rd, rd, rn, 0, oolfn); - return; + switch (opcode) { + case 0: /* SHA512SU0 */ + gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); + break; + case 1: /* SM4E */ + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); + break; + default: + g_assert_not_reached(); } - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - - genfn(tcg_rd_ptr, tcg_rn_ptr); - - 
tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); } /* Crypto four-register From effa992f153f5e7ab97ab843b565690748c5b402 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 May 2020 14:28:29 -0700 Subject: [PATCH 08/29] target/arm: Convert sha1 and sha256 to gvec helpers Do not yet convert the helpers to loop over opr_sz, but the descriptor allows the vector tail to be cleared, which fixes an existing bug vs SVE. Signed-off-by: Richard Henderson Message-id: 20200514212831.31248-5-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/crypto_helper.c | 24 +++++-- target/arm/helper.h | 12 ++-- target/arm/neon-dp.decode | 12 ++-- target/arm/translate-a64.c | 34 ++++----- target/arm/translate-neon.inc.c | 124 +++++--------------------------- target/arm/translate.c | 24 ++----- 6 files changed, 67 insertions(+), 163 deletions(-) diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 637e4c00bb..7124745c32 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -303,7 +303,7 @@ void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) rd[1] = d.l[1]; } -void HELPER(crypto_sha1h)(void *vd, void *vm) +void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -314,9 +314,11 @@ void HELPER(crypto_sha1h)(void *vd, void *vm) rd[0] = m.l[0]; rd[1] = m.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha1su1)(void *vd, void *vm) +void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -330,6 +332,8 @@ void HELPER(crypto_sha1su1)(void *vd, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -357,7 +361,7 @@ static uint32_t s1(uint32_t x) return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); } -void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t 
*rd = vd; uint64_t *rn = vn; @@ -388,9 +392,11 @@ void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -413,9 +419,11 @@ void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su0)(void *vd, void *vm) +void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -429,9 +437,11 @@ void HELPER(crypto_sha256su0)(void *vd, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -447,6 +457,8 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* diff --git a/target/arm/helper.h b/target/arm/helper.h index 784dc29ce2..cee23adbfc 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -514,13 +514,13 @@ DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) 
-DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 8beb1db768..5b2fc65d72 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -165,14 +165,14 @@ VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0 VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same +@3same_crypto .... .... .... .... .... .... .... .... \ + &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0 q=1 + SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \ vm=%vm_dp vn=%vn_dp vd=%vd_dp -SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp -SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp -SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp +SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... 
@3same_fp diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 2d24cfbe2f..5a4f8196bd 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13465,8 +13465,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoThreeOpFn *genfn; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; + gen_helper_gvec_3 *genfn; bool feature; if (size != 0) { @@ -13508,23 +13507,22 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - if (genfn) { - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); } else { TCGv_i32 tcg_opcode = tcg_const_i32(opcode); + TCGv_ptr tcg_rd_ptr = vec_full_reg_ptr(s, rd); + TCGv_ptr tcg_rn_ptr = vec_full_reg_ptr(s, rn); + TCGv_ptr tcg_rm_ptr = vec_full_reg_ptr(s, rm); gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_opcode); - tcg_temp_free_i32(tcg_opcode); - } - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_ptr(tcg_rm_ptr); + tcg_temp_free_i32(tcg_opcode); + tcg_temp_free_ptr(tcg_rd_ptr); + tcg_temp_free_ptr(tcg_rn_ptr); + tcg_temp_free_ptr(tcg_rm_ptr); + } } /* Crypto two-reg SHA @@ -13539,9 +13537,8 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoTwoOpFn *genfn; + gen_helper_gvec_2 *genfn; bool feature; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { unallocated_encoding(s); @@ -13574,14 +13571,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - - genfn(tcg_rd_ptr, 
tcg_rn_ptr); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); + gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); } static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 3fe65a0b08..205877ca48 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -661,12 +661,14 @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE) DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) DO_3SAME_CMP(VCEQ, TCG_COND_EQ) -static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) -{ - tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, - 0, gen_helper_gvec_pmul_b); -} +#define WRAP_OOL_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ + } + +WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) { @@ -728,107 +730,19 @@ static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a) return true; } -static bool trans_SHA256H_3s(DisasContext *s, arg_SHA256H_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha2, s)) { - return false; +#define DO_SHA2(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha2, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ } - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - gen_helper_crypto_sha256h(ptr1, ptr2, ptr3); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} - -static bool trans_SHA256H2_3s(DisasContext *s, arg_SHA256H2_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha2, s)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} - -static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha2, s)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} +DO_SHA2(SHA256H, gen_helper_crypto_sha256h) +DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) +DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) #define DO_3SAME_64(INSN, FUNC) \ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ diff --git a/target/arm/translate.c b/target/arm/translate.c index 74c1b5be42..c61180ea61 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5257,7 +5257,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) int vec_size; uint32_t imm; TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; - TCGv_ptr ptr1, ptr2; + TCGv_ptr ptr1; TCGv_i64 tmp64; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { @@ -6372,13 +6372,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { return 1; } - ptr1 = vfp_reg_ptr(true, rd); - ptr2 = vfp_reg_ptr(true, rm); - - gen_helper_crypto_sha1h(ptr1, ptr2); - - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); + tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0, + gen_helper_crypto_sha1h); break; case NEON_2RM_SHA1SU1: if ((rm | rd) & 1) { @@ -6392,17 +6387,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } else if (!dc_isar_feature(aa32_sha1, s)) { return 1; } - ptr1 = vfp_reg_ptr(true, rd); - ptr2 = vfp_reg_ptr(true, rm); - if (q) { - gen_helper_crypto_sha256su0(ptr1, ptr2); - } else { - gen_helper_crypto_sha1su1(ptr1, ptr2); - } - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); + tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0, + q ? 
gen_helper_crypto_sha256su0 + : gen_helper_crypto_sha1su1); break; - case NEON_2RM_VMVN: tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size); break; From afc8b7d32668547308bdd654a63cf5228936e0ba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 May 2020 14:28:30 -0700 Subject: [PATCH 09/29] target/arm: Split helper_crypto_sha1_3reg Rather than passing an opcode to a helper, fully decode the operation at translate time. Use clear_tail_16 to zap the balance of the SVE register with the AdvSIMD write. Signed-off-by: Richard Henderson Message-id: 20200514212831.31248-6-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/crypto_helper.c | 99 +++++++++++++++++++++------------ target/arm/helper.h | 5 +- target/arm/neon-dp.decode | 6 +- target/arm/translate-a64.c | 29 ++++------ target/arm/translate-neon.inc.c | 46 ++++----------- 5 files changed, 93 insertions(+), 92 deletions(-) diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 7124745c32..636683d0f1 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -24,11 +24,11 @@ union CRYPTO_STATE { }; #ifdef HOST_WORDS_BIGENDIAN -#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) -#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) +#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) +#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) #else -#define CR_ST_BYTE(state, i) (state.bytes[i]) -#define CR_ST_WORD(state, i) (state.words[i]) +#define CR_ST_BYTE(state, i) ((state).bytes[i]) +#define CR_ST_WORD(state, i) ((state).words[i]) #endif /* @@ -258,49 +258,74 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) return (x & y) | ((x | y) & z); } -void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) +void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) +{ + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t d0, d1; + + d0 = d[1] ^ d[0] ^ m[0]; + d1 = 
n[0] ^ d[1] ^ m[1]; + d[0] = d0; + d[1] = d1; + + clear_tail_16(vd, desc); +} + +static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, + uint64_t *rm, uint32_t desc, + uint32_t (*fn)(union CRYPTO_STATE *d)) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + int i; - if (op == 3) { /* sha1su0 */ - d.l[0] ^= d.l[1] ^ m.l[0]; - d.l[1] ^= n.l[0] ^ m.l[1]; - } else { - int i; + for (i = 0; i < 4; i++) { + uint32_t t = fn(&d); - for (i = 0; i < 4; i++) { - uint32_t t; + t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) + + CR_ST_WORD(m, i); - switch (op) { - case 0: /* sha1c */ - t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 1: /* sha1p */ - t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 2: /* sha1m */ - t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - default: - g_assert_not_reached(); - } - t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) - + CR_ST_WORD(m, i); - - CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); - CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); - CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); - CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); - CR_ST_WORD(d, 0) = t; - } + CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = t; } rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); +} + +static uint32_t do_sha1c(union CRYPTO_STATE *d) +{ + return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); +} + +static uint32_t do_sha1p(union CRYPTO_STATE *d) +{ + return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void 
HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); +} + +static uint32_t do_sha1m(union CRYPTO_STATE *d) +{ + return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); } void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) diff --git a/target/arm/helper.h b/target/arm/helper.h index cee23adbfc..13475ecf81 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -513,7 +513,10 @@ DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 5b2fc65d72..8af7c53d8b 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -168,8 +168,10 @@ VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same @3same_crypto .... .... .... .... .... .... .... .... \ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0 q=1 -SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \ - vm=%vm_dp vn=%vn_dp vd=%vd_dp +SHA1C_3s 1111 001 0 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA1P_3s 1111 001 0 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto +SHA1M_3s 1111 001 0 0 . 10 .... .... 1100 . 1 . 0 .... 
@3same_crypto +SHA1SU0_3s 1111 001 0 0 . 11 .... .... 1100 . 1 . 0 .... @3same_crypto SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 5a4f8196bd..3f28888cd2 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13475,10 +13475,19 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1C */ + genfn = gen_helper_crypto_sha1c; + feature = dc_isar_feature(aa64_sha1, s); + break; case 1: /* SHA1P */ + genfn = gen_helper_crypto_sha1p; + feature = dc_isar_feature(aa64_sha1, s); + break; case 2: /* SHA1M */ + genfn = gen_helper_crypto_sha1m; + feature = dc_isar_feature(aa64_sha1, s); + break; case 3: /* SHA1SU0 */ - genfn = NULL; + genfn = gen_helper_crypto_sha1su0; feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ @@ -13506,23 +13515,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } - - if (genfn) { - gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); - } else { - TCGv_i32 tcg_opcode = tcg_const_i32(opcode); - TCGv_ptr tcg_rd_ptr = vec_full_reg_ptr(s, rd); - TCGv_ptr tcg_rn_ptr = vec_full_reg_ptr(s, rn); - TCGv_ptr tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr, - tcg_rm_ptr, tcg_opcode); - - tcg_temp_free_i32(tcg_opcode); - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_ptr(tcg_rm_ptr); - } + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); } /* Crypto two-reg SHA diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 205877ca48..7b19753c8c 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -693,42 +693,20 @@ static bool 
trans_VMUL_p_3s(DisasContext *s, arg_3same *a) DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) -static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a) -{ - TCGv_ptr ptr1, ptr2, ptr3; - TCGv_i32 tmp; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON) || - !dc_isar_feature(aa32_sha1, s)) { - return false; +#define DO_SHA1(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha1, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ } - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if ((a->vn | a->vm | a->vd) & 1) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - ptr1 = vfp_reg_ptr(true, a->vd); - ptr2 = vfp_reg_ptr(true, a->vn); - ptr3 = vfp_reg_ptr(true, a->vm); - tmp = tcg_const_i32(a->optype); - gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp); - tcg_temp_free_i32(tmp); - tcg_temp_free_ptr(ptr1); - tcg_temp_free_ptr(ptr2); - tcg_temp_free_ptr(ptr3); - - return true; -} +DO_SHA1(SHA1C, gen_helper_crypto_sha1c) +DO_SHA1(SHA1P, gen_helper_crypto_sha1p) +DO_SHA1(SHA1M, gen_helper_crypto_sha1m) +DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) #define DO_SHA2(NAME, FUNC) \ WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ From 43fa36c96c24349145497adc1b451f9caf74e344 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 May 2020 14:28:31 -0700 Subject: [PATCH 10/29] target/arm: Split helper_crypto_sm3tt Rather than passing an opcode to a helper, fully decode the operation at translate time. Use clear_tail_16 to zap the balance of the SVE register with the AdvSIMD write. 
Signed-off-by: Richard Henderson Message-id: 20200514212831.31248-7-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/crypto_helper.c | 24 ++++++++++++++++++------ target/arm/helper.h | 5 ++++- target/arm/translate-a64.c | 21 +++++---------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 636683d0f1..c76806dc8d 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -632,15 +632,14 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) clear_tail_16(vd, desc); } -void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, - uint32_t opcode) +static inline void QEMU_ALWAYS_INLINE +crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, + uint32_t desc, uint32_t opcode) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t imm2 = simd_data(desc); uint32_t t; assert(imm2 < 4); @@ -655,7 +654,7 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, /* SM3TT2B */ t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); } else { - g_assert_not_reached(); + qemu_build_not_reached(); } t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); @@ -680,8 +679,21 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); } +#define DO_SM3TT(NAME, OPCODE) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } + +DO_SM3TT(crypto_sm3tt1a, 0) +DO_SM3TT(crypto_sm3tt1b, 1) +DO_SM3TT(crypto_sm3tt2a, 2) +DO_SM3TT(crypto_sm3tt2b, 3) + +#undef DO_SM3TT + static uint8_t const sm4_sbox[] = { 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, diff 
--git a/target/arm/helper.h b/target/arm/helper.h index 13475ecf81..2a20c8174c 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -531,7 +531,10 @@ DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 3f28888cd2..a0e72ad694 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -13866,13 +13866,15 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) { + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, + gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, + }; int opcode = extract32(insn, 10, 2); int imm2 = extract32(insn, 12, 2); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - TCGv_i32 tcg_imm2, tcg_opcode; if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); @@ -13883,20 +13885,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - tcg_imm2 = tcg_const_i32(imm2); - tcg_opcode = tcg_const_i32(opcode); - - gen_helper_crypto_sm3tt(tcg_rd_ptr, 
tcg_rn_ptr, tcg_rm_ptr, tcg_imm2, - tcg_opcode); - - tcg_temp_free_ptr(tcg_rd_ptr); - tcg_temp_free_ptr(tcg_rn_ptr); - tcg_temp_free_ptr(tcg_rm_ptr); - tcg_temp_free_i32(tcg_imm2); - tcg_temp_free_i32(tcg_opcode); + gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); } /* C3.6 Data processing - SIMD, inc Crypto From d04bf49c9ee8fa3e8f2961462a9f053c3faa8548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 3 Jun 2020 07:59:15 +0200 Subject: [PATCH 11/29] hw/adc/stm32f2xx_adc: Correct memory region size and access size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADC region size is 256B, split as: - [0x00 - 0x4f] defined - [0x50 - 0xff] reserved All registers are 32-bit (thus when the datasheet mentions the last defined register is 0x4c, it means its address range is 0x4c .. 0x4f). This model implementation is also 32-bit. Set MemoryRegionOps 'impl' fields. See: 'RM0033 Reference manual Rev 8', Table 10.13.18 "ADC register map". 
Reported-by: Seth Kintigh Reviewed-by: Alistair Francis Signed-off-by: Philippe Mathieu-Daudé Message-id: 20200603055915.17678-1-f4bug@amsat.org Signed-off-by: Peter Maydell --- hw/adc/stm32f2xx_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/adc/stm32f2xx_adc.c b/hw/adc/stm32f2xx_adc.c index 4f9d485ecf..01a0b14e69 100644 --- a/hw/adc/stm32f2xx_adc.c +++ b/hw/adc/stm32f2xx_adc.c @@ -246,6 +246,8 @@ static const MemoryRegionOps stm32f2xx_adc_ops = { .read = stm32f2xx_adc_read, .write = stm32f2xx_adc_write, .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 4, + .impl.max_access_size = 4, }; static const VMStateDescription vmstate_stm32f2xx_adc = { @@ -278,7 +280,7 @@ static void stm32f2xx_adc_init(Object *obj) sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); memory_region_init_io(&s->mmio, obj, &stm32f2xx_adc_ops, s, - TYPE_STM32F2XX_ADC, 0xFF); + TYPE_STM32F2XX_ADC, 0x100); sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); } From 27dfbafaa78c3937122954cb0706e4e203812c56 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 25 May 2020 16:12:37 +0200 Subject: [PATCH 12/29] tests/acceptance: Add a boot test for the xlnx-versal-virt machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As described by Edgar here: https://www.mail-archive.com/qemu-devel@nongnu.org/msg605124.html we can use the Ubuntu kernel for testing the xlnx-versal-virt machine. So let's add a boot test for this now. Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth Reviewed-by: Alistair Francis Reviewed-by: Edgar E. 
Iglesias Message-id: 20200525141237.15243-1-thuth@redhat.com Signed-off-by: Peter Maydell --- tests/acceptance/boot_linux_console.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py index 12725d4529..bbbbd30e48 100644 --- a/tests/acceptance/boot_linux_console.py +++ b/tests/acceptance/boot_linux_console.py @@ -308,6 +308,32 @@ class BootLinuxConsole(LinuxKernelTest): console_pattern = 'Kernel command line: %s' % kernel_command_line self.wait_for_console_pattern(console_pattern) + def test_aarch64_xlnx_versal_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:xlnx-versal-virt + :avocado: tags=device:pl011 + :avocado: tags=device:arm_gicv3 + """ + kernel_url = ('http://ports.ubuntu.com/ubuntu-ports/dists/' + 'bionic-updates/main/installer-arm64/current/images/' + 'netboot/ubuntu-installer/arm64/linux') + kernel_hash = '5bfc54cf7ed8157d93f6e5b0241e727b6dc22c50' + kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) + + initrd_url = ('http://ports.ubuntu.com/ubuntu-ports/dists/' + 'bionic-updates/main/installer-arm64/current/images/' + 'netboot/ubuntu-installer/arm64/initrd.gz') + initrd_hash = 'd385d3e88d53e2004c5d43cbe668b458a094f772' + initrd_path = self.fetch_asset(initrd_url, asset_hash=initrd_hash) + + self.vm.set_console() + self.vm.add_args('-m', '2G', + '-kernel', kernel_path, + '-initrd', initrd_path) + self.vm.launch() + self.wait_for_console_pattern('Checked W+X mappings: passed') + def test_arm_virt(self): """ :avocado: tags=arch:arm From 7a1e049a707149b306b7b65c66d504d251c0a4b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 2 Jun 2020 15:50:50 +0200 Subject: [PATCH 13/29] docs/system: Document Aspeed boards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Cédric Le Goater Reviewed-by: Philippe Mathieu-Daudé Message-id: 
20200602135050.593692-1-clg@kaod.org Signed-off-by: Peter Maydell --- docs/system/arm/aspeed.rst | 85 ++++++++++++++++++++++++++++++++++++++ docs/system/target-arm.rst | 1 + 2 files changed, 86 insertions(+) create mode 100644 docs/system/arm/aspeed.rst diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst new file mode 100644 index 0000000000..45f891eb3c --- /dev/null +++ b/docs/system/arm/aspeed.rst @@ -0,0 +1,85 @@ +Aspeed family boards (``*-bmc``, ``ast2500-evb``, ``ast2600-evb``) +================================================================== + +The QEMU Aspeed machines model BMCs of various OpenPOWER systems and +Aspeed evaluation boards. They are based on different releases of the +Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the +AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600 +with dual cores ARM Cortex A7 CPUs (1.2GHz). + +The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C, +etc. + +AST2400 SoC based machines : + +- ``palmetto-bmc`` OpenPOWER Palmetto POWER8 BMC + +AST2500 SoC based machines : + +- ``ast2500-evb`` Aspeed AST2500 Evaluation board +- ``romulus-bmc`` OpenPOWER Romulus POWER9 BMC +- ``witherspoon-bmc`` OpenPOWER Witherspoon POWER9 BMC +- ``sonorapass-bmc`` OCP SonoraPass BMC +- ``swift-bmc`` OpenPOWER Swift BMC POWER9 + +AST2600 SoC based machines : + +- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex A7) +- ``tacoma-bmc`` OpenPOWER Witherspoon POWER9 AST2600 BMC + +Supported devices +----------------- + + * SMP (for the AST2600 Cortex-A7) + * Interrupt Controller (VIC) + * Timer Controller + * RTC Controller + * I2C Controller + * System Control Unit (SCU) + * SRAM mapping + * X-DMA Controller (basic interface) + * Static Memory Controller (SMC or FMC) - Only SPI Flash support + * SPI Memory Controller + * USB 2.0 Controller + * SD/MMC storage controllers + * SDRAM controller (dummy interface for basic settings and training) + * Watchdog Controller + * 
GPIO Controller (Master only) + * UART + * Ethernet controllers + + +Missing devices +--------------- + + * Coprocessor support + * ADC (out of tree implementation) + * PWM and Fan Controller + * LPC Bus Controller + * Slave GPIO Controller + * Super I/O Controller + * Hash/Crypto Engine + * PCI-Express 1 Controller + * Graphic Display Controller + * PECI Controller + * MCTP Controller + * Mailbox Controller + * Virtual UART + * eSPI Controller + * I3C Controller + +Boot options +------------ + +The Aspeed machines can be started using the -kernel option to load a +Linux kernel or from a firmware image which can be downloaded from the +OpenPOWER jenkins : + + https://openpower.xyz/ + +The image should be attached as an MTD drive. Run : + +.. code-block:: bash + + $ qemu-system-arm -M romulus-bmc -nic user \ + -drive file=flash-romulus,format=raw,if=mtd -nographic diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index dce384cb0e..1bd477a293 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -81,6 +81,7 @@ undocumented; you can get a complete list by running arm/realview arm/versatile arm/vexpress + arm/aspeed arm/musicpal arm/nseries arm/orangepi From 3d46938bbbd7cea47ab9b994c0438aea3d10d98f Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Wed, 20 May 2020 16:53:43 -0700 Subject: [PATCH 14/29] raspi: add BCM2835 SOC MPHI emulation Add BCM2835 SOC MPHI (Message-based Parallel Host Interface) emulation. It is very basic, only providing the FIQ interrupt needed to allow the dwc-otg USB host controller driver in the Raspbian kernel to function. 
Signed-off-by: Paul Zimmerman Acked-by: Philippe Mathieu-Daude Reviewed-by: Peter Maydell Message-id: 20200520235349.21215-2-pauldzim@gmail.com Signed-off-by: Peter Maydell --- hw/arm/bcm2835_peripherals.c | 17 +++ hw/misc/Makefile.objs | 1 + hw/misc/bcm2835_mphi.c | 191 +++++++++++++++++++++++++++ include/hw/arm/bcm2835_peripherals.h | 2 + include/hw/misc/bcm2835_mphi.h | 44 ++++++ 5 files changed, 255 insertions(+) create mode 100644 hw/misc/bcm2835_mphi.c create mode 100644 include/hw/misc/bcm2835_mphi.h diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index f1bcc14f55..b3e0495040 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -125,6 +125,10 @@ static void bcm2835_peripherals_init(Object *obj) OBJECT(&s->sdhci.sdbus)); object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost", OBJECT(&s->sdhost.sdbus)); + + /* Mphi */ + sysbus_init_child_obj(obj, "mphi", &s->mphi, sizeof(s->mphi), + TYPE_BCM2835_MPHI); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -360,6 +364,19 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus"); + /* Mphi */ + object_property_set_bool(OBJECT(&s->mphi), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, MPHI_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->mphi), 0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->mphi), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_HOSTPORT)); + create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40); create_unimp(s, &s->cprman, "bcm2835-cprman", CPRMAN_OFFSET, 0x1000); create_unimp(s, &s->a2w, "bcm2835-a2w", A2W_OFFSET, 0x1000); diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index b25181b711..60a9d80b74 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -56,6 +56,7 @@ 
common-obj-$(CONFIG_OMAP) += omap_l4.o common-obj-$(CONFIG_OMAP) += omap_sdrc.o common-obj-$(CONFIG_OMAP) += omap_tap.o common-obj-$(CONFIG_RASPI) += bcm2835_mbox.o +common-obj-$(CONFIG_RASPI) += bcm2835_mphi.o common-obj-$(CONFIG_RASPI) += bcm2835_property.o common-obj-$(CONFIG_RASPI) += bcm2835_rng.o common-obj-$(CONFIG_RASPI) += bcm2835_thermal.o diff --git a/hw/misc/bcm2835_mphi.c b/hw/misc/bcm2835_mphi.c new file mode 100644 index 0000000000..0428e10ba5 --- /dev/null +++ b/hw/misc/bcm2835_mphi.c @@ -0,0 +1,191 @@ +/* + * BCM2835 SOC MPHI emulation + * + * Very basic emulation, only providing the FIQ interrupt needed to + * allow the dwc-otg USB host controller driver in the Raspbian kernel + * to function. + * + * Copyright (c) 2020 Paul Zimmerman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/misc/bcm2835_mphi.h" +#include "migration/vmstate.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" + +static inline void mphi_raise_irq(BCM2835MphiState *s) +{ + qemu_set_irq(s->irq, 1); +} + +static inline void mphi_lower_irq(BCM2835MphiState *s) +{ + qemu_set_irq(s->irq, 0); +} + +static uint64_t mphi_reg_read(void *ptr, hwaddr addr, unsigned size) +{ + BCM2835MphiState *s = ptr; + uint32_t val = 0; + + switch (addr) { + case 0x28: /* outdda */ + val = s->outdda; + break; + case 0x2c: /* outddb */ + val = s->outddb; + break; + case 0x4c: /* ctrl */ + val = s->ctrl; + val |= 1 << 17; + break; + case 0x50: /* intstat */ + val = s->intstat; + break; + case 0x1f0: /* swirq_set */ + val = s->swirq; + break; + case 0x1f4: /* swirq_clr */ + val = s->swirq; + break; + default: + qemu_log_mask(LOG_UNIMP, "read from unknown register"); + break; + } + + return val; +} + +static void mphi_reg_write(void *ptr, hwaddr addr, uint64_t val, unsigned size) +{ + BCM2835MphiState *s = ptr; + int do_irq = 0; + + switch (addr) { + case 0x28: /* outdda */ + s->outdda = val; + break; + case 0x2c: /* outddb */ + s->outddb = val; + if (val & (1 << 29)) { + do_irq = 1; + } + break; + case 0x4c: /* ctrl */ + s->ctrl = val; + if (val & (1 << 16)) { + do_irq = -1; + } + break; + case 0x50: /* intstat */ + s->intstat = val; + if (val & ((1 << 16) | (1 << 29))) { + do_irq = -1; + } + break; + case 0x1f0: /* swirq_set */ + s->swirq |= val; + do_irq = 1; + break; + case 0x1f4: /* swirq_clr */ + s->swirq &= ~val; + do_irq = -1; + break; + default: + qemu_log_mask(LOG_UNIMP, "write to unknown register"); + return; + } + + if (do_irq > 0) { + mphi_raise_irq(s); + } else if (do_irq < 0) { + mphi_lower_irq(s); + } +} + +static const MemoryRegionOps mphi_mmio_ops = { + .read = mphi_reg_read, + .write = mphi_reg_write, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .endianness = 
DEVICE_LITTLE_ENDIAN, +}; + +static void mphi_reset(DeviceState *dev) +{ + BCM2835MphiState *s = BCM2835_MPHI(dev); + + s->outdda = 0; + s->outddb = 0; + s->ctrl = 0; + s->intstat = 0; + s->swirq = 0; +} + +static void mphi_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + BCM2835MphiState *s = BCM2835_MPHI(dev); + + sysbus_init_irq(sbd, &s->irq); +} + +static void mphi_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + BCM2835MphiState *s = BCM2835_MPHI(obj); + + memory_region_init_io(&s->iomem, obj, &mphi_mmio_ops, s, "mphi", MPHI_MMIO_SIZE); + sysbus_init_mmio(sbd, &s->iomem); +} + +const VMStateDescription vmstate_mphi_state = { + .name = "mphi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(outdda, BCM2835MphiState), + VMSTATE_UINT32(outddb, BCM2835MphiState), + VMSTATE_UINT32(ctrl, BCM2835MphiState), + VMSTATE_UINT32(intstat, BCM2835MphiState), + VMSTATE_UINT32(swirq, BCM2835MphiState), + VMSTATE_END_OF_LIST() + } +}; + +static void mphi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = mphi_realize; + dc->reset = mphi_reset; + dc->vmsd = &vmstate_mphi_state; +} + +static const TypeInfo bcm2835_mphi_type_info = { + .name = TYPE_BCM2835_MPHI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835MphiState), + .instance_init = mphi_init, + .class_init = mphi_class_init, +}; + +static void bcm2835_mphi_register_types(void) +{ + type_register_static(&bcm2835_mphi_type_info); +} + +type_init(bcm2835_mphi_register_types) diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index 2e8655a7c2..7a7a8f6141 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -21,6 +21,7 @@ #include "hw/misc/bcm2835_property.h" #include "hw/misc/bcm2835_rng.h" #include "hw/misc/bcm2835_mbox.h" +#include "hw/misc/bcm2835_mphi.h" #include 
"hw/misc/bcm2835_thermal.h" #include "hw/sd/sdhci.h" #include "hw/sd/bcm2835_sdhost.h" @@ -42,6 +43,7 @@ typedef struct BCM2835PeripheralState { qemu_irq irq, fiq; BCM2835SystemTimerState systmr; + BCM2835MphiState mphi; UnimplementedDeviceState armtmr; UnimplementedDeviceState cprman; UnimplementedDeviceState a2w; diff --git a/include/hw/misc/bcm2835_mphi.h b/include/hw/misc/bcm2835_mphi.h new file mode 100644 index 0000000000..e084314d0f --- /dev/null +++ b/include/hw/misc/bcm2835_mphi.h @@ -0,0 +1,44 @@ +/* + * BCM2835 SOC MPHI state definitions + * + * Copyright (c) 2020 Paul Zimmerman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef HW_MISC_BCM2835_MPHI_H +#define HW_MISC_BCM2835_MPHI_H + +#include "hw/irq.h" +#include "hw/sysbus.h" + +#define MPHI_MMIO_SIZE 0x1000 + +typedef struct BCM2835MphiState BCM2835MphiState; + +struct BCM2835MphiState { + SysBusDevice parent_obj; + qemu_irq irq; + MemoryRegion iomem; + + uint32_t outdda; + uint32_t outddb; + uint32_t ctrl; + uint32_t intstat; + uint32_t swirq; +}; + +#define TYPE_BCM2835_MPHI "bcm2835-mphi" + +#define BCM2835_MPHI(obj) \ + OBJECT_CHECK(BCM2835MphiState, (obj), TYPE_BCM2835_MPHI) + +#endif From 3f5b312a3f9faf2e20a700be70d921e26220a0fe Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Wed, 20 May 2020 16:53:44 -0700 Subject: [PATCH 15/29] dwc-hsotg (dwc2) USB host controller register definitions Import the dwc-hsotg (dwc2) register definitions file from the Linux kernel. 
This is a copy of drivers/usb/dwc2/hw.h from the mainline Linux kernel, the only changes being to the header, and two instances of 'u32' changed to 'uint32_t' to allow it to compile. Checkpatch throws a boatload of errors due to the tab indentation, but I would rather import it as-is than reformat it. Signed-off-by: Paul Zimmerman Message-id: 20200520235349.21215-3-pauldzim@gmail.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- include/hw/usb/dwc2-regs.h | 899 +++++++++++++++++++++++++++++++++++++ 1 file changed, 899 insertions(+) create mode 100644 include/hw/usb/dwc2-regs.h diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h new file mode 100644 index 0000000000..40af23a0ba --- /dev/null +++ b/include/hw/usb/dwc2-regs.h @@ -0,0 +1,899 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Imported from the Linux kernel file drivers/usb/dwc2/hw.h, commit + * a89bae709b3492b478480a2c9734e7e9393b279c ("usb: dwc2: Move + * UTMI_PHY_DATA defines closer") + * + * hw.h - DesignWare HS OTG Controller hardware definitions + * + * Copyright 2004-2013 Synopsys, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the above-listed copyright holders may not be used + * to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __DWC2_HW_H__ +#define __DWC2_HW_H__ + +#define HSOTG_REG(x) (x) + +#define GOTGCTL HSOTG_REG(0x000) +#define GOTGCTL_CHIRPEN BIT(27) +#define GOTGCTL_MULT_VALID_BC_MASK (0x1f << 22) +#define GOTGCTL_MULT_VALID_BC_SHIFT 22 +#define GOTGCTL_OTGVER BIT(20) +#define GOTGCTL_BSESVLD BIT(19) +#define GOTGCTL_ASESVLD BIT(18) +#define GOTGCTL_DBNC_SHORT BIT(17) +#define GOTGCTL_CONID_B BIT(16) +#define GOTGCTL_DBNCE_FLTR_BYPASS BIT(15) +#define GOTGCTL_DEVHNPEN BIT(11) +#define GOTGCTL_HSTSETHNPEN BIT(10) +#define GOTGCTL_HNPREQ BIT(9) +#define GOTGCTL_HSTNEGSCS BIT(8) +#define GOTGCTL_SESREQ BIT(1) +#define GOTGCTL_SESREQSCS BIT(0) + +#define GOTGINT HSOTG_REG(0x004) +#define GOTGINT_DBNCE_DONE BIT(19) +#define GOTGINT_A_DEV_TOUT_CHG BIT(18) +#define GOTGINT_HST_NEG_DET BIT(17) +#define GOTGINT_HST_NEG_SUC_STS_CHNG BIT(9) +#define GOTGINT_SES_REQ_SUC_STS_CHNG BIT(8) +#define GOTGINT_SES_END_DET BIT(2) + +#define GAHBCFG HSOTG_REG(0x008) +#define GAHBCFG_AHB_SINGLE BIT(23) +#define GAHBCFG_NOTI_ALL_DMA_WRIT BIT(22) +#define GAHBCFG_REM_MEM_SUPP BIT(21) +#define GAHBCFG_P_TXF_EMP_LVL BIT(8) +#define GAHBCFG_NP_TXF_EMP_LVL BIT(7) +#define GAHBCFG_DMA_EN BIT(5) +#define GAHBCFG_HBSTLEN_MASK (0xf << 1) +#define GAHBCFG_HBSTLEN_SHIFT 1 +#define GAHBCFG_HBSTLEN_SINGLE 0 +#define GAHBCFG_HBSTLEN_INCR 1 +#define GAHBCFG_HBSTLEN_INCR4 3 +#define GAHBCFG_HBSTLEN_INCR8 5 +#define GAHBCFG_HBSTLEN_INCR16 7 +#define GAHBCFG_GLBL_INTR_EN BIT(0) +#define GAHBCFG_CTRL_MASK (GAHBCFG_P_TXF_EMP_LVL | \ + GAHBCFG_NP_TXF_EMP_LVL | \ + GAHBCFG_DMA_EN | \ + GAHBCFG_GLBL_INTR_EN) + +#define GUSBCFG HSOTG_REG(0x00C) +#define GUSBCFG_FORCEDEVMODE BIT(30) +#define GUSBCFG_FORCEHOSTMODE BIT(29) +#define GUSBCFG_TXENDDELAY BIT(28) +#define GUSBCFG_ICTRAFFICPULLREMOVE BIT(27) +#define GUSBCFG_ICUSBCAP BIT(26) +#define GUSBCFG_ULPI_INT_PROT_DIS BIT(25) +#define GUSBCFG_INDICATORPASSTHROUGH BIT(24) +#define GUSBCFG_INDICATORCOMPLEMENT BIT(23) +#define GUSBCFG_TERMSELDLPULSE BIT(22) 
+#define GUSBCFG_ULPI_INT_VBUS_IND BIT(21) +#define GUSBCFG_ULPI_EXT_VBUS_DRV BIT(20) +#define GUSBCFG_ULPI_CLK_SUSP_M BIT(19) +#define GUSBCFG_ULPI_AUTO_RES BIT(18) +#define GUSBCFG_ULPI_FS_LS BIT(17) +#define GUSBCFG_OTG_UTMI_FS_SEL BIT(16) +#define GUSBCFG_PHY_LP_CLK_SEL BIT(15) +#define GUSBCFG_USBTRDTIM_MASK (0xf << 10) +#define GUSBCFG_USBTRDTIM_SHIFT 10 +#define GUSBCFG_HNPCAP BIT(9) +#define GUSBCFG_SRPCAP BIT(8) +#define GUSBCFG_DDRSEL BIT(7) +#define GUSBCFG_PHYSEL BIT(6) +#define GUSBCFG_FSINTF BIT(5) +#define GUSBCFG_ULPI_UTMI_SEL BIT(4) +#define GUSBCFG_PHYIF16 BIT(3) +#define GUSBCFG_PHYIF8 (0 << 3) +#define GUSBCFG_TOUTCAL_MASK (0x7 << 0) +#define GUSBCFG_TOUTCAL_SHIFT 0 +#define GUSBCFG_TOUTCAL_LIMIT 0x7 +#define GUSBCFG_TOUTCAL(_x) ((_x) << 0) + +#define GRSTCTL HSOTG_REG(0x010) +#define GRSTCTL_AHBIDLE BIT(31) +#define GRSTCTL_DMAREQ BIT(30) +#define GRSTCTL_TXFNUM_MASK (0x1f << 6) +#define GRSTCTL_TXFNUM_SHIFT 6 +#define GRSTCTL_TXFNUM_LIMIT 0x1f +#define GRSTCTL_TXFNUM(_x) ((_x) << 6) +#define GRSTCTL_TXFFLSH BIT(5) +#define GRSTCTL_RXFFLSH BIT(4) +#define GRSTCTL_IN_TKNQ_FLSH BIT(3) +#define GRSTCTL_FRMCNTRRST BIT(2) +#define GRSTCTL_HSFTRST BIT(1) +#define GRSTCTL_CSFTRST BIT(0) + +#define GINTSTS HSOTG_REG(0x014) +#define GINTMSK HSOTG_REG(0x018) +#define GINTSTS_WKUPINT BIT(31) +#define GINTSTS_SESSREQINT BIT(30) +#define GINTSTS_DISCONNINT BIT(29) +#define GINTSTS_CONIDSTSCHNG BIT(28) +#define GINTSTS_LPMTRANRCVD BIT(27) +#define GINTSTS_PTXFEMP BIT(26) +#define GINTSTS_HCHINT BIT(25) +#define GINTSTS_PRTINT BIT(24) +#define GINTSTS_RESETDET BIT(23) +#define GINTSTS_FET_SUSP BIT(22) +#define GINTSTS_INCOMPL_IP BIT(21) +#define GINTSTS_INCOMPL_SOOUT BIT(21) +#define GINTSTS_INCOMPL_SOIN BIT(20) +#define GINTSTS_OEPINT BIT(19) +#define GINTSTS_IEPINT BIT(18) +#define GINTSTS_EPMIS BIT(17) +#define GINTSTS_RESTOREDONE BIT(16) +#define GINTSTS_EOPF BIT(15) +#define GINTSTS_ISOUTDROP BIT(14) +#define GINTSTS_ENUMDONE BIT(13) +#define 
GINTSTS_USBRST BIT(12) +#define GINTSTS_USBSUSP BIT(11) +#define GINTSTS_ERLYSUSP BIT(10) +#define GINTSTS_I2CINT BIT(9) +#define GINTSTS_ULPI_CK_INT BIT(8) +#define GINTSTS_GOUTNAKEFF BIT(7) +#define GINTSTS_GINNAKEFF BIT(6) +#define GINTSTS_NPTXFEMP BIT(5) +#define GINTSTS_RXFLVL BIT(4) +#define GINTSTS_SOF BIT(3) +#define GINTSTS_OTGINT BIT(2) +#define GINTSTS_MODEMIS BIT(1) +#define GINTSTS_CURMODE_HOST BIT(0) + +#define GRXSTSR HSOTG_REG(0x01C) +#define GRXSTSP HSOTG_REG(0x020) +#define GRXSTS_FN_MASK (0x7f << 25) +#define GRXSTS_FN_SHIFT 25 +#define GRXSTS_PKTSTS_MASK (0xf << 17) +#define GRXSTS_PKTSTS_SHIFT 17 +#define GRXSTS_PKTSTS_GLOBALOUTNAK 1 +#define GRXSTS_PKTSTS_OUTRX 2 +#define GRXSTS_PKTSTS_HCHIN 2 +#define GRXSTS_PKTSTS_OUTDONE 3 +#define GRXSTS_PKTSTS_HCHIN_XFER_COMP 3 +#define GRXSTS_PKTSTS_SETUPDONE 4 +#define GRXSTS_PKTSTS_DATATOGGLEERR 5 +#define GRXSTS_PKTSTS_SETUPRX 6 +#define GRXSTS_PKTSTS_HCHHALTED 7 +#define GRXSTS_HCHNUM_MASK (0xf << 0) +#define GRXSTS_HCHNUM_SHIFT 0 +#define GRXSTS_DPID_MASK (0x3 << 15) +#define GRXSTS_DPID_SHIFT 15 +#define GRXSTS_BYTECNT_MASK (0x7ff << 4) +#define GRXSTS_BYTECNT_SHIFT 4 +#define GRXSTS_EPNUM_MASK (0xf << 0) +#define GRXSTS_EPNUM_SHIFT 0 + +#define GRXFSIZ HSOTG_REG(0x024) +#define GRXFSIZ_DEPTH_MASK (0xffff << 0) +#define GRXFSIZ_DEPTH_SHIFT 0 + +#define GNPTXFSIZ HSOTG_REG(0x028) +/* Use FIFOSIZE_* constants to access this register */ + +#define GNPTXSTS HSOTG_REG(0x02C) +#define GNPTXSTS_NP_TXQ_TOP_MASK (0x7f << 24) +#define GNPTXSTS_NP_TXQ_TOP_SHIFT 24 +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_MASK (0xff << 16) +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_SHIFT 16 +#define GNPTXSTS_NP_TXQ_SPC_AVAIL_GET(_v) (((_v) >> 16) & 0xff) +#define GNPTXSTS_NP_TXF_SPC_AVAIL_MASK (0xffff << 0) +#define GNPTXSTS_NP_TXF_SPC_AVAIL_SHIFT 0 +#define GNPTXSTS_NP_TXF_SPC_AVAIL_GET(_v) (((_v) >> 0) & 0xffff) + +#define GI2CCTL HSOTG_REG(0x0030) +#define GI2CCTL_BSYDNE BIT(31) +#define GI2CCTL_RW BIT(30) +#define GI2CCTL_I2CDATSE0 
BIT(28) +#define GI2CCTL_I2CDEVADDR_MASK (0x3 << 26) +#define GI2CCTL_I2CDEVADDR_SHIFT 26 +#define GI2CCTL_I2CSUSPCTL BIT(25) +#define GI2CCTL_ACK BIT(24) +#define GI2CCTL_I2CEN BIT(23) +#define GI2CCTL_ADDR_MASK (0x7f << 16) +#define GI2CCTL_ADDR_SHIFT 16 +#define GI2CCTL_REGADDR_MASK (0xff << 8) +#define GI2CCTL_REGADDR_SHIFT 8 +#define GI2CCTL_RWDATA_MASK (0xff << 0) +#define GI2CCTL_RWDATA_SHIFT 0 + +#define GPVNDCTL HSOTG_REG(0x0034) +#define GGPIO HSOTG_REG(0x0038) +#define GGPIO_STM32_OTG_GCCFG_PWRDWN BIT(16) + +#define GUID HSOTG_REG(0x003c) +#define GSNPSID HSOTG_REG(0x0040) +#define GHWCFG1 HSOTG_REG(0x0044) +#define GSNPSID_ID_MASK GENMASK(31, 16) + +#define GHWCFG2 HSOTG_REG(0x0048) +#define GHWCFG2_OTG_ENABLE_IC_USB BIT(31) +#define GHWCFG2_DEV_TOKEN_Q_DEPTH_MASK (0x1f << 26) +#define GHWCFG2_DEV_TOKEN_Q_DEPTH_SHIFT 26 +#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_MASK (0x3 << 24) +#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_SHIFT 24 +#define GHWCFG2_NONPERIO_TX_Q_DEPTH_MASK (0x3 << 22) +#define GHWCFG2_NONPERIO_TX_Q_DEPTH_SHIFT 22 +#define GHWCFG2_MULTI_PROC_INT BIT(20) +#define GHWCFG2_DYNAMIC_FIFO BIT(19) +#define GHWCFG2_PERIO_EP_SUPPORTED BIT(18) +#define GHWCFG2_NUM_HOST_CHAN_MASK (0xf << 14) +#define GHWCFG2_NUM_HOST_CHAN_SHIFT 14 +#define GHWCFG2_NUM_DEV_EP_MASK (0xf << 10) +#define GHWCFG2_NUM_DEV_EP_SHIFT 10 +#define GHWCFG2_FS_PHY_TYPE_MASK (0x3 << 8) +#define GHWCFG2_FS_PHY_TYPE_SHIFT 8 +#define GHWCFG2_FS_PHY_TYPE_NOT_SUPPORTED 0 +#define GHWCFG2_FS_PHY_TYPE_DEDICATED 1 +#define GHWCFG2_FS_PHY_TYPE_SHARED_UTMI 2 +#define GHWCFG2_FS_PHY_TYPE_SHARED_ULPI 3 +#define GHWCFG2_HS_PHY_TYPE_MASK (0x3 << 6) +#define GHWCFG2_HS_PHY_TYPE_SHIFT 6 +#define GHWCFG2_HS_PHY_TYPE_NOT_SUPPORTED 0 +#define GHWCFG2_HS_PHY_TYPE_UTMI 1 +#define GHWCFG2_HS_PHY_TYPE_ULPI 2 +#define GHWCFG2_HS_PHY_TYPE_UTMI_ULPI 3 +#define GHWCFG2_POINT2POINT BIT(5) +#define GHWCFG2_ARCHITECTURE_MASK (0x3 << 3) +#define GHWCFG2_ARCHITECTURE_SHIFT 3 +#define GHWCFG2_SLAVE_ONLY_ARCH 0 +#define 
GHWCFG2_EXT_DMA_ARCH 1 +#define GHWCFG2_INT_DMA_ARCH 2 +#define GHWCFG2_OP_MODE_MASK (0x7 << 0) +#define GHWCFG2_OP_MODE_SHIFT 0 +#define GHWCFG2_OP_MODE_HNP_SRP_CAPABLE 0 +#define GHWCFG2_OP_MODE_SRP_ONLY_CAPABLE 1 +#define GHWCFG2_OP_MODE_NO_HNP_SRP_CAPABLE 2 +#define GHWCFG2_OP_MODE_SRP_CAPABLE_DEVICE 3 +#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_DEVICE 4 +#define GHWCFG2_OP_MODE_SRP_CAPABLE_HOST 5 +#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_HOST 6 +#define GHWCFG2_OP_MODE_UNDEFINED 7 + +#define GHWCFG3 HSOTG_REG(0x004c) +#define GHWCFG3_DFIFO_DEPTH_MASK (0xffff << 16) +#define GHWCFG3_DFIFO_DEPTH_SHIFT 16 +#define GHWCFG3_OTG_LPM_EN BIT(15) +#define GHWCFG3_BC_SUPPORT BIT(14) +#define GHWCFG3_OTG_ENABLE_HSIC BIT(13) +#define GHWCFG3_ADP_SUPP BIT(12) +#define GHWCFG3_SYNCH_RESET_TYPE BIT(11) +#define GHWCFG3_OPTIONAL_FEATURES BIT(10) +#define GHWCFG3_VENDOR_CTRL_IF BIT(9) +#define GHWCFG3_I2C BIT(8) +#define GHWCFG3_OTG_FUNC BIT(7) +#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_MASK (0x7 << 4) +#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_SHIFT 4 +#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_MASK (0xf << 0) +#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_SHIFT 0 + +#define GHWCFG4 HSOTG_REG(0x0050) +#define GHWCFG4_DESC_DMA_DYN BIT(31) +#define GHWCFG4_DESC_DMA BIT(30) +#define GHWCFG4_NUM_IN_EPS_MASK (0xf << 26) +#define GHWCFG4_NUM_IN_EPS_SHIFT 26 +#define GHWCFG4_DED_FIFO_EN BIT(25) +#define GHWCFG4_DED_FIFO_SHIFT 25 +#define GHWCFG4_SESSION_END_FILT_EN BIT(24) +#define GHWCFG4_B_VALID_FILT_EN BIT(23) +#define GHWCFG4_A_VALID_FILT_EN BIT(22) +#define GHWCFG4_VBUS_VALID_FILT_EN BIT(21) +#define GHWCFG4_IDDIG_FILT_EN BIT(20) +#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_MASK (0xf << 16) +#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_SHIFT 16 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK (0x3 << 14) +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT 14 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8 0 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_16 1 +#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8_OR_16 2 +#define GHWCFG4_ACG_SUPPORTED BIT(12) +#define 
GHWCFG4_IPG_ISOC_SUPPORTED BIT(11) +#define GHWCFG4_SERVICE_INTERVAL_SUPPORTED BIT(10) +#define GHWCFG4_XHIBER BIT(7) +#define GHWCFG4_HIBER BIT(6) +#define GHWCFG4_MIN_AHB_FREQ BIT(5) +#define GHWCFG4_POWER_OPTIMIZ BIT(4) +#define GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK (0xf << 0) +#define GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT 0 + +#define GLPMCFG HSOTG_REG(0x0054) +#define GLPMCFG_INVSELHSIC BIT(31) +#define GLPMCFG_HSICCON BIT(30) +#define GLPMCFG_RSTRSLPSTS BIT(29) +#define GLPMCFG_ENBESL BIT(28) +#define GLPMCFG_LPM_RETRYCNT_STS_MASK (0x7 << 25) +#define GLPMCFG_LPM_RETRYCNT_STS_SHIFT 25 +#define GLPMCFG_SNDLPM BIT(24) +#define GLPMCFG_RETRY_CNT_MASK (0x7 << 21) +#define GLPMCFG_RETRY_CNT_SHIFT 21 +#define GLPMCFG_LPM_REJECT_CTRL_CONTROL BIT(21) +#define GLPMCFG_LPM_ACCEPT_CTRL_ISOC BIT(22) +#define GLPMCFG_LPM_CHNL_INDX_MASK (0xf << 17) +#define GLPMCFG_LPM_CHNL_INDX_SHIFT 17 +#define GLPMCFG_L1RESUMEOK BIT(16) +#define GLPMCFG_SLPSTS BIT(15) +#define GLPMCFG_COREL1RES_MASK (0x3 << 13) +#define GLPMCFG_COREL1RES_SHIFT 13 +#define GLPMCFG_HIRD_THRES_MASK (0x1f << 8) +#define GLPMCFG_HIRD_THRES_SHIFT 8 +#define GLPMCFG_HIRD_THRES_EN (0x10 << 8) +#define GLPMCFG_ENBLSLPM BIT(7) +#define GLPMCFG_BREMOTEWAKE BIT(6) +#define GLPMCFG_HIRD_MASK (0xf << 2) +#define GLPMCFG_HIRD_SHIFT 2 +#define GLPMCFG_APPL1RES BIT(1) +#define GLPMCFG_LPMCAP BIT(0) + +#define GPWRDN HSOTG_REG(0x0058) +#define GPWRDN_MULT_VAL_ID_BC_MASK (0x1f << 24) +#define GPWRDN_MULT_VAL_ID_BC_SHIFT 24 +#define GPWRDN_ADP_INT BIT(23) +#define GPWRDN_BSESSVLD BIT(22) +#define GPWRDN_IDSTS BIT(21) +#define GPWRDN_LINESTATE_MASK (0x3 << 19) +#define GPWRDN_LINESTATE_SHIFT 19 +#define GPWRDN_STS_CHGINT_MSK BIT(18) +#define GPWRDN_STS_CHGINT BIT(17) +#define GPWRDN_SRP_DET_MSK BIT(16) +#define GPWRDN_SRP_DET BIT(15) +#define GPWRDN_CONNECT_DET_MSK BIT(14) +#define GPWRDN_CONNECT_DET BIT(13) +#define GPWRDN_DISCONN_DET_MSK BIT(12) +#define GPWRDN_DISCONN_DET BIT(11) +#define GPWRDN_RST_DET_MSK BIT(10) +#define 
GPWRDN_RST_DET BIT(9) +#define GPWRDN_LNSTSCHG_MSK BIT(8) +#define GPWRDN_LNSTSCHG BIT(7) +#define GPWRDN_DIS_VBUS BIT(6) +#define GPWRDN_PWRDNSWTCH BIT(5) +#define GPWRDN_PWRDNRSTN BIT(4) +#define GPWRDN_PWRDNCLMP BIT(3) +#define GPWRDN_RESTORE BIT(2) +#define GPWRDN_PMUACTV BIT(1) +#define GPWRDN_PMUINTSEL BIT(0) + +#define GDFIFOCFG HSOTG_REG(0x005c) +#define GDFIFOCFG_EPINFOBASE_MASK (0xffff << 16) +#define GDFIFOCFG_EPINFOBASE_SHIFT 16 +#define GDFIFOCFG_GDFIFOCFG_MASK (0xffff << 0) +#define GDFIFOCFG_GDFIFOCFG_SHIFT 0 + +#define ADPCTL HSOTG_REG(0x0060) +#define ADPCTL_AR_MASK (0x3 << 27) +#define ADPCTL_AR_SHIFT 27 +#define ADPCTL_ADP_TMOUT_INT_MSK BIT(26) +#define ADPCTL_ADP_SNS_INT_MSK BIT(25) +#define ADPCTL_ADP_PRB_INT_MSK BIT(24) +#define ADPCTL_ADP_TMOUT_INT BIT(23) +#define ADPCTL_ADP_SNS_INT BIT(22) +#define ADPCTL_ADP_PRB_INT BIT(21) +#define ADPCTL_ADPENA BIT(20) +#define ADPCTL_ADPRES BIT(19) +#define ADPCTL_ENASNS BIT(18) +#define ADPCTL_ENAPRB BIT(17) +#define ADPCTL_RTIM_MASK (0x7ff << 6) +#define ADPCTL_RTIM_SHIFT 6 +#define ADPCTL_PRB_PER_MASK (0x3 << 4) +#define ADPCTL_PRB_PER_SHIFT 4 +#define ADPCTL_PRB_DELTA_MASK (0x3 << 2) +#define ADPCTL_PRB_DELTA_SHIFT 2 +#define ADPCTL_PRB_DSCHRG_MASK (0x3 << 0) +#define ADPCTL_PRB_DSCHRG_SHIFT 0 + +#define GREFCLK HSOTG_REG(0x0064) +#define GREFCLK_REFCLKPER_MASK (0x1ffff << 15) +#define GREFCLK_REFCLKPER_SHIFT 15 +#define GREFCLK_REF_CLK_MODE BIT(14) +#define GREFCLK_SOF_CNT_WKUP_ALERT_MASK (0x3ff) +#define GREFCLK_SOF_CNT_WKUP_ALERT_SHIFT 0 + +#define GINTMSK2 HSOTG_REG(0x0068) +#define GINTMSK2_WKUP_ALERT_INT_MSK BIT(0) + +#define GINTSTS2 HSOTG_REG(0x006c) +#define GINTSTS2_WKUP_ALERT_INT BIT(0) + +#define HPTXFSIZ HSOTG_REG(0x100) +/* Use FIFOSIZE_* constants to access this register */ + +#define DPTXFSIZN(_a) HSOTG_REG(0x104 + (((_a) - 1) * 4)) +/* Use FIFOSIZE_* constants to access this register */ + +/* These apply to the GNPTXFSIZ, HPTXFSIZ and DPTXFSIZN registers */ +#define 
FIFOSIZE_DEPTH_MASK (0xffff << 16) +#define FIFOSIZE_DEPTH_SHIFT 16 +#define FIFOSIZE_STARTADDR_MASK (0xffff << 0) +#define FIFOSIZE_STARTADDR_SHIFT 0 +#define FIFOSIZE_DEPTH_GET(_x) (((_x) >> 16) & 0xffff) + +/* Device mode registers */ + +#define DCFG HSOTG_REG(0x800) +#define DCFG_DESCDMA_EN BIT(23) +#define DCFG_EPMISCNT_MASK (0x1f << 18) +#define DCFG_EPMISCNT_SHIFT 18 +#define DCFG_EPMISCNT_LIMIT 0x1f +#define DCFG_EPMISCNT(_x) ((_x) << 18) +#define DCFG_IPG_ISOC_SUPPORDED BIT(17) +#define DCFG_PERFRINT_MASK (0x3 << 11) +#define DCFG_PERFRINT_SHIFT 11 +#define DCFG_PERFRINT_LIMIT 0x3 +#define DCFG_PERFRINT(_x) ((_x) << 11) +#define DCFG_DEVADDR_MASK (0x7f << 4) +#define DCFG_DEVADDR_SHIFT 4 +#define DCFG_DEVADDR_LIMIT 0x7f +#define DCFG_DEVADDR(_x) ((_x) << 4) +#define DCFG_NZ_STS_OUT_HSHK BIT(2) +#define DCFG_DEVSPD_MASK (0x3 << 0) +#define DCFG_DEVSPD_SHIFT 0 +#define DCFG_DEVSPD_HS 0 +#define DCFG_DEVSPD_FS 1 +#define DCFG_DEVSPD_LS 2 +#define DCFG_DEVSPD_FS48 3 + +#define DCTL HSOTG_REG(0x804) +#define DCTL_SERVICE_INTERVAL_SUPPORTED BIT(19) +#define DCTL_PWRONPRGDONE BIT(11) +#define DCTL_CGOUTNAK BIT(10) +#define DCTL_SGOUTNAK BIT(9) +#define DCTL_CGNPINNAK BIT(8) +#define DCTL_SGNPINNAK BIT(7) +#define DCTL_TSTCTL_MASK (0x7 << 4) +#define DCTL_TSTCTL_SHIFT 4 +#define DCTL_GOUTNAKSTS BIT(3) +#define DCTL_GNPINNAKSTS BIT(2) +#define DCTL_SFTDISCON BIT(1) +#define DCTL_RMTWKUPSIG BIT(0) + +#define DSTS HSOTG_REG(0x808) +#define DSTS_SOFFN_MASK (0x3fff << 8) +#define DSTS_SOFFN_SHIFT 8 +#define DSTS_SOFFN_LIMIT 0x3fff +#define DSTS_SOFFN(_x) ((_x) << 8) +#define DSTS_ERRATICERR BIT(3) +#define DSTS_ENUMSPD_MASK (0x3 << 1) +#define DSTS_ENUMSPD_SHIFT 1 +#define DSTS_ENUMSPD_HS 0 +#define DSTS_ENUMSPD_FS 1 +#define DSTS_ENUMSPD_LS 2 +#define DSTS_ENUMSPD_FS48 3 +#define DSTS_SUSPSTS BIT(0) + +#define DIEPMSK HSOTG_REG(0x810) +#define DIEPMSK_NAKMSK BIT(13) +#define DIEPMSK_BNAININTRMSK BIT(9) +#define DIEPMSK_TXFIFOUNDRNMSK BIT(8) +#define 
DIEPMSK_TXFIFOEMPTY BIT(7) +#define DIEPMSK_INEPNAKEFFMSK BIT(6) +#define DIEPMSK_INTKNEPMISMSK BIT(5) +#define DIEPMSK_INTKNTXFEMPMSK BIT(4) +#define DIEPMSK_TIMEOUTMSK BIT(3) +#define DIEPMSK_AHBERRMSK BIT(2) +#define DIEPMSK_EPDISBLDMSK BIT(1) +#define DIEPMSK_XFERCOMPLMSK BIT(0) + +#define DOEPMSK HSOTG_REG(0x814) +#define DOEPMSK_BNAMSK BIT(9) +#define DOEPMSK_BACK2BACKSETUP BIT(6) +#define DOEPMSK_STSPHSERCVDMSK BIT(5) +#define DOEPMSK_OUTTKNEPDISMSK BIT(4) +#define DOEPMSK_SETUPMSK BIT(3) +#define DOEPMSK_AHBERRMSK BIT(2) +#define DOEPMSK_EPDISBLDMSK BIT(1) +#define DOEPMSK_XFERCOMPLMSK BIT(0) + +#define DAINT HSOTG_REG(0x818) +#define DAINTMSK HSOTG_REG(0x81C) +#define DAINT_OUTEP_SHIFT 16 +#define DAINT_OUTEP(_x) (1 << ((_x) + 16)) +#define DAINT_INEP(_x) (1 << (_x)) + +#define DTKNQR1 HSOTG_REG(0x820) +#define DTKNQR2 HSOTG_REG(0x824) +#define DTKNQR3 HSOTG_REG(0x830) +#define DTKNQR4 HSOTG_REG(0x834) +#define DIEPEMPMSK HSOTG_REG(0x834) + +#define DVBUSDIS HSOTG_REG(0x828) +#define DVBUSPULSE HSOTG_REG(0x82C) + +#define DIEPCTL0 HSOTG_REG(0x900) +#define DIEPCTL(_a) HSOTG_REG(0x900 + ((_a) * 0x20)) + +#define DOEPCTL0 HSOTG_REG(0xB00) +#define DOEPCTL(_a) HSOTG_REG(0xB00 + ((_a) * 0x20)) + +/* EP0 specialness: + * bits[29..28] - reserved (no SetD0PID, SetD1PID) + * bits[25..22] - should always be zero, this isn't a periodic endpoint + * bits[10..0] - MPS setting different for EP0 + */ +#define D0EPCTL_MPS_MASK (0x3 << 0) +#define D0EPCTL_MPS_SHIFT 0 +#define D0EPCTL_MPS_64 0 +#define D0EPCTL_MPS_32 1 +#define D0EPCTL_MPS_16 2 +#define D0EPCTL_MPS_8 3 + +#define DXEPCTL_EPENA BIT(31) +#define DXEPCTL_EPDIS BIT(30) +#define DXEPCTL_SETD1PID BIT(29) +#define DXEPCTL_SETODDFR BIT(29) +#define DXEPCTL_SETD0PID BIT(28) +#define DXEPCTL_SETEVENFR BIT(28) +#define DXEPCTL_SNAK BIT(27) +#define DXEPCTL_CNAK BIT(26) +#define DXEPCTL_TXFNUM_MASK (0xf << 22) +#define DXEPCTL_TXFNUM_SHIFT 22 +#define DXEPCTL_TXFNUM_LIMIT 0xf +#define DXEPCTL_TXFNUM(_x) ((_x) << 22) 
+#define DXEPCTL_STALL BIT(21) +#define DXEPCTL_SNP BIT(20) +#define DXEPCTL_EPTYPE_MASK (0x3 << 18) +#define DXEPCTL_EPTYPE_CONTROL (0x0 << 18) +#define DXEPCTL_EPTYPE_ISO (0x1 << 18) +#define DXEPCTL_EPTYPE_BULK (0x2 << 18) +#define DXEPCTL_EPTYPE_INTERRUPT (0x3 << 18) + +#define DXEPCTL_NAKSTS BIT(17) +#define DXEPCTL_DPID BIT(16) +#define DXEPCTL_EOFRNUM BIT(16) +#define DXEPCTL_USBACTEP BIT(15) +#define DXEPCTL_NEXTEP_MASK (0xf << 11) +#define DXEPCTL_NEXTEP_SHIFT 11 +#define DXEPCTL_NEXTEP_LIMIT 0xf +#define DXEPCTL_NEXTEP(_x) ((_x) << 11) +#define DXEPCTL_MPS_MASK (0x7ff << 0) +#define DXEPCTL_MPS_SHIFT 0 +#define DXEPCTL_MPS_LIMIT 0x7ff +#define DXEPCTL_MPS(_x) ((_x) << 0) + +#define DIEPINT(_a) HSOTG_REG(0x908 + ((_a) * 0x20)) +#define DOEPINT(_a) HSOTG_REG(0xB08 + ((_a) * 0x20)) +#define DXEPINT_SETUP_RCVD BIT(15) +#define DXEPINT_NYETINTRPT BIT(14) +#define DXEPINT_NAKINTRPT BIT(13) +#define DXEPINT_BBLEERRINTRPT BIT(12) +#define DXEPINT_PKTDRPSTS BIT(11) +#define DXEPINT_BNAINTR BIT(9) +#define DXEPINT_TXFIFOUNDRN BIT(8) +#define DXEPINT_OUTPKTERR BIT(8) +#define DXEPINT_TXFEMP BIT(7) +#define DXEPINT_INEPNAKEFF BIT(6) +#define DXEPINT_BACK2BACKSETUP BIT(6) +#define DXEPINT_INTKNEPMIS BIT(5) +#define DXEPINT_STSPHSERCVD BIT(5) +#define DXEPINT_INTKNTXFEMP BIT(4) +#define DXEPINT_OUTTKNEPDIS BIT(4) +#define DXEPINT_TIMEOUT BIT(3) +#define DXEPINT_SETUP BIT(3) +#define DXEPINT_AHBERR BIT(2) +#define DXEPINT_EPDISBLD BIT(1) +#define DXEPINT_XFERCOMPL BIT(0) + +#define DIEPTSIZ0 HSOTG_REG(0x910) +#define DIEPTSIZ0_PKTCNT_MASK (0x3 << 19) +#define DIEPTSIZ0_PKTCNT_SHIFT 19 +#define DIEPTSIZ0_PKTCNT_LIMIT 0x3 +#define DIEPTSIZ0_PKTCNT(_x) ((_x) << 19) +#define DIEPTSIZ0_XFERSIZE_MASK (0x7f << 0) +#define DIEPTSIZ0_XFERSIZE_SHIFT 0 +#define DIEPTSIZ0_XFERSIZE_LIMIT 0x7f +#define DIEPTSIZ0_XFERSIZE(_x) ((_x) << 0) + +#define DOEPTSIZ0 HSOTG_REG(0xB10) +#define DOEPTSIZ0_SUPCNT_MASK (0x3 << 29) +#define DOEPTSIZ0_SUPCNT_SHIFT 29 +#define DOEPTSIZ0_SUPCNT_LIMIT 
0x3 +#define DOEPTSIZ0_SUPCNT(_x) ((_x) << 29) +#define DOEPTSIZ0_PKTCNT BIT(19) +#define DOEPTSIZ0_XFERSIZE_MASK (0x7f << 0) +#define DOEPTSIZ0_XFERSIZE_SHIFT 0 + +#define DIEPTSIZ(_a) HSOTG_REG(0x910 + ((_a) * 0x20)) +#define DOEPTSIZ(_a) HSOTG_REG(0xB10 + ((_a) * 0x20)) +#define DXEPTSIZ_MC_MASK (0x3 << 29) +#define DXEPTSIZ_MC_SHIFT 29 +#define DXEPTSIZ_MC_LIMIT 0x3 +#define DXEPTSIZ_MC(_x) ((_x) << 29) +#define DXEPTSIZ_PKTCNT_MASK (0x3ff << 19) +#define DXEPTSIZ_PKTCNT_SHIFT 19 +#define DXEPTSIZ_PKTCNT_LIMIT 0x3ff +#define DXEPTSIZ_PKTCNT_GET(_v) (((_v) >> 19) & 0x3ff) +#define DXEPTSIZ_PKTCNT(_x) ((_x) << 19) +#define DXEPTSIZ_XFERSIZE_MASK (0x7ffff << 0) +#define DXEPTSIZ_XFERSIZE_SHIFT 0 +#define DXEPTSIZ_XFERSIZE_LIMIT 0x7ffff +#define DXEPTSIZ_XFERSIZE_GET(_v) (((_v) >> 0) & 0x7ffff) +#define DXEPTSIZ_XFERSIZE(_x) ((_x) << 0) + +#define DIEPDMA(_a) HSOTG_REG(0x914 + ((_a) * 0x20)) +#define DOEPDMA(_a) HSOTG_REG(0xB14 + ((_a) * 0x20)) + +#define DTXFSTS(_a) HSOTG_REG(0x918 + ((_a) * 0x20)) + +#define PCGCTL HSOTG_REG(0x0e00) +#define PCGCTL_IF_DEV_MODE BIT(31) +#define PCGCTL_P2HD_PRT_SPD_MASK (0x3 << 29) +#define PCGCTL_P2HD_PRT_SPD_SHIFT 29 +#define PCGCTL_P2HD_DEV_ENUM_SPD_MASK (0x3 << 27) +#define PCGCTL_P2HD_DEV_ENUM_SPD_SHIFT 27 +#define PCGCTL_MAC_DEV_ADDR_MASK (0x7f << 20) +#define PCGCTL_MAC_DEV_ADDR_SHIFT 20 +#define PCGCTL_MAX_TERMSEL BIT(19) +#define PCGCTL_MAX_XCVRSELECT_MASK (0x3 << 17) +#define PCGCTL_MAX_XCVRSELECT_SHIFT 17 +#define PCGCTL_PORT_POWER BIT(16) +#define PCGCTL_PRT_CLK_SEL_MASK (0x3 << 14) +#define PCGCTL_PRT_CLK_SEL_SHIFT 14 +#define PCGCTL_ESS_REG_RESTORED BIT(13) +#define PCGCTL_EXTND_HIBER_SWITCH BIT(12) +#define PCGCTL_EXTND_HIBER_PWRCLMP BIT(11) +#define PCGCTL_ENBL_EXTND_HIBER BIT(10) +#define PCGCTL_RESTOREMODE BIT(9) +#define PCGCTL_RESETAFTSUSP BIT(8) +#define PCGCTL_DEEP_SLEEP BIT(7) +#define PCGCTL_PHY_IN_SLEEP BIT(6) +#define PCGCTL_ENBL_SLEEP_GATING BIT(5) +#define PCGCTL_RSTPDWNMODULE BIT(3) +#define 
PCGCTL_PWRCLMP BIT(2) +#define PCGCTL_GATEHCLK BIT(1) +#define PCGCTL_STOPPCLK BIT(0) + +#define PCGCCTL1 HSOTG_REG(0xe04) +#define PCGCCTL1_TIMER (0x3 << 1) +#define PCGCCTL1_GATEEN BIT(0) + +#define EPFIFO(_a) HSOTG_REG(0x1000 + ((_a) * 0x1000)) + +/* Host Mode Registers */ + +#define HCFG HSOTG_REG(0x0400) +#define HCFG_MODECHTIMEN BIT(31) +#define HCFG_PERSCHEDENA BIT(26) +#define HCFG_FRLISTEN_MASK (0x3 << 24) +#define HCFG_FRLISTEN_SHIFT 24 +#define HCFG_FRLISTEN_8 (0 << 24) +#define FRLISTEN_8_SIZE 8 +#define HCFG_FRLISTEN_16 BIT(24) +#define FRLISTEN_16_SIZE 16 +#define HCFG_FRLISTEN_32 (2 << 24) +#define FRLISTEN_32_SIZE 32 +#define HCFG_FRLISTEN_64 (3 << 24) +#define FRLISTEN_64_SIZE 64 +#define HCFG_DESCDMA BIT(23) +#define HCFG_RESVALID_MASK (0xff << 8) +#define HCFG_RESVALID_SHIFT 8 +#define HCFG_ENA32KHZ BIT(7) +#define HCFG_FSLSSUPP BIT(2) +#define HCFG_FSLSPCLKSEL_MASK (0x3 << 0) +#define HCFG_FSLSPCLKSEL_SHIFT 0 +#define HCFG_FSLSPCLKSEL_30_60_MHZ 0 +#define HCFG_FSLSPCLKSEL_48_MHZ 1 +#define HCFG_FSLSPCLKSEL_6_MHZ 2 + +#define HFIR HSOTG_REG(0x0404) +#define HFIR_FRINT_MASK (0xffff << 0) +#define HFIR_FRINT_SHIFT 0 +#define HFIR_RLDCTRL BIT(16) + +#define HFNUM HSOTG_REG(0x0408) +#define HFNUM_FRREM_MASK (0xffff << 16) +#define HFNUM_FRREM_SHIFT 16 +#define HFNUM_FRNUM_MASK (0xffff << 0) +#define HFNUM_FRNUM_SHIFT 0 +#define HFNUM_MAX_FRNUM 0x3fff + +#define HPTXSTS HSOTG_REG(0x0410) +#define TXSTS_QTOP_ODD BIT(31) +#define TXSTS_QTOP_CHNEP_MASK (0xf << 27) +#define TXSTS_QTOP_CHNEP_SHIFT 27 +#define TXSTS_QTOP_TOKEN_MASK (0x3 << 25) +#define TXSTS_QTOP_TOKEN_SHIFT 25 +#define TXSTS_QTOP_TERMINATE BIT(24) +#define TXSTS_QSPCAVAIL_MASK (0xff << 16) +#define TXSTS_QSPCAVAIL_SHIFT 16 +#define TXSTS_FSPCAVAIL_MASK (0xffff << 0) +#define TXSTS_FSPCAVAIL_SHIFT 0 + +#define HAINT HSOTG_REG(0x0414) +#define HAINTMSK HSOTG_REG(0x0418) +#define HFLBADDR HSOTG_REG(0x041c) + +#define HPRT0 HSOTG_REG(0x0440) +#define HPRT0_SPD_MASK (0x3 << 17) +#define 
HPRT0_SPD_SHIFT 17 +#define HPRT0_SPD_HIGH_SPEED 0 +#define HPRT0_SPD_FULL_SPEED 1 +#define HPRT0_SPD_LOW_SPEED 2 +#define HPRT0_TSTCTL_MASK (0xf << 13) +#define HPRT0_TSTCTL_SHIFT 13 +#define HPRT0_PWR BIT(12) +#define HPRT0_LNSTS_MASK (0x3 << 10) +#define HPRT0_LNSTS_SHIFT 10 +#define HPRT0_RST BIT(8) +#define HPRT0_SUSP BIT(7) +#define HPRT0_RES BIT(6) +#define HPRT0_OVRCURRCHG BIT(5) +#define HPRT0_OVRCURRACT BIT(4) +#define HPRT0_ENACHG BIT(3) +#define HPRT0_ENA BIT(2) +#define HPRT0_CONNDET BIT(1) +#define HPRT0_CONNSTS BIT(0) + +#define HCCHAR(_ch) HSOTG_REG(0x0500 + 0x20 * (_ch)) +#define HCCHAR_CHENA BIT(31) +#define HCCHAR_CHDIS BIT(30) +#define HCCHAR_ODDFRM BIT(29) +#define HCCHAR_DEVADDR_MASK (0x7f << 22) +#define HCCHAR_DEVADDR_SHIFT 22 +#define HCCHAR_MULTICNT_MASK (0x3 << 20) +#define HCCHAR_MULTICNT_SHIFT 20 +#define HCCHAR_EPTYPE_MASK (0x3 << 18) +#define HCCHAR_EPTYPE_SHIFT 18 +#define HCCHAR_LSPDDEV BIT(17) +#define HCCHAR_EPDIR BIT(15) +#define HCCHAR_EPNUM_MASK (0xf << 11) +#define HCCHAR_EPNUM_SHIFT 11 +#define HCCHAR_MPS_MASK (0x7ff << 0) +#define HCCHAR_MPS_SHIFT 0 + +#define HCSPLT(_ch) HSOTG_REG(0x0504 + 0x20 * (_ch)) +#define HCSPLT_SPLTENA BIT(31) +#define HCSPLT_COMPSPLT BIT(16) +#define HCSPLT_XACTPOS_MASK (0x3 << 14) +#define HCSPLT_XACTPOS_SHIFT 14 +#define HCSPLT_XACTPOS_MID 0 +#define HCSPLT_XACTPOS_END 1 +#define HCSPLT_XACTPOS_BEGIN 2 +#define HCSPLT_XACTPOS_ALL 3 +#define HCSPLT_HUBADDR_MASK (0x7f << 7) +#define HCSPLT_HUBADDR_SHIFT 7 +#define HCSPLT_PRTADDR_MASK (0x7f << 0) +#define HCSPLT_PRTADDR_SHIFT 0 + +#define HCINT(_ch) HSOTG_REG(0x0508 + 0x20 * (_ch)) +#define HCINTMSK(_ch) HSOTG_REG(0x050c + 0x20 * (_ch)) +#define HCINTMSK_RESERVED14_31 (0x3ffff << 14) +#define HCINTMSK_FRM_LIST_ROLL BIT(13) +#define HCINTMSK_XCS_XACT BIT(12) +#define HCINTMSK_BNA BIT(11) +#define HCINTMSK_DATATGLERR BIT(10) +#define HCINTMSK_FRMOVRUN BIT(9) +#define HCINTMSK_BBLERR BIT(8) +#define HCINTMSK_XACTERR BIT(7) +#define HCINTMSK_NYET BIT(6) 
+#define HCINTMSK_ACK BIT(5) +#define HCINTMSK_NAK BIT(4) +#define HCINTMSK_STALL BIT(3) +#define HCINTMSK_AHBERR BIT(2) +#define HCINTMSK_CHHLTD BIT(1) +#define HCINTMSK_XFERCOMPL BIT(0) + +#define HCTSIZ(_ch) HSOTG_REG(0x0510 + 0x20 * (_ch)) +#define TSIZ_DOPNG BIT(31) +#define TSIZ_SC_MC_PID_MASK (0x3 << 29) +#define TSIZ_SC_MC_PID_SHIFT 29 +#define TSIZ_SC_MC_PID_DATA0 0 +#define TSIZ_SC_MC_PID_DATA2 1 +#define TSIZ_SC_MC_PID_DATA1 2 +#define TSIZ_SC_MC_PID_MDATA 3 +#define TSIZ_SC_MC_PID_SETUP 3 +#define TSIZ_PKTCNT_MASK (0x3ff << 19) +#define TSIZ_PKTCNT_SHIFT 19 +#define TSIZ_NTD_MASK (0xff << 8) +#define TSIZ_NTD_SHIFT 8 +#define TSIZ_SCHINFO_MASK (0xff << 0) +#define TSIZ_SCHINFO_SHIFT 0 +#define TSIZ_XFERSIZE_MASK (0x7ffff << 0) +#define TSIZ_XFERSIZE_SHIFT 0 + +#define HCDMA(_ch) HSOTG_REG(0x0514 + 0x20 * (_ch)) + +#define HCDMAB(_ch) HSOTG_REG(0x051c + 0x20 * (_ch)) + +#define HCFIFO(_ch) HSOTG_REG(0x1000 + 0x1000 * (_ch)) + +/** + * struct dwc2_dma_desc - DMA descriptor structure, + * used for both host and gadget modes + * + * @status: DMA descriptor status quadlet + * @buf: DMA descriptor data buffer pointer + * + * DMA Descriptor structure contains two quadlets: + * Status quadlet and Data buffer pointer. 
+ */ +struct dwc2_dma_desc { + uint32_t status; + uint32_t buf; +} __packed; /* NOTE(review): no definition of __packed is visible in this patch; if no macro provides it, this line declares a variable named __packed instead of packing the struct - confirm (QEMU code usually spells this QEMU_PACKED) */ + +/* Host Mode DMA descriptor status quadlet */ + +#define HOST_DMA_A BIT(31) +#define HOST_DMA_STS_MASK (0x3 << 28) +#define HOST_DMA_STS_SHIFT 28 +#define HOST_DMA_STS_PKTERR BIT(28) +#define HOST_DMA_EOL BIT(26) +#define HOST_DMA_IOC BIT(25) +#define HOST_DMA_SUP BIT(24) +#define HOST_DMA_ALT_QTD BIT(23) +#define HOST_DMA_QTD_OFFSET_MASK (0x3f << 17) +#define HOST_DMA_QTD_OFFSET_SHIFT 17 +#define HOST_DMA_ISOC_NBYTES_MASK (0xfff << 0) +#define HOST_DMA_ISOC_NBYTES_SHIFT 0 +#define HOST_DMA_NBYTES_MASK (0x1ffff << 0) +#define HOST_DMA_NBYTES_SHIFT 0 +#define HOST_DMA_NBYTES_LIMIT 131071 + +/* Device Mode DMA descriptor status quadlet */ + +#define DEV_DMA_BUFF_STS_MASK (0x3 << 30) +#define DEV_DMA_BUFF_STS_SHIFT 30 +#define DEV_DMA_BUFF_STS_HREADY 0 +#define DEV_DMA_BUFF_STS_DMABUSY 1 +#define DEV_DMA_BUFF_STS_DMADONE 2 +#define DEV_DMA_BUFF_STS_HBUSY 3 +#define DEV_DMA_STS_MASK (0x3 << 28) +#define DEV_DMA_STS_SHIFT 28 +#define DEV_DMA_STS_SUCC 0 +#define DEV_DMA_STS_BUFF_FLUSH 1 +#define DEV_DMA_STS_BUFF_ERR 3 +#define DEV_DMA_L BIT(27) +#define DEV_DMA_SHORT BIT(26) +#define DEV_DMA_IOC BIT(25) +#define DEV_DMA_SR BIT(24) +#define DEV_DMA_MTRF BIT(23) +#define DEV_DMA_ISOC_PID_MASK (0x3 << 23) +#define DEV_DMA_ISOC_PID_SHIFT 23 +#define DEV_DMA_ISOC_PID_DATA0 0 +#define DEV_DMA_ISOC_PID_DATA2 1 +#define DEV_DMA_ISOC_PID_DATA1 2 +#define DEV_DMA_ISOC_PID_MDATA 3 +#define DEV_DMA_ISOC_FRNUM_MASK (0x7ff << 12) +#define DEV_DMA_ISOC_FRNUM_SHIFT 12 +#define DEV_DMA_ISOC_TX_NBYTES_MASK (0xfff << 0) +#define DEV_DMA_ISOC_TX_NBYTES_LIMIT 0xfff +#define DEV_DMA_ISOC_RX_NBYTES_MASK (0x7ff << 0) +#define DEV_DMA_ISOC_RX_NBYTES_LIMIT 0x7ff +#define DEV_DMA_ISOC_NBYTES_SHIFT 0 +#define DEV_DMA_NBYTES_MASK (0xffff << 0) +#define DEV_DMA_NBYTES_SHIFT 0 +#define DEV_DMA_NBYTES_LIMIT 0xffff + +#define MAX_DMA_DESC_NUM_GENERIC 64 +#define MAX_DMA_DESC_NUM_HS_ISOC 256 + +#endif /* __DWC2_HW_H__ */ From
104a010f24f7ae9d16ff67bce66c309a4a070915 Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Wed, 20 May 2020 16:53:45 -0700 Subject: [PATCH 16/29] dwc-hsotg (dwc2) USB host controller state definitions Add the dwc-hsotg (dwc2) USB host controller state definitions. Mostly based on hw/usb/hcd-ehci.h. Signed-off-by: Paul Zimmerman Message-id: 20200520235349.21215-4-pauldzim@gmail.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/usb/hcd-dwc2.h | 190 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 hw/usb/hcd-dwc2.h diff --git a/hw/usb/hcd-dwc2.h b/hw/usb/hcd-dwc2.h new file mode 100644 index 0000000000..4ba809a07b --- /dev/null +++ b/hw/usb/hcd-dwc2.h @@ -0,0 +1,190 @@ +/* + * dwc-hsotg (dwc2) USB host controller state definitions + * + * Based on hw/usb/hcd-ehci.h + * + * Copyright (c) 2020 Paul Zimmerman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef HW_USB_DWC2_H +#define HW_USB_DWC2_H + +#include "qemu/timer.h" +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "hw/usb.h" +#include "sysemu/dma.h" + +#define DWC2_MMIO_SIZE 0x11000 + +#define DWC2_NB_CHAN 8 /* Number of host channels */ +#define DWC2_MAX_XFER_SIZE 65536 /* Max transfer size expected in HCTSIZ */ + +typedef struct DWC2Packet DWC2Packet; +typedef struct DWC2State DWC2State; +typedef struct DWC2Class DWC2Class; + +enum async_state { + DWC2_ASYNC_NONE = 0, + DWC2_ASYNC_INITIALIZED, + DWC2_ASYNC_INFLIGHT, + DWC2_ASYNC_FINISHED, +}; + +struct DWC2Packet { + USBPacket packet; + uint32_t devadr; + uint32_t epnum; + uint32_t epdir; + uint32_t mps; + uint32_t pid; + uint32_t index; + uint32_t pcnt; + uint32_t len; + int32_t async; + bool small; + bool needs_service; +}; + +struct DWC2State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + USBBus bus; + qemu_irq irq; + MemoryRegion *dma_mr; + AddressSpace dma_as; + MemoryRegion container; + MemoryRegion hsotg; + MemoryRegion fifos; + + union { +#define DWC2_GLBREG_SIZE 0x70 + uint32_t glbreg[DWC2_GLBREG_SIZE / sizeof(uint32_t)]; + struct { + uint32_t gotgctl; /* 00 */ + uint32_t gotgint; /* 04 */ + uint32_t gahbcfg; /* 08 */ + uint32_t gusbcfg; /* 0c */ + uint32_t grstctl; /* 10 */ + uint32_t gintsts; /* 14 */ + uint32_t gintmsk; /* 18 */ + uint32_t grxstsr; /* 1c */ + uint32_t grxstsp; /* 20 */ + uint32_t grxfsiz; /* 24 */ + uint32_t gnptxfsiz; /* 28 */ + uint32_t gnptxsts; /* 2c */ + uint32_t gi2cctl; /* 30 */ + uint32_t gpvndctl; /* 34 */ + uint32_t ggpio; /* 38 */ + uint32_t guid; /* 3c */ + uint32_t gsnpsid; /* 40 */ + uint32_t ghwcfg1; /* 44 */ + uint32_t ghwcfg2; /* 48 */ + uint32_t ghwcfg3; /* 4c */ + uint32_t ghwcfg4; /* 50 */ + uint32_t glpmcfg; /* 54 */ + uint32_t gpwrdn; /* 58 */ + uint32_t gdfifocfg; /* 5c */ + uint32_t gadpctl; /* 60 */ + uint32_t grefclk; /* 64 */ + uint32_t gintmsk2; /* 68 */ + uint32_t gintsts2; /* 6c */ + }; + }; + + union { +#define 
DWC2_FSZREG_SIZE 0x04 + uint32_t fszreg[DWC2_FSZREG_SIZE / sizeof(uint32_t)]; + struct { + uint32_t hptxfsiz; /* 100 */ + }; + }; + + union { +#define DWC2_HREG0_SIZE 0x44 + uint32_t hreg0[DWC2_HREG0_SIZE / sizeof(uint32_t)]; + struct { + uint32_t hcfg; /* 400 */ + uint32_t hfir; /* 404 */ + uint32_t hfnum; /* 408 */ + uint32_t rsvd0; /* 40c */ + uint32_t hptxsts; /* 410 */ + uint32_t haint; /* 414 */ + uint32_t haintmsk; /* 418 */ + uint32_t hflbaddr; /* 41c */ + uint32_t rsvd1[8]; /* 420-43c */ + uint32_t hprt0; /* 440 */ + }; + }; + +#define DWC2_HREG1_SIZE (0x20 * DWC2_NB_CHAN) + uint32_t hreg1[DWC2_HREG1_SIZE / sizeof(uint32_t)]; + +#define hcchar(_ch) hreg1[((_ch) << 3) + 0] /* 500, 520, ... */ +#define hcsplt(_ch) hreg1[((_ch) << 3) + 1] /* 504, 524, ... */ +#define hcint(_ch) hreg1[((_ch) << 3) + 2] /* 508, 528, ... */ +#define hcintmsk(_ch) hreg1[((_ch) << 3) + 3] /* 50c, 52c, ... */ +#define hctsiz(_ch) hreg1[((_ch) << 3) + 4] /* 510, 530, ... */ +#define hcdma(_ch) hreg1[((_ch) << 3) + 5] /* 514, 534, ... */ +#define hcdmab(_ch) hreg1[((_ch) << 3) + 7] /* 51c, 53c, ... 
*/ + + union { +#define DWC2_PCGREG_SIZE 0x08 + uint32_t pcgreg[DWC2_PCGREG_SIZE / sizeof(uint32_t)]; + struct { + uint32_t pcgctl; /* e00 */ + uint32_t pcgcctl1; /* e04 */ + }; + }; + + /* TODO - implement FIFO registers for slave mode */ +#define DWC2_HFIFO_SIZE (0x1000 * DWC2_NB_CHAN) + + /* + * Internal state + */ + QEMUTimer *eof_timer; + QEMUTimer *frame_timer; + QEMUBH *async_bh; + int64_t sof_time; + int64_t usb_frame_time; + int64_t usb_bit_time; + uint32_t usb_version; + uint16_t frame_number; + uint16_t fi; + uint16_t next_chan; + bool working; + USBPort uport; + DWC2Packet packet[DWC2_NB_CHAN]; /* one packet per chan */ + uint8_t usb_buf[DWC2_NB_CHAN][DWC2_MAX_XFER_SIZE]; /* one buffer per chan */ +}; + +struct DWC2Class { + /*< private >*/ + SysBusDeviceClass parent_class; + ResettablePhases parent_phases; + + /*< public >*/ +}; + +#define TYPE_DWC2_USB "dwc2-usb" +#define DWC2_USB(obj) \ + OBJECT_CHECK(DWC2State, (obj), TYPE_DWC2_USB) +#define DWC2_CLASS(klass) \ + OBJECT_CLASS_CHECK(DWC2Class, (klass), TYPE_DWC2_USB) +#define DWC2_GET_CLASS(obj) \ + OBJECT_GET_CLASS(DWC2Class, (obj), TYPE_DWC2_USB) + +#endif From 153ef1662c35ba3d3bbcedefe8dc24cfa4e8c33d Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Wed, 20 May 2020 16:53:46 -0700 Subject: [PATCH 17/29] dwc-hsotg (dwc2) USB host controller emulation Add the dwc-hsotg (dwc2) USB host controller emulation code. Based on hw/usb/hcd-ehci.c and hw/usb/hcd-ohci.c. Note that to use this with the dwc-otg driver in the Raspbian kernel, you must pass the option "dwc_otg.fiq_fsm_enable=0" on the kernel command line. Emulation of slave mode and of descriptor-DMA mode has not been implemented yet. These modes are seldom used. I have used some on-line sources of information while developing this emulation, including: http://www.capital-micro.com/PDF/CME-M7_Family_User_Guide_EN.pdf which has a pretty complete description of the controller starting on page 370. 
https://sourceforge.net/p/wive-ng/wive-ng-mt/ci/master/tree/docs/DataSheets/RT3050_5x_V2.0_081408_0902.pdf which has a description of the controller registers starting on page 130. Thanks to Felippe Mathieu-Daude for providing a cleaner method of implementing the memory regions for the controller registers. Signed-off-by: Paul Zimmerman Message-id: 20200520235349.21215-5-pauldzim@gmail.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/usb/Kconfig | 5 + hw/usb/Makefile.objs | 1 + hw/usb/hcd-dwc2.c | 1417 ++++++++++++++++++++++++++++++++++++++++++ hw/usb/trace-events | 50 ++ 4 files changed, 1473 insertions(+) create mode 100644 hw/usb/hcd-dwc2.c diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig index 464348ba14..d4d8c37c28 100644 --- a/hw/usb/Kconfig +++ b/hw/usb/Kconfig @@ -46,6 +46,11 @@ config USB_MUSB bool select USB +config USB_DWC2 + bool + default y + select USB + config TUSB6010 bool select USB_MUSB diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs index 66835e5bf7..fa5c3fa1b8 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -12,6 +12,7 @@ common-obj-$(CONFIG_USB_EHCI_SYSBUS) += hcd-ehci-sysbus.o common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o common-obj-$(CONFIG_USB_XHCI_NEC) += hcd-xhci-nec.o common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o +common-obj-$(CONFIG_USB_DWC2) += hcd-dwc2.o common-obj-$(CONFIG_TUSB6010) += tusb6010.o common-obj-$(CONFIG_IMX) += chipidea.o diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c new file mode 100644 index 0000000000..72cbd051f3 --- /dev/null +++ b/hw/usb/hcd-dwc2.c @@ -0,0 +1,1417 @@ +/* + * dwc-hsotg (dwc2) USB host controller emulation + * + * Based on hw/usb/hcd-ehci.c and hw/usb/hcd-ohci.c + * + * Note that to use this emulation with the dwc-otg driver in the + * Raspbian kernel, you must pass the option "dwc_otg.fiq_fsm_enable=0" + * on the kernel command line. 
+ * + * Some useful documentation used to develop this emulation can be + * found online (as of April 2020) at: + * + * http://www.capital-micro.com/PDF/CME-M7_Family_User_Guide_EN.pdf + * which has a pretty complete description of the controller starting + * on page 370. + * + * https://sourceforge.net/p/wive-ng/wive-ng-mt/ci/master/tree/docs/DataSheets/RT3050_5x_V2.0_081408_0902.pdf + * which has a description of the controller registers starting on + * page 130. + * + * Copyright (c) 2020 Paul Zimmerman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/usb/dwc2-regs.h" +#include "hw/usb/hcd-dwc2.h" +#include "migration/vmstate.h" +#include "trace.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "hw/qdev-properties.h" + +#define USB_HZ_FS 12000000 +#define USB_HZ_HS 96000000 +#define USB_FRMINTVL 12000 + +/* nifty macros from Arnon's EHCI version */ +#define get_field(data, field) \ + (((data) & field##_MASK) >> field##_SHIFT) + +#define set_field(data, newval, field) do { \ + uint32_t val = *(data); \ + val &= ~field##_MASK; \ + val |= ((newval) << field##_SHIFT) & field##_MASK; \ + *(data) = val; \ +} while (0) + +#define get_bit(data, bitmask) \ + (!!((data) & (bitmask))) + +/* update irq line: level is 1 when an unmasked GINTSTS bit is pending and GAHBCFG_GLBL_INTR_EN is set; qemu_set_irq() is only called when the level changes */ +static inline void dwc2_update_irq(DWC2State *s) +{ + static int oldlevel; /* NOTE(review): function-static, so this cached level is shared by every DWC2State instance rather than being per-device state; consider keeping it in DWC2State */ + int level = 0; + + if ((s->gintsts & s->gintmsk) && (s->gahbcfg & GAHBCFG_GLBL_INTR_EN)) { + level = 1; + } + if (level != oldlevel) { + oldlevel = level; + trace_usb_dwc2_update_irq(level); + qemu_set_irq(s->irq, level); + } +} + +/* flag interrupt condition: set intr in GINTSTS if it is not already set, then re-evaluate the irq line */ +static inline void dwc2_raise_global_irq(DWC2State *s, uint32_t intr) +{ + if (!(s->gintsts & intr)) { + s->gintsts |= intr; + trace_usb_dwc2_raise_global_irq(intr); + dwc2_update_irq(s); + } +} + +static inline void dwc2_lower_global_irq(DWC2State *s, uint32_t intr) +{ /* clear intr in GINTSTS if it is set, then re-evaluate the irq line */ + if (s->gintsts & intr) { + s->gintsts &= ~intr; + trace_usb_dwc2_lower_global_irq(intr); + dwc2_update_irq(s); + } +} + +static inline void dwc2_raise_host_irq(DWC2State *s, uint32_t host_intr) +{ /* set host_intr in HAINT; raise GINTSTS_HCHINT when an unmasked HAINT bit is pending */ + if (!(s->haint & host_intr)) { + s->haint |= host_intr; + s->haint &= 0xffff; /* keep only the low 16 bits of HAINT */ + trace_usb_dwc2_raise_host_irq(host_intr); + if (s->haint & s->haintmsk) { + dwc2_raise_global_irq(s, GINTSTS_HCHINT); + } + } +} + +static inline void dwc2_lower_host_irq(DWC2State *s, uint32_t host_intr) +{ + if (s->haint & host_intr) { + s->haint &= ~host_intr; +
trace_usb_dwc2_lower_host_irq(host_intr); + if (!(s->haint & s->haintmsk)) { + dwc2_lower_global_irq(s, GINTSTS_HCHINT); + } + } +} + +static inline void dwc2_update_hc_irq(DWC2State *s, int index) +{ + uint32_t host_intr = 1 << (index >> 3); + + if (s->hreg1[index + 2] & s->hreg1[index + 3]) { + dwc2_raise_host_irq(s, host_intr); + } else { + dwc2_lower_host_irq(s, host_intr); + } +} + +/* set a timer for EOF */ +static void dwc2_eof_timer(DWC2State *s) +{ + timer_mod(s->eof_timer, s->sof_time + s->usb_frame_time); +} + +/* Set a timer for EOF and generate SOF event */ +static void dwc2_sof(DWC2State *s) +{ + s->sof_time += s->usb_frame_time; + trace_usb_dwc2_sof(s->sof_time); + dwc2_eof_timer(s); + dwc2_raise_global_irq(s, GINTSTS_SOF); +} + +/* Do frame processing on frame boundary */ +static void dwc2_frame_boundary(void *opaque) +{ + DWC2State *s = opaque; + int64_t now; + uint16_t frcnt; + + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + /* Frame boundary, so do EOF stuff here */ + + /* Increment frame number */ + frcnt = (uint16_t)((now - s->sof_time) / s->fi); + s->frame_number = (s->frame_number + frcnt) & 0xffff; + s->hfnum = s->frame_number & HFNUM_MAX_FRNUM; + + /* Do SOF stuff here */ + dwc2_sof(s); +} + +/* Start sending SOF tokens on the USB bus */ +static void dwc2_bus_start(DWC2State *s) +{ + trace_usb_dwc2_bus_start(); + s->sof_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + dwc2_eof_timer(s); +} + +/* Stop sending SOF tokens on the USB bus */ +static void dwc2_bus_stop(DWC2State *s) +{ + trace_usb_dwc2_bus_stop(); + timer_del(s->eof_timer); +} + +static USBDevice *dwc2_find_device(DWC2State *s, uint8_t addr) +{ + USBDevice *dev; + + trace_usb_dwc2_find_device(addr); + + if (!(s->hprt0 & HPRT0_ENA)) { + trace_usb_dwc2_port_disabled(0); + } else { + dev = usb_find_device(&s->uport, addr); + if (dev != NULL) { + trace_usb_dwc2_device_found(0); + return dev; + } + } + + trace_usb_dwc2_device_not_found(); + return NULL; +} + +static const char 
*pstatus[] = { + "USB_RET_SUCCESS", "USB_RET_NODEV", "USB_RET_NAK", "USB_RET_STALL", + "USB_RET_BABBLE", "USB_RET_IOERROR", "USB_RET_ASYNC", + "USB_RET_ADD_TO_QUEUE", "USB_RET_REMOVE_FROM_QUEUE" +}; + +static uint32_t pintr[] = { /* HCINT bit to raise for each packet status, indexed the same way as pstatus[] */ + HCINTMSK_XFERCOMPL, HCINTMSK_XACTERR, HCINTMSK_NAK, HCINTMSK_STALL, + HCINTMSK_BBLERR, HCINTMSK_XACTERR, HCINTMSK_XACTERR, HCINTMSK_XACTERR, + HCINTMSK_XACTERR +}; + +static const char *types[] = { + "Ctrl", "Isoc", "Bulk", "Intr" +}; + +static const char *dirs[] = { + "Out", "In" +}; + +static void dwc2_handle_packet(DWC2State *s, uint32_t devadr, USBDevice *dev, + USBEndpoint *ep, uint32_t index, bool send) +{ /* Process one transfer step for the host channel whose registers start at s->hreg1[index] (chan = index >> 3). With send set, a USBPacket is built and handed to usb_handle_packet(); otherwise the result already stored in s->packet[chan] is consumed. Updates HCTSIZ, HCDMA, HCINT and the DWC2Packet bookkeeping. */ + DWC2Packet *p; + uint32_t hcchar = s->hreg1[index]; + uint32_t hctsiz = s->hreg1[index + 4]; + uint32_t hcdma = s->hreg1[index + 5]; + uint32_t chan, epnum, epdir, eptype, mps, pid, pcnt, len, tlen, intr = 0; + uint32_t tpcnt, stsidx, actual = 0; + bool do_intr = false, done = false; + + epnum = get_field(hcchar, HCCHAR_EPNUM); + epdir = get_bit(hcchar, HCCHAR_EPDIR); + eptype = get_field(hcchar, HCCHAR_EPTYPE); + mps = get_field(hcchar, HCCHAR_MPS); + pid = get_field(hctsiz, TSIZ_SC_MC_PID); + pcnt = get_field(hctsiz, TSIZ_PKTCNT); + len = get_field(hctsiz, TSIZ_XFERSIZE); + assert(len <= DWC2_MAX_XFER_SIZE); /* NOTE(review): len is read from the HCTSIZ register; TSIZ_XFERSIZE_MASK is 19 bits wide (max 0x7ffff) while DWC2_MAX_XFER_SIZE is 65536, so if the guest can program a larger size this assert aborts QEMU - consider logging LOG_GUEST_ERROR and returning instead */ + chan = index >> 3; + p = &s->packet[chan]; + + trace_usb_dwc2_handle_packet(chan, dev, &p->packet, epnum, types[eptype], + dirs[epdir], mps, len, pcnt); + + if (eptype == USB_ENDPOINT_XFER_CONTROL && pid == TSIZ_SC_MC_PID_SETUP) { + pid = USB_TOKEN_SETUP; + } else { + pid = epdir ?
USB_TOKEN_IN : USB_TOKEN_OUT; + } + + if (send) { + tlen = len; + if (p->small) { /* small: cap this submission at one max-size packet */ + if (tlen > mps) { + tlen = mps; + } + } + + if (pid != USB_TOKEN_IN) { /* OUT or SETUP: fetch the payload from guest memory first */ + trace_usb_dwc2_memory_read(hcdma, tlen); + if (dma_memory_read(&s->dma_as, hcdma, + s->usb_buf[chan], tlen) != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: dma_memory_read failed\n", + __func__); + } + } + + usb_packet_init(&p->packet); + usb_packet_setup(&p->packet, pid, ep, 0, hcdma, + pid != USB_TOKEN_IN, true); + usb_packet_addbuf(&p->packet, s->usb_buf[chan], tlen); + p->async = DWC2_ASYNC_NONE; + usb_handle_packet(dev, &p->packet); + } else { + tlen = p->len; + } + + stsidx = -p->packet.status; /* negated status is the index into pstatus[] and pintr[] */ + assert(stsidx < sizeof(pstatus) / sizeof(*pstatus)); + actual = p->packet.actual_length; + trace_usb_dwc2_packet_status(pstatus[stsidx], actual); + +babble: + if (p->packet.status != USB_RET_SUCCESS && + p->packet.status != USB_RET_NAK && + p->packet.status != USB_RET_STALL && + p->packet.status != USB_RET_ASYNC) { + trace_usb_dwc2_packet_error(pstatus[stsidx]); + } + + if (p->packet.status == USB_RET_ASYNC) { /* packet still in flight: save the channel parameters and return without touching the registers */ + trace_usb_dwc2_async_packet(&p->packet, chan, dev, epnum, + dirs[epdir], tlen); + usb_device_flush_ep_queue(dev, ep); + assert(p->async != DWC2_ASYNC_INFLIGHT); + p->devadr = devadr; + p->epnum = epnum; + p->epdir = epdir; + p->mps = mps; + p->pid = pid; + p->index = index; + p->pcnt = pcnt; + p->len = tlen; + p->async = DWC2_ASYNC_INFLIGHT; + p->needs_service = false; + return; + } + + if (p->packet.status == USB_RET_SUCCESS) { + if (actual > tlen) { + p->packet.status = USB_RET_BABBLE; + goto babble; /* NOTE(review): stsidx is not recomputed after this jump, so on the babble path pstatus[stsidx] and pintr[stsidx] still use the pre-goto (success) index instead of the HCINTMSK_BBLERR entry - presumably unintended, confirm */ + } + + if (pid == USB_TOKEN_IN) { /* IN: copy the received bytes back to guest memory */ + trace_usb_dwc2_memory_write(hcdma, actual); + if (dma_memory_write(&s->dma_as, hcdma, s->usb_buf[chan], + actual) != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: dma_memory_write failed\n", + __func__); + } + } + + tpcnt = actual / mps; /* NOTE(review): mps is the raw HCCHAR_MPS field; a value of 0 makes this division and the modulo below divide by zero - consider rejecting mps == 0 before use */ + if (actual % mps) { /* trailing short packet: count it, and for IN it ends the transfer */ + tpcnt++; + if (pid == USB_TOKEN_IN) { + done = true; + } + } + + pcnt -= tpcnt < pcnt ?
tpcnt : pcnt; /* saturating subtract, pcnt never goes below 0 */ + set_field(&hctsiz, pcnt, TSIZ_PKTCNT); + len -= actual < len ? actual : len; + set_field(&hctsiz, len, TSIZ_XFERSIZE); + s->hreg1[index + 4] = hctsiz; + hcdma += actual; + s->hreg1[index + 5] = hcdma; + + if (!pcnt || len == 0 || actual == 0) { + done = true; + } + } else { + intr |= pintr[stsidx]; + if (p->packet.status == USB_RET_NAK && + (eptype == USB_ENDPOINT_XFER_CONTROL || + eptype == USB_ENDPOINT_XFER_BULK)) { + /* + * for ctrl/bulk, automatically retry on NAK, + * but send the interrupt anyway + */ + intr &= ~HCINTMSK_RESERVED14_31; + s->hreg1[index + 2] |= intr; + do_intr = true; + } else { + intr |= HCINTMSK_CHHLTD; + done = true; + } + } + + usb_packet_cleanup(&p->packet); + + if (done) { /* transfer finished: clear CHENA, latch CHHLTD (plus XFERCOMPL when no error path set CHHLTD) in HCINT and update the channel irq */ + hcchar &= ~HCCHAR_CHENA; + s->hreg1[index] = hcchar; + if (!(intr & HCINTMSK_CHHLTD)) { + intr |= HCINTMSK_CHHLTD | HCINTMSK_XFERCOMPL; + } + intr &= ~HCINTMSK_RESERVED14_31; + s->hreg1[index + 2] |= intr; + p->needs_service = false; + trace_usb_dwc2_packet_done(pstatus[stsidx], actual, len, pcnt); + dwc2_update_hc_irq(s, index); + return; + } + + p->devadr = devadr; /* not done: remember the channel parameters and mark the packet as needing further service */ + p->epnum = epnum; + p->epdir = epdir; + p->mps = mps; + p->pid = pid; + p->index = index; + p->pcnt = pcnt; + p->len = len; + p->needs_service = true; + trace_usb_dwc2_packet_next(pstatus[stsidx], len, pcnt); + if (do_intr) { + dwc2_update_hc_irq(s, index); + } +} + +/* Attach or detach a device on root hub */ + +static const char *speeds[] = { + "low", "full", "high" +}; + +static void dwc2_attach(USBPort *port) +{ + DWC2State *s = port->opaque; + int hispd = 0; + + trace_usb_dwc2_attach(port); + assert(port->index == 0); + + if (!port->dev || !port->dev->attached) { + return; + } + + assert(port->dev->speed <= USB_SPEED_HIGH); + trace_usb_dwc2_attach_speed(speeds[port->dev->speed]); + s->hprt0 &= ~HPRT0_SPD_MASK; + + switch (port->dev->speed) { + case USB_SPEED_LOW: + s->hprt0 |= HPRT0_SPD_LOW_SPEED << HPRT0_SPD_SHIFT; + break; + case USB_SPEED_FULL: + s->hprt0 |= HPRT0_SPD_FULL_SPEED
<< HPRT0_SPD_SHIFT; + break; + case USB_SPEED_HIGH: + s->hprt0 |= HPRT0_SPD_HIGH_SPEED << HPRT0_SPD_SHIFT; + hispd = 1; + break; + } + + if (hispd) { + s->usb_frame_time = NANOSECONDS_PER_SECOND / 8000; /* 125000 */ + if (NANOSECONDS_PER_SECOND >= USB_HZ_HS) { + s->usb_bit_time = NANOSECONDS_PER_SECOND / USB_HZ_HS; /* 10.4 */ + } else { + s->usb_bit_time = 1; + } + } else { + s->usb_frame_time = NANOSECONDS_PER_SECOND / 1000; /* 1000000 */ + if (NANOSECONDS_PER_SECOND >= USB_HZ_FS) { + s->usb_bit_time = NANOSECONDS_PER_SECOND / USB_HZ_FS; /* 83.3 */ + } else { + s->usb_bit_time = 1; + } + } + + s->fi = USB_FRMINTVL - 1; + s->hprt0 |= HPRT0_CONNDET | HPRT0_CONNSTS; + + dwc2_bus_start(s); + dwc2_raise_global_irq(s, GINTSTS_PRTINT); +} + +static void dwc2_detach(USBPort *port) +{ + DWC2State *s = port->opaque; + + trace_usb_dwc2_detach(port); + assert(port->index == 0); + + dwc2_bus_stop(s); + + s->hprt0 &= ~(HPRT0_SPD_MASK | HPRT0_SUSP | HPRT0_ENA | HPRT0_CONNSTS); + s->hprt0 |= HPRT0_CONNDET | HPRT0_ENACHG; + + dwc2_raise_global_irq(s, GINTSTS_PRTINT); +} + +static void dwc2_child_detach(USBPort *port, USBDevice *child) +{ + trace_usb_dwc2_child_detach(port, child); + assert(port->index == 0); +} + +static void dwc2_wakeup(USBPort *port) +{ + DWC2State *s = port->opaque; + + trace_usb_dwc2_wakeup(port); + assert(port->index == 0); + + if (s->hprt0 & HPRT0_SUSP) { + s->hprt0 |= HPRT0_RES; + dwc2_raise_global_irq(s, GINTSTS_PRTINT); + } + + qemu_bh_schedule(s->async_bh); +} + +static void dwc2_async_packet_complete(USBPort *port, USBPacket *packet) +{ + DWC2State *s = port->opaque; + DWC2Packet *p; + USBDevice *dev; + USBEndpoint *ep; + + assert(port->index == 0); + p = container_of(packet, DWC2Packet, packet); + dev = dwc2_find_device(s, p->devadr); + ep = usb_ep_get(dev, p->pid, p->epnum); + trace_usb_dwc2_async_packet_complete(port, packet, p->index >> 3, dev, + p->epnum, dirs[p->epdir], p->len); + assert(p->async == DWC2_ASYNC_INFLIGHT); + + if (packet->status == 
USB_RET_REMOVE_FROM_QUEUE) { + usb_cancel_packet(packet); + usb_packet_cleanup(packet); + return; + } + + dwc2_handle_packet(s, p->devadr, dev, ep, p->index, false); + + p->async = DWC2_ASYNC_FINISHED; + qemu_bh_schedule(s->async_bh); +} + +static USBPortOps dwc2_port_ops = { + .attach = dwc2_attach, + .detach = dwc2_detach, + .child_detach = dwc2_child_detach, + .wakeup = dwc2_wakeup, + .complete = dwc2_async_packet_complete, +}; + +static uint32_t dwc2_get_frame_remaining(DWC2State *s) +{ + uint32_t fr = 0; + int64_t tks; + + tks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - s->sof_time; + if (tks < 0) { + tks = 0; + } + + /* avoid muldiv if possible */ + if (tks >= s->usb_frame_time) { + goto out; + } + if (tks < s->usb_bit_time) { + fr = s->fi; + goto out; + } + + /* tks = number of ns since SOF, divided by 83 (fs) or 10 (hs) */ + tks = tks / s->usb_bit_time; + if (tks >= (int64_t)s->fi) { + goto out; + } + + /* remaining = frame interval minus tks */ + fr = (uint32_t)((int64_t)s->fi - tks); + +out: + return fr; +} + +static void dwc2_work_bh(void *opaque) +{ + DWC2State *s = opaque; + DWC2Packet *p; + USBDevice *dev; + USBEndpoint *ep; + int64_t t_now, expire_time; + int chan; + bool found = false; + + trace_usb_dwc2_work_bh(); + if (s->working) { + return; + } + s->working = true; + + t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + chan = s->next_chan; + + do { + p = &s->packet[chan]; + if (p->needs_service) { + dev = dwc2_find_device(s, p->devadr); + ep = usb_ep_get(dev, p->pid, p->epnum); + trace_usb_dwc2_work_bh_service(s->next_chan, chan, dev, p->epnum); + dwc2_handle_packet(s, p->devadr, dev, ep, p->index, true); + found = true; + } + if (++chan == DWC2_NB_CHAN) { + chan = 0; + } + if (found) { + s->next_chan = chan; + trace_usb_dwc2_work_bh_next(chan); + } + } while (chan != s->next_chan); + + if (found) { + expire_time = t_now + NANOSECONDS_PER_SECOND / 4000; + timer_mod(s->frame_timer, expire_time); + } + s->working = false; +} + +static void 
dwc2_enable_chan(DWC2State *s, uint32_t index) +{ + USBDevice *dev; + USBEndpoint *ep; + uint32_t hcchar; + uint32_t hctsiz; + uint32_t devadr, epnum, epdir, eptype, pid, len; + DWC2Packet *p; + + assert((index >> 3) < DWC2_NB_CHAN); + p = &s->packet[index >> 3]; + hcchar = s->hreg1[index]; + hctsiz = s->hreg1[index + 4]; + devadr = get_field(hcchar, HCCHAR_DEVADDR); + epnum = get_field(hcchar, HCCHAR_EPNUM); + epdir = get_bit(hcchar, HCCHAR_EPDIR); + eptype = get_field(hcchar, HCCHAR_EPTYPE); + pid = get_field(hctsiz, TSIZ_SC_MC_PID); + len = get_field(hctsiz, TSIZ_XFERSIZE); + + dev = dwc2_find_device(s, devadr); + + trace_usb_dwc2_enable_chan(index >> 3, dev, &p->packet, epnum); + if (dev == NULL) { + return; + } + + if (eptype == USB_ENDPOINT_XFER_CONTROL && pid == TSIZ_SC_MC_PID_SETUP) { + pid = USB_TOKEN_SETUP; + } else { + pid = epdir ? USB_TOKEN_IN : USB_TOKEN_OUT; + } + + ep = usb_ep_get(dev, pid, epnum); + + /* + * Hack: Networking doesn't like us delivering large transfers, it kind + * of works but the latency is horrible. So if the transfer is <= the mtu + * size, we take that as a hint that this might be a network transfer, + * and do the transfer packet-by-packet. 
+ */ + if (len > 1536) { + p->small = false; + } else { + p->small = true; + } + + dwc2_handle_packet(s, devadr, dev, ep, index, true); + qemu_bh_schedule(s->async_bh); +} + +static const char *glbregnm[] = { + "GOTGCTL ", "GOTGINT ", "GAHBCFG ", "GUSBCFG ", "GRSTCTL ", + "GINTSTS ", "GINTMSK ", "GRXSTSR ", "GRXSTSP ", "GRXFSIZ ", + "GNPTXFSIZ", "GNPTXSTS ", "GI2CCTL ", "GPVNDCTL ", "GGPIO ", + "GUID ", "GSNPSID ", "GHWCFG1 ", "GHWCFG2 ", "GHWCFG3 ", + "GHWCFG4 ", "GLPMCFG ", "GPWRDN ", "GDFIFOCFG", "GADPCTL ", + "GREFCLK ", "GINTMSK2 ", "GINTSTS2 " +}; + +static uint64_t dwc2_glbreg_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr <= GINTSTS2); + val = s->glbreg[index]; + + switch (addr) { + case GRSTCTL: + /* clear any self-clearing bits that were set */ + val &= ~(GRSTCTL_TXFFLSH | GRSTCTL_RXFFLSH | GRSTCTL_IN_TKNQ_FLSH | + GRSTCTL_FRMCNTRRST | GRSTCTL_HSFTRST | GRSTCTL_CSFTRST); + s->glbreg[index] = val; + break; + default: + break; + } + + trace_usb_dwc2_glbreg_read(addr, glbregnm[index], val); + return val; +} + +static void dwc2_glbreg_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + int iflg = 0; + + assert(addr <= GINTSTS2); + mmio = &s->glbreg[index]; + old = *mmio; + + switch (addr) { + case GOTGCTL: + /* don't allow setting of read-only bits */ + val &= ~(GOTGCTL_MULT_VALID_BC_MASK | GOTGCTL_BSESVLD | + GOTGCTL_ASESVLD | GOTGCTL_DBNC_SHORT | GOTGCTL_CONID_B | + GOTGCTL_HSTNEGSCS | GOTGCTL_SESREQSCS); + /* don't allow clearing of read-only bits */ + val |= old & (GOTGCTL_MULT_VALID_BC_MASK | GOTGCTL_BSESVLD | + GOTGCTL_ASESVLD | GOTGCTL_DBNC_SHORT | GOTGCTL_CONID_B | + GOTGCTL_HSTNEGSCS | GOTGCTL_SESREQSCS); + break; + case GAHBCFG: + if ((val & GAHBCFG_GLBL_INTR_EN) && !(old & GAHBCFG_GLBL_INTR_EN)) { + iflg = 1; + } + break; + case GRSTCTL: + val |= GRSTCTL_AHBIDLE; + val &= 
~GRSTCTL_DMAREQ; + if (!(old & GRSTCTL_TXFFLSH) && (val & GRSTCTL_TXFFLSH)) { + /* TODO - TX fifo flush */ + qemu_log_mask(LOG_UNIMP, "Tx FIFO flush not implemented\n"); + } + if (!(old & GRSTCTL_RXFFLSH) && (val & GRSTCTL_RXFFLSH)) { + /* TODO - RX fifo flush */ + qemu_log_mask(LOG_UNIMP, "Rx FIFO flush not implemented\n"); + } + if (!(old & GRSTCTL_IN_TKNQ_FLSH) && (val & GRSTCTL_IN_TKNQ_FLSH)) { + /* TODO - device IN token queue flush */ + qemu_log_mask(LOG_UNIMP, "Token queue flush not implemented\n"); + } + if (!(old & GRSTCTL_FRMCNTRRST) && (val & GRSTCTL_FRMCNTRRST)) { + /* TODO - host frame counter reset */ + qemu_log_mask(LOG_UNIMP, "Frame counter reset not implemented\n"); + } + if (!(old & GRSTCTL_HSFTRST) && (val & GRSTCTL_HSFTRST)) { + /* TODO - host soft reset */ + qemu_log_mask(LOG_UNIMP, "Host soft reset not implemented\n"); + } + if (!(old & GRSTCTL_CSFTRST) && (val & GRSTCTL_CSFTRST)) { + /* TODO - core soft reset */ + qemu_log_mask(LOG_UNIMP, "Core soft reset not implemented\n"); + } + /* don't allow clearing of self-clearing bits */ + val |= old & (GRSTCTL_TXFFLSH | GRSTCTL_RXFFLSH | + GRSTCTL_IN_TKNQ_FLSH | GRSTCTL_FRMCNTRRST | + GRSTCTL_HSFTRST | GRSTCTL_CSFTRST); + break; + case GINTSTS: + /* clear the write-1-to-clear bits */ + val |= ~old; + val = ~val; + /* don't allow clearing of read-only bits */ + val |= old & (GINTSTS_PTXFEMP | GINTSTS_HCHINT | GINTSTS_PRTINT | + GINTSTS_OEPINT | GINTSTS_IEPINT | GINTSTS_GOUTNAKEFF | + GINTSTS_GINNAKEFF | GINTSTS_NPTXFEMP | GINTSTS_RXFLVL | + GINTSTS_OTGINT | GINTSTS_CURMODE_HOST); + iflg = 1; + break; + case GINTMSK: + iflg = 1; + break; + default: + break; + } + + trace_usb_dwc2_glbreg_write(addr, glbregnm[index], orig, old, val); + *mmio = val; + + if (iflg) { + dwc2_update_irq(s); + } +} + +static uint64_t dwc2_fszreg_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr == HPTXFSIZ); + val = s->fszreg[index]; + + 
trace_usb_dwc2_fszreg_read(addr, val); + return val; +} + +static void dwc2_fszreg_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + + assert(addr == HPTXFSIZ); + mmio = &s->fszreg[index]; + old = *mmio; + + trace_usb_dwc2_fszreg_write(addr, orig, old, val); + *mmio = val; +} + +static const char *hreg0nm[] = { + "HCFG ", "HFIR ", "HFNUM ", " ", "HPTXSTS ", + "HAINT ", "HAINTMSK ", "HFLBADDR ", " ", " ", + " ", " ", " ", " ", " ", + " ", "HPRT0 " +}; + +static uint64_t dwc2_hreg0_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr >= HCFG && addr <= HPRT0); + val = s->hreg0[index]; + + switch (addr) { + case HFNUM: + val = (dwc2_get_frame_remaining(s) << HFNUM_FRREM_SHIFT) | + (s->hfnum << HFNUM_FRNUM_SHIFT); + break; + default: + break; + } + + trace_usb_dwc2_hreg0_read(addr, hreg0nm[index], val); + return val; +} + +static void dwc2_hreg0_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + USBDevice *dev = s->uport.dev; + uint64_t orig = val; + uint32_t *mmio; + uint32_t tval, told, old; + int prst = 0; + int iflg = 0; + + assert(addr >= HCFG && addr <= HPRT0); + mmio = &s->hreg0[index]; + old = *mmio; + + switch (addr) { + case HFIR: + break; + case HFNUM: + case HPTXSTS: + case HAINT: + qemu_log_mask(LOG_GUEST_ERROR, "%s: write to read-only register\n", + __func__); + return; + case HAINTMSK: + val &= 0xffff; + break; + case HPRT0: + /* don't allow clearing of read-only bits */ + val |= old & (HPRT0_SPD_MASK | HPRT0_LNSTS_MASK | HPRT0_OVRCURRACT | + HPRT0_CONNSTS); + /* don't allow clearing of self-clearing bits */ + val |= old & (HPRT0_SUSP | HPRT0_RES); + /* don't allow setting of self-setting bits */ + if (!(old & HPRT0_ENA) && (val & HPRT0_ENA)) { + val &= ~HPRT0_ENA; + } + /* clear the write-1-to-clear bits */ + tval = val & (HPRT0_OVRCURRCHG | 
HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + told = old & (HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + tval |= ~told; + tval = ~tval; + tval &= (HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + val &= ~(HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_ENA | + HPRT0_CONNDET); + val |= tval; + if (!(val & HPRT0_RST) && (old & HPRT0_RST)) { + if (dev && dev->attached) { + val |= HPRT0_ENA | HPRT0_ENACHG; + prst = 1; + } + } + if (val & (HPRT0_OVRCURRCHG | HPRT0_ENACHG | HPRT0_CONNDET)) { + iflg = 1; + } else { + iflg = -1; + } + break; + default: + break; + } + + if (prst) { + trace_usb_dwc2_hreg0_write(addr, hreg0nm[index], orig, old, + val & ~HPRT0_CONNDET); + trace_usb_dwc2_hreg0_action("call usb_port_reset"); + usb_port_reset(&s->uport); + val &= ~HPRT0_CONNDET; + } else { + trace_usb_dwc2_hreg0_write(addr, hreg0nm[index], orig, old, val); + } + + *mmio = val; + + if (iflg > 0) { + trace_usb_dwc2_hreg0_action("enable PRTINT"); + dwc2_raise_global_irq(s, GINTSTS_PRTINT); + } else if (iflg < 0) { + trace_usb_dwc2_hreg0_action("disable PRTINT"); + dwc2_lower_global_irq(s, GINTSTS_PRTINT); + } +} + +static const char *hreg1nm[] = { + "HCCHAR ", "HCSPLT ", "HCINT ", "HCINTMSK", "HCTSIZ ", "HCDMA ", + " ", "HCDMAB " +}; + +static uint64_t dwc2_hreg1_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr >= HCCHAR(0) && addr <= HCDMAB(DWC2_NB_CHAN - 1)); + val = s->hreg1[index]; + + trace_usb_dwc2_hreg1_read(addr, hreg1nm[index & 7], addr >> 5, val); + return val; +} + +static void dwc2_hreg1_write(void *ptr, hwaddr addr, int index, uint64_t val, + unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + int iflg = 0; + int enflg = 0; + int disflg = 0; + + assert(addr >= HCCHAR(0) && addr <= HCDMAB(DWC2_NB_CHAN - 1)); + mmio = &s->hreg1[index]; + old = *mmio; + + switch (HSOTG_REG(0x500) + (addr & 0x1c)) { + case HCCHAR(0): + if ((val & 
HCCHAR_CHDIS) && !(old & HCCHAR_CHDIS)) { + val &= ~(HCCHAR_CHENA | HCCHAR_CHDIS); + disflg = 1; + } else { + val |= old & HCCHAR_CHDIS; + if ((val & HCCHAR_CHENA) && !(old & HCCHAR_CHENA)) { + val &= ~HCCHAR_CHDIS; + enflg = 1; + } else { + val |= old & HCCHAR_CHENA; + } + } + break; + case HCINT(0): + /* clear the write-1-to-clear bits */ + val |= ~old; + val = ~val; + val &= ~HCINTMSK_RESERVED14_31; + iflg = 1; + break; + case HCINTMSK(0): + val &= ~HCINTMSK_RESERVED14_31; + iflg = 1; + break; + case HCDMAB(0): + qemu_log_mask(LOG_GUEST_ERROR, "%s: write to read-only register\n", + __func__); + return; + default: + break; + } + + trace_usb_dwc2_hreg1_write(addr, hreg1nm[index & 7], index >> 3, orig, + old, val); + *mmio = val; + + if (disflg) { + /* set ChHltd in HCINT */ + s->hreg1[(index & ~7) + 2] |= HCINTMSK_CHHLTD; + iflg = 1; + } + + if (enflg) { + dwc2_enable_chan(s, index & ~7); + } + + if (iflg) { + dwc2_update_hc_irq(s, index & ~7); + } +} + +static const char *pcgregnm[] = { + "PCGCTL ", "PCGCCTL1 " +}; + +static uint64_t dwc2_pcgreg_read(void *ptr, hwaddr addr, int index, + unsigned size) +{ + DWC2State *s = ptr; + uint32_t val; + + assert(addr >= PCGCTL && addr <= PCGCCTL1); + val = s->pcgreg[index]; + + trace_usb_dwc2_pcgreg_read(addr, pcgregnm[index], val); + return val; +} + +static void dwc2_pcgreg_write(void *ptr, hwaddr addr, int index, + uint64_t val, unsigned size) +{ + DWC2State *s = ptr; + uint64_t orig = val; + uint32_t *mmio; + uint32_t old; + + assert(addr >= PCGCTL && addr <= PCGCCTL1); + mmio = &s->pcgreg[index]; + old = *mmio; + + trace_usb_dwc2_pcgreg_write(addr, pcgregnm[index], orig, old, val); + *mmio = val; +} + +static uint64_t dwc2_hsotg_read(void *ptr, hwaddr addr, unsigned size) +{ + uint64_t val; + + switch (addr) { + case HSOTG_REG(0x000) ... 
HSOTG_REG(0x0fc): + val = dwc2_glbreg_read(ptr, addr, (addr - HSOTG_REG(0x000)) >> 2, size); + break; + case HSOTG_REG(0x100): + val = dwc2_fszreg_read(ptr, addr, (addr - HSOTG_REG(0x100)) >> 2, size); + break; + case HSOTG_REG(0x104) ... HSOTG_REG(0x3fc): + /* Gadget-mode registers, just return 0 for now */ + val = 0; + break; + case HSOTG_REG(0x400) ... HSOTG_REG(0x4fc): + val = dwc2_hreg0_read(ptr, addr, (addr - HSOTG_REG(0x400)) >> 2, size); + break; + case HSOTG_REG(0x500) ... HSOTG_REG(0x7fc): + val = dwc2_hreg1_read(ptr, addr, (addr - HSOTG_REG(0x500)) >> 2, size); + break; + case HSOTG_REG(0x800) ... HSOTG_REG(0xdfc): + /* Gadget-mode registers, just return 0 for now */ + val = 0; + break; + case HSOTG_REG(0xe00) ... HSOTG_REG(0xffc): + val = dwc2_pcgreg_read(ptr, addr, (addr - HSOTG_REG(0xe00)) >> 2, size); + break; + default: + g_assert_not_reached(); + } + + return val; +} + +static void dwc2_hsotg_write(void *ptr, hwaddr addr, uint64_t val, + unsigned size) +{ + switch (addr) { + case HSOTG_REG(0x000) ... HSOTG_REG(0x0fc): + dwc2_glbreg_write(ptr, addr, (addr - HSOTG_REG(0x000)) >> 2, val, size); + break; + case HSOTG_REG(0x100): + dwc2_fszreg_write(ptr, addr, (addr - HSOTG_REG(0x100)) >> 2, val, size); + break; + case HSOTG_REG(0x104) ... HSOTG_REG(0x3fc): + /* Gadget-mode registers, do nothing for now */ + break; + case HSOTG_REG(0x400) ... HSOTG_REG(0x4fc): + dwc2_hreg0_write(ptr, addr, (addr - HSOTG_REG(0x400)) >> 2, val, size); + break; + case HSOTG_REG(0x500) ... HSOTG_REG(0x7fc): + dwc2_hreg1_write(ptr, addr, (addr - HSOTG_REG(0x500)) >> 2, val, size); + break; + case HSOTG_REG(0x800) ... HSOTG_REG(0xdfc): + /* Gadget-mode registers, do nothing for now */ + break; + case HSOTG_REG(0xe00) ... 
HSOTG_REG(0xffc): + dwc2_pcgreg_write(ptr, addr, (addr - HSOTG_REG(0xe00)) >> 2, val, size); + break; + default: + g_assert_not_reached(); + } +} + +static const MemoryRegionOps dwc2_mmio_hsotg_ops = { + .read = dwc2_hsotg_read, + .write = dwc2_hsotg_write, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t dwc2_hreg2_read(void *ptr, hwaddr addr, unsigned size) +{ + /* TODO - implement FIFOs to support slave mode */ + trace_usb_dwc2_hreg2_read(addr, addr >> 12, 0); + qemu_log_mask(LOG_UNIMP, "FIFO read not implemented\n"); + return 0; +} + +static void dwc2_hreg2_write(void *ptr, hwaddr addr, uint64_t val, + unsigned size) +{ + uint64_t orig = val; + + /* TODO - implement FIFOs to support slave mode */ + trace_usb_dwc2_hreg2_write(addr, addr >> 12, orig, 0, val); + qemu_log_mask(LOG_UNIMP, "FIFO write not implemented\n"); +} + +static const MemoryRegionOps dwc2_mmio_hreg2_ops = { + .read = dwc2_hreg2_read, + .write = dwc2_hreg2_write, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void dwc2_wakeup_endpoint(USBBus *bus, USBEndpoint *ep, + unsigned int stream) +{ + DWC2State *s = container_of(bus, DWC2State, bus); + + trace_usb_dwc2_wakeup_endpoint(ep, stream); + + /* TODO - do something here? 
*/ + qemu_bh_schedule(s->async_bh); +} + +static USBBusOps dwc2_bus_ops = { + .wakeup_endpoint = dwc2_wakeup_endpoint, +}; + +static void dwc2_work_timer(void *opaque) +{ + DWC2State *s = opaque; + + trace_usb_dwc2_work_timer(); + qemu_bh_schedule(s->async_bh); +} + +static void dwc2_reset_enter(Object *obj, ResetType type) +{ + DWC2Class *c = DWC2_GET_CLASS(obj); + DWC2State *s = DWC2_USB(obj); + int i; + + trace_usb_dwc2_reset_enter(); + + if (c->parent_phases.enter) { + c->parent_phases.enter(obj, type); + } + + timer_del(s->frame_timer); + qemu_bh_cancel(s->async_bh); + + if (s->uport.dev && s->uport.dev->attached) { + usb_detach(&s->uport); + } + + dwc2_bus_stop(s); + + s->gotgctl = GOTGCTL_BSESVLD | GOTGCTL_ASESVLD | GOTGCTL_CONID_B; + s->gotgint = 0; + s->gahbcfg = 0; + s->gusbcfg = 5 << GUSBCFG_USBTRDTIM_SHIFT; + s->grstctl = GRSTCTL_AHBIDLE; + s->gintsts = GINTSTS_CONIDSTSCHNG | GINTSTS_PTXFEMP | GINTSTS_NPTXFEMP | + GINTSTS_CURMODE_HOST; + s->gintmsk = 0; + s->grxstsr = 0; + s->grxstsp = 0; + s->grxfsiz = 1024; + s->gnptxfsiz = 1024 << FIFOSIZE_DEPTH_SHIFT; + s->gnptxsts = (4 << FIFOSIZE_DEPTH_SHIFT) | 1024; + s->gi2cctl = GI2CCTL_I2CDATSE0 | GI2CCTL_ACK; + s->gpvndctl = 0; + s->ggpio = 0; + s->guid = 0; + s->gsnpsid = 0x4f54294a; + s->ghwcfg1 = 0; + s->ghwcfg2 = (8 << GHWCFG2_DEV_TOKEN_Q_DEPTH_SHIFT) | + (4 << GHWCFG2_HOST_PERIO_TX_Q_DEPTH_SHIFT) | + (4 << GHWCFG2_NONPERIO_TX_Q_DEPTH_SHIFT) | + GHWCFG2_DYNAMIC_FIFO | + GHWCFG2_PERIO_EP_SUPPORTED | + ((DWC2_NB_CHAN - 1) << GHWCFG2_NUM_HOST_CHAN_SHIFT) | + (GHWCFG2_INT_DMA_ARCH << GHWCFG2_ARCHITECTURE_SHIFT) | + (GHWCFG2_OP_MODE_NO_SRP_CAPABLE_HOST << GHWCFG2_OP_MODE_SHIFT); + s->ghwcfg3 = (4096 << GHWCFG3_DFIFO_DEPTH_SHIFT) | + (4 << GHWCFG3_PACKET_SIZE_CNTR_WIDTH_SHIFT) | + (4 << GHWCFG3_XFER_SIZE_CNTR_WIDTH_SHIFT); + s->ghwcfg4 = 0; + s->glpmcfg = 0; + s->gpwrdn = GPWRDN_PWRDNRSTN; + s->gdfifocfg = 0; + s->gadpctl = 0; + s->grefclk = 0; + s->gintmsk2 = 0; + s->gintsts2 = 0; + + s->hptxfsiz = 500 << 
FIFOSIZE_DEPTH_SHIFT; + + s->hcfg = 2 << HCFG_RESVALID_SHIFT; + s->hfir = 60000; + s->hfnum = 0x3fff; + s->hptxsts = (16 << TXSTS_QSPCAVAIL_SHIFT) | 32768; + s->haint = 0; + s->haintmsk = 0; + s->hprt0 = 0; + + memset(s->hreg1, 0, sizeof(s->hreg1)); + memset(s->pcgreg, 0, sizeof(s->pcgreg)); + + s->sof_time = 0; + s->frame_number = 0; + s->fi = USB_FRMINTVL - 1; + s->next_chan = 0; + s->working = false; + + for (i = 0; i < DWC2_NB_CHAN; i++) { + s->packet[i].needs_service = false; + } +} + +static void dwc2_reset_hold(Object *obj) +{ + DWC2Class *c = DWC2_GET_CLASS(obj); + DWC2State *s = DWC2_USB(obj); + + trace_usb_dwc2_reset_hold(); + + if (c->parent_phases.hold) { + c->parent_phases.hold(obj); + } + + dwc2_update_irq(s); +} + +static void dwc2_reset_exit(Object *obj) +{ + DWC2Class *c = DWC2_GET_CLASS(obj); + DWC2State *s = DWC2_USB(obj); + + trace_usb_dwc2_reset_exit(); + + if (c->parent_phases.exit) { + c->parent_phases.exit(obj); + } + + s->hprt0 = HPRT0_PWR; + if (s->uport.dev && s->uport.dev->attached) { + usb_attach(&s->uport); + usb_device_reset(s->uport.dev); + } +} + +static void dwc2_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + DWC2State *s = DWC2_USB(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "dma-mr", &err); + if (err) { + error_setg(errp, "dwc2: required dma-mr link not found: %s", + error_get_pretty(err)); + return; + } + assert(obj != NULL); + + s->dma_mr = MEMORY_REGION(obj); + address_space_init(&s->dma_as, s->dma_mr, "dwc2"); + + usb_bus_new(&s->bus, sizeof(s->bus), &dwc2_bus_ops, dev); + usb_register_port(&s->bus, &s->uport, s, 0, &dwc2_port_ops, + USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL | + (s->usb_version == 2 ? 
USB_SPEED_MASK_HIGH : 0)); + s->uport.dev = 0; + + s->usb_frame_time = NANOSECONDS_PER_SECOND / 1000; /* 1000000 */ + if (NANOSECONDS_PER_SECOND >= USB_HZ_FS) { + s->usb_bit_time = NANOSECONDS_PER_SECOND / USB_HZ_FS; /* 83.3 */ + } else { + s->usb_bit_time = 1; + } + + s->fi = USB_FRMINTVL - 1; + s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); + s->async_bh = qemu_bh_new(dwc2_work_bh, s); + + sysbus_init_irq(sbd, &s->irq); +} + +static void dwc2_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + DWC2State *s = DWC2_USB(obj); + + memory_region_init(&s->container, obj, "dwc2", DWC2_MMIO_SIZE); + sysbus_init_mmio(sbd, &s->container); + + memory_region_init_io(&s->hsotg, obj, &dwc2_mmio_hsotg_ops, s, + "dwc2-io", 4 * KiB); + memory_region_add_subregion(&s->container, 0x0000, &s->hsotg); + + memory_region_init_io(&s->fifos, obj, &dwc2_mmio_hreg2_ops, s, + "dwc2-fifo", 64 * KiB); + memory_region_add_subregion(&s->container, 0x1000, &s->fifos); +} + +static const VMStateDescription vmstate_dwc2_state_packet = { + .name = "dwc2/packet", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(devadr, DWC2Packet), + VMSTATE_UINT32(epnum, DWC2Packet), + VMSTATE_UINT32(epdir, DWC2Packet), + VMSTATE_UINT32(mps, DWC2Packet), + VMSTATE_UINT32(pid, DWC2Packet), + VMSTATE_UINT32(index, DWC2Packet), + VMSTATE_UINT32(pcnt, DWC2Packet), + VMSTATE_UINT32(len, DWC2Packet), + VMSTATE_INT32(async, DWC2Packet), + VMSTATE_BOOL(small, DWC2Packet), + VMSTATE_BOOL(needs_service, DWC2Packet), + VMSTATE_END_OF_LIST() + }, +}; + +const VMStateDescription vmstate_dwc2_state = { + .name = "dwc2", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(glbreg, DWC2State, + DWC2_GLBREG_SIZE / sizeof(uint32_t)), + VMSTATE_UINT32_ARRAY(fszreg, DWC2State, + DWC2_FSZREG_SIZE / sizeof(uint32_t)), + 
VMSTATE_UINT32_ARRAY(hreg0, DWC2State, + DWC2_HREG0_SIZE / sizeof(uint32_t)), + VMSTATE_UINT32_ARRAY(hreg1, DWC2State, + DWC2_HREG1_SIZE / sizeof(uint32_t)), + VMSTATE_UINT32_ARRAY(pcgreg, DWC2State, + DWC2_PCGREG_SIZE / sizeof(uint32_t)), + + VMSTATE_TIMER_PTR(eof_timer, DWC2State), + VMSTATE_TIMER_PTR(frame_timer, DWC2State), + VMSTATE_INT64(sof_time, DWC2State), + VMSTATE_INT64(usb_frame_time, DWC2State), + VMSTATE_INT64(usb_bit_time, DWC2State), + VMSTATE_UINT32(usb_version, DWC2State), + VMSTATE_UINT16(frame_number, DWC2State), + VMSTATE_UINT16(fi, DWC2State), + VMSTATE_UINT16(next_chan, DWC2State), + VMSTATE_BOOL(working, DWC2State), + + VMSTATE_STRUCT_ARRAY(packet, DWC2State, DWC2_NB_CHAN, 1, + vmstate_dwc2_state_packet, DWC2Packet), + VMSTATE_UINT8_2DARRAY(usb_buf, DWC2State, DWC2_NB_CHAN, + DWC2_MAX_XFER_SIZE), + + VMSTATE_END_OF_LIST() + } +}; + +static Property dwc2_usb_properties[] = { + DEFINE_PROP_UINT32("usb_version", DWC2State, usb_version, 2), + DEFINE_PROP_END_OF_LIST(), +}; + +static void dwc2_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + DWC2Class *c = DWC2_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + dc->realize = dwc2_realize; + dc->vmsd = &vmstate_dwc2_state; + set_bit(DEVICE_CATEGORY_USB, dc->categories); + device_class_set_props(dc, dwc2_usb_properties); + resettable_class_set_parent_phases(rc, dwc2_reset_enter, dwc2_reset_hold, + dwc2_reset_exit, &c->parent_phases); +} + +static const TypeInfo dwc2_usb_type_info = { + .name = TYPE_DWC2_USB, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(DWC2State), + .instance_init = dwc2_init, + .class_size = sizeof(DWC2Class), + .class_init = dwc2_class_init, +}; + +static void dwc2_usb_register_types(void) +{ + type_register_static(&dwc2_usb_type_info); +} + +type_init(dwc2_usb_register_types) diff --git a/hw/usb/trace-events b/hw/usb/trace-events index 1c24d82c09..5817ce4421 100644 --- a/hw/usb/trace-events +++ 
b/hw/usb/trace-events @@ -176,6 +176,56 @@ usb_xhci_xfer_error(void *xfer, uint32_t ret) "%p: ret %d" usb_xhci_unimplemented(const char *item, int nr) "%s (0x%x)" usb_xhci_enforced_limit(const char *item) "%s" +# hcd-dwc2.c +usb_dwc2_update_irq(uint32_t level) "level=%d" +usb_dwc2_raise_global_irq(uint32_t intr) "0x%08x" +usb_dwc2_lower_global_irq(uint32_t intr) "0x%08x" +usb_dwc2_raise_host_irq(uint32_t intr) "0x%04x" +usb_dwc2_lower_host_irq(uint32_t intr) "0x%04x" +usb_dwc2_sof(int64_t next) "next SOF %" PRId64 +usb_dwc2_bus_start(void) "start SOFs" +usb_dwc2_bus_stop(void) "stop SOFs" +usb_dwc2_find_device(uint8_t addr) "%d" +usb_dwc2_port_disabled(uint32_t pnum) "port %d disabled" +usb_dwc2_device_found(uint32_t pnum) "device found on port %d" +usb_dwc2_device_not_found(void) "device not found" +usb_dwc2_handle_packet(uint32_t chan, void *dev, void *pkt, uint32_t ep, const char *type, const char *dir, uint32_t mps, uint32_t len, uint32_t pcnt) "ch %d dev %p pkt %p ep %d type %s dir %s mps %d len %d pcnt %d" +usb_dwc2_memory_read(uint32_t addr, uint32_t len) "addr %d len %d" +usb_dwc2_packet_status(const char *status, uint32_t len) "status %s len %d" +usb_dwc2_packet_error(const char *status) "ERROR %s" +usb_dwc2_async_packet(void *pkt, uint32_t chan, void *dev, uint32_t ep, const char *dir, uint32_t len) "pkt %p ch %d dev %p ep %d %s len %d" +usb_dwc2_memory_write(uint32_t addr, uint32_t len) "addr %d len %d" +usb_dwc2_packet_done(const char *status, uint32_t actual, uint32_t len, uint32_t pcnt) "status %s actual %d len %d pcnt %d" +usb_dwc2_packet_next(const char *status, uint32_t len, uint32_t pcnt) "status %s len %d pcnt %d" +usb_dwc2_attach(void *port) "port %p" +usb_dwc2_attach_speed(const char *speed) "%s-speed device attached" +usb_dwc2_detach(void *port) "port %p" +usb_dwc2_child_detach(void *port, void *child) "port %p child %p" +usb_dwc2_wakeup(void *port) "port %p" +usb_dwc2_async_packet_complete(void *port, void *pkt, uint32_t chan, void *dev, 
uint32_t ep, const char *dir, uint32_t len) "port %p packet %p ch %d dev %p ep %d %s len %d" +usb_dwc2_work_bh(void) "" +usb_dwc2_work_bh_service(uint32_t first, uint32_t current, void *dev, uint32_t ep) "first %d servicing %d dev %p ep %d" +usb_dwc2_work_bh_next(uint32_t chan) "next %d" +usb_dwc2_enable_chan(uint32_t chan, void *dev, void *pkt, uint32_t ep) "ch %d dev %p pkt %p ep %d" +usb_dwc2_glbreg_read(uint64_t addr, const char *reg, uint32_t val) " 0x%04" PRIx64 " %s val 0x%08x" +usb_dwc2_glbreg_write(uint64_t addr, const char *reg, uint64_t val, uint32_t old, uint64_t result) "0x%04" PRIx64 " %s val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_fszreg_read(uint64_t addr, uint32_t val) " 0x%04" PRIx64 " HPTXFSIZ val 0x%08x" +usb_dwc2_fszreg_write(uint64_t addr, uint64_t val, uint32_t old, uint64_t result) "0x%04" PRIx64 " HPTXFSIZ val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg0_read(uint64_t addr, const char *reg, uint32_t val) " 0x%04" PRIx64 " %s val 0x%08x" +usb_dwc2_hreg0_write(uint64_t addr, const char *reg, uint64_t val, uint32_t old, uint64_t result) " 0x%04" PRIx64 " %s val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg1_read(uint64_t addr, const char *reg, uint64_t chan, uint32_t val) " 0x%04" PRIx64 " %s%" PRId64 " val 0x%08x" +usb_dwc2_hreg1_write(uint64_t addr, const char *reg, uint64_t chan, uint64_t val, uint32_t old, uint64_t result) " 0x%04" PRIx64 " %s%" PRId64 " val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_pcgreg_read(uint64_t addr, const char *reg, uint32_t val) " 0x%04" PRIx64 " %s val 0x%08x" +usb_dwc2_pcgreg_write(uint64_t addr, const char *reg, uint64_t val, uint32_t old, uint64_t result) "0x%04" PRIx64 " %s val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg2_read(uint64_t addr, uint64_t fifo, uint32_t val) " 0x%04" PRIx64 " FIFO%" PRId64 " val 0x%08x" +usb_dwc2_hreg2_write(uint64_t addr, uint64_t fifo, uint64_t val, uint32_t old, uint64_t result) " 0x%04" 
PRIx64 " FIFO%" PRId64 " val 0x%08" PRIx64 " old 0x%08x result 0x%08" PRIx64 +usb_dwc2_hreg0_action(const char *s) "%s" +usb_dwc2_wakeup_endpoint(void *ep, uint32_t stream) "endp %p stream %d" +usb_dwc2_work_timer(void) "" +usb_dwc2_reset_enter(void) "=== RESET enter ===" +usb_dwc2_reset_hold(void) "=== RESET hold ===" +usb_dwc2_reset_exit(void) "=== RESET exit ===" + # desc.c usb_desc_device(int addr, int len, int ret) "dev %d query device, len %d, ret %d" usb_desc_device_qualifier(int addr, int len, int ret) "dev %d query device qualifier, len %d, ret %d" From 7ad3d51ebb8a522ffcad391c4bef281245739dde Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Wed, 20 May 2020 16:53:47 -0700 Subject: [PATCH 18/29] usb: add short-packet handling to usb-storage driver The dwc-hsotg (dwc2) USB host depends on a short packet to indicate the end of an IN transfer. The usb-storage driver currently doesn't provide this, so fix it. I have tested this change rather extensively using a PC emulation with xhci, ehci, and uhci controllers, and have not observed any regressions. 
Signed-off-by: Paul Zimmerman Message-id: 20200520235349.21215-6-pauldzim@gmail.com Signed-off-by: Peter Maydell --- hw/usb/dev-storage.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index 4eba47538d..a5204b6f2a 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -229,6 +229,9 @@ static void usb_msd_copy_data(MSDState *s, USBPacket *p) usb_packet_copy(p, scsi_req_get_buf(s->req) + s->scsi_off, len); s->scsi_len -= len; s->scsi_off += len; + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; if (s->scsi_len == 0 || s->data_len == 0) { scsi_req_continue(s->req); @@ -303,6 +306,9 @@ static void usb_msd_command_complete(SCSIRequest *req, uint32_t status, size_t r if (s->data_len) { int len = (p->iov.size - p->actual_length); usb_packet_skip(p, len); + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; } if (s->data_len == 0) { @@ -469,6 +475,9 @@ static void usb_msd_handle_data(USBDevice *dev, USBPacket *p) int len = p->iov.size - p->actual_length; if (len) { usb_packet_skip(p, len); + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; if (s->data_len == 0) { s->mode = USB_MSDM_CSW; @@ -528,13 +537,17 @@ static void usb_msd_handle_data(USBDevice *dev, USBPacket *p) int len = p->iov.size - p->actual_length; if (len) { usb_packet_skip(p, len); + if (len > s->data_len) { + len = s->data_len; + } s->data_len -= len; if (s->data_len == 0) { s->mode = USB_MSDM_CSW; } } } - if (p->actual_length < p->iov.size) { + if (p->actual_length < p->iov.size && (p->short_not_ok || + s->scsi_len >= p->ep->max_packet_size)) { DPRINTF("Deferring packet %p [wait data-in]\n", p); s->packet = p; p->status = USB_RET_ASYNC; From 60bf734e647f8a6e243766929813358c9fcd4335 Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Wed, 20 May 2020 16:53:48 -0700 Subject: [PATCH 19/29] wire in the dwc-hsotg (dwc2) USB host controller emulation Wire the 
dwc-hsotg (dwc2) emulation into Qemu Signed-off-by: Paul Zimmerman Reviewed-by: Philippe Mathieu-Daude Message-id: 20200520235349.21215-7-pauldzim@gmail.com Signed-off-by: Peter Maydell --- hw/arm/bcm2835_peripherals.c | 21 ++++++++++++++++++++- include/hw/arm/bcm2835_peripherals.h | 3 ++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index b3e0495040..cca5b5ad04 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -129,6 +129,13 @@ static void bcm2835_peripherals_init(Object *obj) /* Mphi */ sysbus_init_child_obj(obj, "mphi", &s->mphi, sizeof(s->mphi), TYPE_BCM2835_MPHI); + + /* DWC2 */ + sysbus_init_child_obj(obj, "dwc2", &s->dwc2, sizeof(s->dwc2), + TYPE_DWC2_USB); + + object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr", + OBJECT(&s->gpu_bus_mr)); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -377,6 +384,19 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_HOSTPORT)); + /* DWC2 */ + object_property_set_bool(OBJECT(&s->dwc2), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, USB_OTG_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dwc2), 0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->dwc2), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_USB)); + create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40); create_unimp(s, &s->cprman, "bcm2835-cprman", CPRMAN_OFFSET, 0x1000); create_unimp(s, &s->a2w, "bcm2835-a2w", A2W_OFFSET, 0x1000); @@ -390,7 +410,6 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) create_unimp(s, &s->otp, "bcm2835-otp", OTP_OFFSET, 0x80); create_unimp(s, &s->dbus, "bcm2835-dbus", DBUS_OFFSET, 0x8000); create_unimp(s, &s->ave0, "bcm2835-ave0", AVE0_OFFSET, 0x8000); - 
create_unimp(s, &s->dwc2, "dwc-usb2", USB_OTG_OFFSET, 0x1000); create_unimp(s, &s->sdramc, "bcm2835-sdramc", SDRAMC_OFFSET, 0x100); } diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index 7a7a8f6141..48a0ad1633 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -27,6 +27,7 @@ #include "hw/sd/bcm2835_sdhost.h" #include "hw/gpio/bcm2835_gpio.h" #include "hw/timer/bcm2835_systmr.h" +#include "hw/usb/hcd-dwc2.h" #include "hw/misc/unimp.h" #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals" @@ -67,7 +68,7 @@ typedef struct BCM2835PeripheralState { UnimplementedDeviceState ave0; UnimplementedDeviceState bscsl; UnimplementedDeviceState smi; - UnimplementedDeviceState dwc2; + DWC2State dwc2; UnimplementedDeviceState sdramc; } BCM2835PeripheralState; From d02ded087030d2b5b5906b127d616acb2a6d1483 Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Wed, 20 May 2020 16:53:49 -0700 Subject: [PATCH 20/29] raspi2 acceptance test: add test for dwc-hsotg (dwc2) USB host Add a check for functional dwc-hsotg (dwc2) USB host emulation to the Raspi 2 acceptance test Signed-off-by: Paul Zimmerman Reviewed-by: Philippe Mathieu-Daude Message-id: 20200520235349.21215-8-pauldzim@gmail.com Signed-off-by: Peter Maydell --- tests/acceptance/boot_linux_console.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py index bbbbd30e48..3f3aa0c854 100644 --- a/tests/acceptance/boot_linux_console.py +++ b/tests/acceptance/boot_linux_console.py @@ -405,13 +405,18 @@ class BootLinuxConsole(LinuxKernelTest): self.vm.set_console() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + - serial_kernel_cmdline[uart_id]) + serial_kernel_cmdline[uart_id] + + ' root=/dev/mmcblk0p2 rootwait ' + + 'dwc_otg.fiq_fsm_enable=0') self.vm.add_args('-kernel', kernel_path, '-dtb', dtb_path, - '-append', kernel_command_line) + 
'-append', kernel_command_line, + '-device', 'usb-kbd') self.vm.launch() console_pattern = 'Kernel command line: %s' % kernel_command_line self.wait_for_console_pattern(console_pattern) + console_pattern = 'Product: QEMU USB Keyboard' + self.wait_for_console_pattern(console_pattern) def test_arm_raspi2_uart0(self): """ From d3c8c736f8b4bdd02831076286b1788232f46ced Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:12 +0100 Subject: [PATCH 21/29] target/arm: Convert Neon VSHL and VSLI 2-reg-shift insn to decodetree Convert the VSHL and VSLI insns from the Neon 2-registers-and-a-shift group to decodetree. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-2-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 25 ++++++++++++++++++++++ target/arm/translate-neon.inc.c | 38 +++++++++++++++++++++++++++++++++ target/arm/translate.c | 18 +++++++--------- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 8af7c53d8b..fcce2edacd 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -201,3 +201,28 @@ VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp + +###################################################################### +# 2-reg-and-shift grouping: +# 1111 001 U 1 D immH:3 immL:3 Vd:4 opc:4 L Q M 1 Vm:4 +###################################################################### +&2reg_shift vm vd q shift size + +@2reg_shl_d .... ... . . . shift:6 .... .... 1 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=3 +@2reg_shl_s .... ... . . . 1 shift:5 .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 +@2reg_shl_h .... ... . . . 01 shift:4 .... .... 0 q:1 . . .... 
\ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 +@2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 + +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h +VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b + +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h +VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 7b19753c8c..7f05323fdf 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1202,3 +1202,41 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds) DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs) DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins) + +static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) +{ + /* Handle a 2-reg-shift insn which can be vectorized. */ + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); + return true; +} + +#define DO_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_vector_2sh(s, a, FUNC); \ + } \ + +DO_2SH(VSHL, tcg_gen_gvec_shli) +DO_2SH(VSLI, gen_gvec_sli) diff --git a/target/arm/translate.c b/target/arm/translate.c index c61180ea61..41fef49dbe 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5294,6 +5294,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) if ((insn & 0x00380080) != 0) { /* Two registers and shift. */ op = (insn >> 8) & 0xf; + + switch (op) { + case 5: /* VSHL, VSLI */ + return 1; /* handled by decodetree */ + default: + break; + } + if (insn & (1 << 7)) { /* 64-bit shift. */ if (op > 7) { @@ -5387,16 +5395,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) gen_gvec_sri(size, rd_ofs, rm_ofs, shift, vec_size, vec_size); return 0; - - case 5: /* VSHL, VSLI */ - if (u) { /* VSLI */ - gen_gvec_sli(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { /* VSHL */ - tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; } if (size == 3) { From 66432d6b8294e3508218b360acfdf7c244eea993 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:13 +0100 Subject: [PATCH 22/29] target/arm: Convert Neon VSHR 2-reg-shift insns to decodetree Convert the VSHR 2-reg-shift insns to decodetree. Note that unlike the legacy decoder, we present the right shift amount to the trans_ function as a positive integer. 
Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-3-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 25 ++++++++++++++++++++ target/arm/translate-neon.inc.c | 41 +++++++++++++++++++++++++++++++++ target/arm/translate.c | 21 +---------------- 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index fcce2edacd..1b877cc68f 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -208,6 +208,21 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp ###################################################################### &2reg_shift vm vd q shift size +# Right shifts are encoded as N - shift, where N is the element size in bits. +%neon_rshift_i6 16:6 !function=rsub_64 +%neon_rshift_i5 16:5 !function=rsub_32 +%neon_rshift_i4 16:4 !function=rsub_16 +%neon_rshift_i3 16:3 !function=rsub_8 + +@2reg_shr_d .... ... . . . ...... .... .... 1 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=3 shift=%neon_rshift_i6 +@2reg_shr_s .... ... . . . 1 ..... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5 +@2reg_shr_h .... ... . . . 01 .... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4 +@2reg_shr_b .... ... . . . 001 ... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i3 + @2reg_shl_d .... ... . . . shift:6 .... .... 1 q:1 . . .... \ &2reg_shift vm=%vm_dp vd=%vd_dp size=3 @2reg_shl_s .... ... . . . 1 shift:5 .... .... 0 q:1 . . .... \ @@ -217,6 +232,16 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp @2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \ &2reg_shift vm=%vm_dp vd=%vd_dp size=0 +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... 
@2reg_shr_h +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b + +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b + VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 7f05323fdf..8693b9aa99 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -31,6 +31,24 @@ static inline int plus1(DisasContext *s, int x) return x + 1; } +static inline int rsub_64(DisasContext *s, int x) +{ + return 64 - x; +} + +static inline int rsub_32(DisasContext *s, int x) +{ + return 32 - x; +} +static inline int rsub_16(DisasContext *s, int x) +{ + return 16 - x; +} +static inline int rsub_8(DisasContext *s, int x) +{ + return 8 - x; +} + /* Include the generated Neon decoder */ #include "decode-neon-dp.inc.c" #include "decode-neon-ls.inc.c" @@ -1240,3 +1258,26 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) DO_2SH(VSHL, tcg_gen_gvec_shli) DO_2SH(VSLI, gen_gvec_sli) + +static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Signed shift out of range results in all-sign-bits */ + a->shift = MIN(a->shift, (8 << a->size) - 1); + return do_vector_2sh(s, a, tcg_gen_gvec_sari); +} + +static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0); +} + +static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Shift out of range is architecturally valid and results in zero. 
*/ + if (a->shift >= (8 << a->size)) { + return do_vector_2sh(s, a, gen_zero_rd_2sh); + } else { + return do_vector_2sh(s, a, tcg_gen_gvec_shri); + } +} diff --git a/target/arm/translate.c b/target/arm/translate.c index 41fef49dbe..4acc94e3cb 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5296,6 +5296,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) op = (insn >> 8) & 0xf; switch (op) { + case 0: /* VSHR */ case 5: /* VSHL, VSLI */ return 1; /* handled by decodetree */ default: @@ -5330,26 +5331,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } switch (op) { - case 0: /* VSHR */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_sari(size, rd_ofs, rm_ofs, - MIN(shift, (8 << size) - 1), - vec_size, vec_size); - } else if (shift >= 8 << size) { - tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size, - vec_size, 0); - } else { - tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - case 1: /* VSRA */ /* Right shift comes here negative. */ shift = -shift; From 434f71ef96d69dbf57d6bb3883a15d2d0b32dea8 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:14 +0100 Subject: [PATCH 23/29] target/arm: Convert Neon VSRA, VSRI, VRSHR, VRSRA 2-reg-shift insns to decodetree Convert the VSRA, VSRI, VRSHR, VRSRA 2-reg-shift insns to decodetree. (These are the last instructions in the group that are vectorized; the rest all require looping over each element.) 
Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-4-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 35 ++++++++++++++++++++++ target/arm/translate-neon.inc.c | 7 +++++ target/arm/translate.c | 52 +++------------------------------ 3 files changed, 46 insertions(+), 48 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 1b877cc68f..659cf13930 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -242,6 +242,41 @@ VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b + +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b + +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b + +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b + +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... 
@2reg_shr_s +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b + +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b + +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_d +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_s +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_h +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_b + VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 8693b9aa99..2868800059 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1258,6 +1258,13 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) DO_2SH(VSHL, tcg_gen_gvec_shli) DO_2SH(VSLI, gen_gvec_sli) +DO_2SH(VSRI, gen_gvec_sri) +DO_2SH(VSRA_S, gen_gvec_ssra) +DO_2SH(VSRA_U, gen_gvec_usra) +DO_2SH(VRSHR_S, gen_gvec_srshr) +DO_2SH(VRSHR_U, gen_gvec_urshr) +DO_2SH(VRSRA_S, gen_gvec_srsra) +DO_2SH(VRSRA_U, gen_gvec_ursra) static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) { diff --git a/target/arm/translate.c b/target/arm/translate.c index 4acc94e3cb..2d08c64483 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5297,6 +5297,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) switch (op) { case 0: /* VSHR */ + case 1: /* VSRA */ + case 2: /* VRSHR */ + case 3: /* VRSRA */ + case 4: /* VSRI */ case 5: /* VSHL, VSLI */ return 1; /* handled by decodetree */ default: @@ -5330,54 +5334,6 
@@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) shift = shift - (1 << (size + 3)); } - switch (op) { - case 1: /* VSRA */ - /* Right shift comes here negative. */ - shift = -shift; - if (u) { - gen_gvec_usra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { - gen_gvec_ssra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 2: /* VRSHR */ - /* Right shift comes here negative. */ - shift = -shift; - if (u) { - gen_gvec_urshr(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { - gen_gvec_srshr(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 3: /* VRSRA */ - /* Right shift comes here negative. */ - shift = -shift; - if (u) { - gen_gvec_ursra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { - gen_gvec_srsra(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 4: /* VSRI */ - if (!u) { - return 1; - } - /* Right shift comes here negative. */ - shift = -shift; - gen_gvec_sri(size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - return 0; - } - if (size == 3) { count = q + 1; } else { From 37bfce81b10450071193c8495a07f182ec652e2a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:15 +0100 Subject: [PATCH 24/29] target/arm: Convert VQSHLU, VQSHL 2-reg-shift insns to decodetree Convert the VQSHLU and VQSHL 2-reg-shift insns to decodetree. These are the last of the simple shift-by-immediate insns. 
Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-5-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 15 +++++ target/arm/translate-neon.inc.c | 108 +++++++++++++++++++++++++++++++ target/arm/translate.c | 110 +------------------------------- 3 files changed, 126 insertions(+), 107 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 659cf13930..66c41a53e5 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -286,3 +286,18 @@ VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b + +VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d +VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s +VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h +VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b + +VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d +VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s +VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h +VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b + +VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d +VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s +VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h +VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... 
@2reg_shl_b diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 2868800059..baa985b16c 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1288,3 +1288,111 @@ static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) return do_vector_2sh(s, a, tcg_gen_gvec_shri); } } + +static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpEnvFn *fn) +{ + /* + * 2-reg-and-shift operations, size == 3 case, where the + * function needs to be passed cpu_env. + */ + TCGv_i64 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + constimm = tcg_const_i64(dup_const(a->size, a->shift)); + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i64 tmp = tcg_temp_new_i64(); + + neon_load_reg64(tmp, a->vm + pass); + fn(tmp, cpu_env, tmp, constimm); + neon_store_reg64(tmp, a->vd + pass); + } + tcg_temp_free_i64(constimm); + return true; +} + +static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpEnvFn *fn) +{ + /* + * 2-reg-and-shift operations, size < 3 case, where the + * helper needs to be passed cpu_env. + */ + TCGv_i32 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + constimm = tcg_const_i32(dup_const(a->size, a->shift)); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(a->vm, pass); + fn(tmp, cpu_env, tmp, constimm); + neon_store_reg(a->vd, pass, tmp); + } + tcg_temp_free_i32(constimm); + return true; +} + +#define DO_2SHIFT_ENV(INSN, FUNC) \ + static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ + } \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + static NeonGenTwoOpEnvFn * const fns[] = { \ + gen_helper_neon_##FUNC##8, \ + gen_helper_neon_##FUNC##16, \ + gen_helper_neon_##FUNC##32, \ + }; \ + assert(a->size < ARRAY_SIZE(fns)); \ + return do_2shift_env_32(s, a, fns[a->size]); \ + } + +DO_2SHIFT_ENV(VQSHLU, qshlu_s) +DO_2SHIFT_ENV(VQSHL_U, qshl_u) +DO_2SHIFT_ENV(VQSHL_S, qshl_s) diff --git a/target/arm/translate.c b/target/arm/translate.c index 2d08c64483..c32a16085c 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -3011,29 +3011,6 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1) } } -#define GEN_NEON_INTEGER_OP_ENV(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tmp, 
cpu_env, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) - static TCGv_i32 neon_load_scratch(int scratch) { TCGv_i32 tmp = tcg_temp_new_i32(); @@ -5252,7 +5229,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) int size; int shift; int pass; - int count; int u; int vec_size; uint32_t imm; @@ -5302,6 +5278,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 3: /* VRSRA */ case 4: /* VSRI */ case 5: /* VSHL, VSLI */ + case 6: /* VQSHLU */ + case 7: /* VQSHL */ return 1; /* handled by decodetree */ default: break; @@ -5319,89 +5297,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) size--; } shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op < 8) { - /* Shift by immediate: - VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ - if (q && ((rd | rm) & 1)) { - return 1; - } - if (!u && (op == 4 || op == 6)) { - return 1; - } - /* Right shifts are encoded as N - shift, where N is the - element size in bits. */ - if (op <= 4) { - shift = shift - (1 << (size + 3)); - } - - if (size == 3) { - count = q + 1; - } else { - count = q ? 4: 2; - } - - /* To avoid excessive duplication of ops we implement shift - * by immediate using the variable shift operations. - */ - imm = dup_const(size, shift); - - for (pass = 0; pass < count; pass++) { - if (size == 3) { - neon_load_reg64(cpu_V0, rm + pass); - tcg_gen_movi_i64(cpu_V1, imm); - switch (op) { - case 6: /* VQSHLU */ - gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - break; - case 7: /* VQSHL */ - if (u) { - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } else { - gen_helper_neon_qshl_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } - break; - default: - g_assert_not_reached(); - } - neon_store_reg64(cpu_V0, rd + pass); - } else { /* size < 3 */ - /* Operands in T0 and T1. 
*/ - tmp = neon_load_reg(rm, pass); - tmp2 = tcg_temp_new_i32(); - tcg_gen_movi_i32(tmp2, imm); - switch (op) { - case 6: /* VQSHLU */ - switch (size) { - case 0: - gen_helper_neon_qshlu_s8(tmp, cpu_env, - tmp, tmp2); - break; - case 1: - gen_helper_neon_qshlu_s16(tmp, cpu_env, - tmp, tmp2); - break; - case 2: - gen_helper_neon_qshlu_s32(tmp, cpu_env, - tmp, tmp2); - break; - default: - abort(); - } - break; - case 7: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - default: - g_assert_not_reached(); - } - tcg_temp_free_i32(tmp2); - neon_store_reg(rd, pass, tmp); - } - } /* for pass */ - } else if (op < 10) { + if (op < 10) { /* Shift by immediate and narrow: VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ int input_unsigned = (op == 8) ? !u : u; From 712182d340e33c2ce86143f25fb2f04ae23d90de Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:16 +0100 Subject: [PATCH 25/29] target/arm: Convert Neon narrowing shifts with op==8 to decodetree Convert the Neon narrowing shifts where op==8 to decodetree: * VSHRN * VRSHRN * VQSHRUN * VQRSHRUN Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-6-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 27 ++++++ target/arm/translate-neon.inc.c | 167 ++++++++++++++++++++++++++++++++ target/arm/translate.c | 1 + 3 files changed, 195 insertions(+) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 66c41a53e5..8161995aee 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -232,6 +232,17 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp @2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \ &2reg_shift vm=%vm_dp vd=%vd_dp size=0 +# Narrowing right shifts: here the Q bit is part of the opcode decode +@2reg_shrn_d .... ... . . . 1 ..... .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=3 q=0 \ + shift=%neon_rshift_i5 +@2reg_shrn_s .... ... . . . 01 .... .... .... 0 . . . .... 
\ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 \ + shift=%neon_rshift_i4 +@2reg_shrn_h .... ... . . . 001 ... .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \ + shift=%neon_rshift_i3 + VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h @@ -301,3 +312,19 @@ VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b + +VSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d +VSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s +VSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h + +VRSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d +VRSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s +VRSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h + +VQSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d +VQSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s +VQSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h + +VQRSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d +VQRSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s +VQRSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... 
@2reg_shrn_h diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index baa985b16c..fe3fb7f62f 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1396,3 +1396,170 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, DO_2SHIFT_ENV(VQSHLU, qshlu_s) DO_2SHIFT_ENV(VQSHL_U, qshl_u) DO_2SHIFT_ENV(VQSHL_S, qshl_s) + +static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ + TCGv_i64 constimm, rm1, rm2; + TCGv_i32 rd; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count. 
+ */ + constimm = tcg_const_i64(-a->shift); + rm1 = tcg_temp_new_i64(); + rm2 = tcg_temp_new_i64(); + + /* Load both inputs first to avoid potential overwrite if rm == rd */ + neon_load_reg64(rm1, a->vm); + neon_load_reg64(rm2, a->vm + 1); + + shiftfn(rm1, rm1, constimm); + rd = tcg_temp_new_i32(); + narrowfn(rd, cpu_env, rm1); + neon_store_reg(a->vd, 0, rd); + + shiftfn(rm2, rm2, constimm); + rd = tcg_temp_new_i32(); + narrowfn(rd, cpu_env, rm2); + neon_store_reg(a->vd, 1, rd); + + tcg_temp_free_i64(rm1); + tcg_temp_free_i64(rm2); + tcg_temp_free_i64(constimm); + + return true; +} + +static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ + TCGv_i32 constimm, rm1, rm2, rm3, rm4; + TCGv_i64 rtmp; + uint32_t imm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count + * duplicated into each lane of the immediate value. 
+ */ + if (a->size == 1) { + imm = (uint16_t)(-a->shift); + imm |= imm << 16; + } else { + /* size == 2 */ + imm = -a->shift; + } + constimm = tcg_const_i32(imm); + + /* Load all inputs first to avoid potential overwrite */ + rm1 = neon_load_reg(a->vm, 0); + rm2 = neon_load_reg(a->vm, 1); + rm3 = neon_load_reg(a->vm + 1, 0); + rm4 = neon_load_reg(a->vm + 1, 1); + rtmp = tcg_temp_new_i64(); + + shiftfn(rm1, rm1, constimm); + shiftfn(rm2, rm2, constimm); + + tcg_gen_concat_i32_i64(rtmp, rm1, rm2); + tcg_temp_free_i32(rm2); + + narrowfn(rm1, cpu_env, rtmp); + neon_store_reg(a->vd, 0, rm1); + + shiftfn(rm3, rm3, constimm); + shiftfn(rm4, rm4, constimm); + tcg_temp_free_i32(constimm); + + tcg_gen_concat_i32_i64(rtmp, rm3, rm4); + tcg_temp_free_i32(rm4); + + narrowfn(rm3, cpu_env, rtmp); + tcg_temp_free_i64(rtmp); + neon_store_reg(a->vd, 1, rm3); + return true; +} + +#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ + } +#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ + } + +static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + tcg_gen_extrl_i64_i32(dest, src); +} + +static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u16(dest, src); +} + +static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u8(dest, src); +} + +DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) +DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16) +DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32) +DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) +DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) + 
+DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) + +DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) diff --git a/target/arm/translate.c b/target/arm/translate.c index c32a16085c..11330b9296 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5280,6 +5280,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 5: /* VSHL, VSLI */ case 6: /* VQSHLU */ case 7: /* VQSHL */ + case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */ return 1; /* handled by decodetree */ default: break; From b4a3a77bb7a0dff1cc5673fe3be467d9e3635d44 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:17 +0100 Subject: [PATCH 26/29] target/arm: Convert Neon narrowing shifts with op==9 to decodetree Convert the remaining Neon narrowing shifts to decodetree: * VQSHRN * VQRSHRN Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-7-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 20 ++++++ target/arm/translate-neon.inc.c | 15 +++++ target/arm/translate.c | 110 +------------------------------- 3 files changed, 37 insertions(+), 108 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 8161995aee..79d0bfdd70 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -328,3 +328,23 @@ VQSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h VQRSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d VQRSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s VQRSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... 
@2reg_shrn_h + +# VQSHRN with signed input +VQSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d +VQSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s +VQSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h + +# VQRSHRN with signed input +VQRSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d +VQRSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s +VQRSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h + +# VQSHRN with unsigned input +VQSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d +VQSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s +VQSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h + +# VQRSHRN with unsigned input +VQRSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d +VQRSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s +VQRSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... 
@2reg_shrn_h diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index fe3fb7f62f..562470ca08 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1563,3 +1563,18 @@ DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) +DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) + +DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) diff --git a/target/arm/translate.c b/target/arm/translate.c index 11330b9296..883c1a29c7 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -3201,40 +3201,6 @@ static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src) } } -static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift, - int q, int u) -{ - if (q) { - if (u) { - switch (size) { - case 1: gen_helper_neon_rshl_u16(var, var, shift); break; - case 2: gen_helper_neon_rshl_u32(var, var, shift); break; - 
default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_rshl_s16(var, var, shift); break; - case 2: gen_helper_neon_rshl_s32(var, var, shift); break; - default: abort(); - } - } - } else { - if (u) { - switch (size) { - case 1: gen_helper_neon_shl_u16(var, var, shift); break; - case 2: gen_ushl_i32(var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_shl_s16(var, var, shift); break; - case 2: gen_sshl_i32(var, var, shift); break; - default: abort(); - } - } - } -} - static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u) { if (u) { @@ -5281,6 +5247,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 6: /* VQSHLU */ case 7: /* VQSHL */ case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */ + case 9: /* VQSHRN, VQRSHRN */ return 1; /* handled by decodetree */ default: break; @@ -5298,80 +5265,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) size--; } shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op < 10) { - /* Shift by immediate and narrow: - VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ - int input_unsigned = (op == 8) ? 
!u : u; - if (rm & 1) { - return 1; - } - shift = shift - (1 << (size + 3)); - size++; - if (size == 3) { - tmp64 = tcg_const_i64(shift); - neon_load_reg64(cpu_V0, rm); - neon_load_reg64(cpu_V1, rm + 1); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 in; - if (pass == 0) { - in = cpu_V0; - } else { - in = cpu_V1; - } - if (q) { - if (input_unsigned) { - gen_helper_neon_rshl_u64(cpu_V0, in, tmp64); - } else { - gen_helper_neon_rshl_s64(cpu_V0, in, tmp64); - } - } else { - if (input_unsigned) { - gen_ushl_i64(cpu_V0, in, tmp64); - } else { - gen_sshl_i64(cpu_V0, in, tmp64); - } - } - tmp = tcg_temp_new_i32(); - gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); - neon_store_reg(rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i64(tmp64); - } else { - if (size == 1) { - imm = (uint16_t)shift; - imm |= imm << 16; - } else { - /* size == 2 */ - imm = (uint32_t)shift; - } - tmp2 = tcg_const_i32(imm); - tmp4 = neon_load_reg(rm + 1, 0); - tmp5 = neon_load_reg(rm + 1, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(rm, 0); - } else { - tmp = tmp4; - } - gen_neon_shift_narrow(size, tmp, tmp2, q, - input_unsigned); - if (pass == 0) { - tmp3 = neon_load_reg(rm, 1); - } else { - tmp3 = tmp5; - } - gen_neon_shift_narrow(size, tmp3, tmp2, q, - input_unsigned); - tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3); - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(tmp3); - tmp = tcg_temp_new_i32(); - gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); - neon_store_reg(rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i32(tmp2); - } - } else if (op == 10) { + if (op == 10) { /* VSHLL, VMOVL */ if (q || (rd & 1)) { return 1; From 968bf842742a5ffbb0041cb31089e61a9f7a833d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:18 +0100 Subject: [PATCH 27/29] target/arm: Convert Neon VSHLL, VMOVL to decodetree Convert the VSHLL and VMOVL insns from the 2-reg-shift group to decodetree. 
Since the loop always has two passes, we unroll it to avoid the awkward reassignment of one TCGv to another. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-8-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 16 +++++++ target/arm/translate-neon.inc.c | 81 +++++++++++++++++++++++++++++++++ target/arm/translate.c | 46 +------------------ 3 files changed, 99 insertions(+), 44 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 79d0bfdd70..3dde699e97 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -243,6 +243,14 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \ shift=%neon_rshift_i3 +# Long left shifts: again Q is part of opcode decode +@2reg_shll_s .... ... . . . 1 shift:5 .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 +@2reg_shll_h .... ... . . . 01 shift:4 .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 +@2reg_shll_b .... ... . . . 001 shift:3 .... .... 0 . . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0 + VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h @@ -348,3 +356,11 @@ VQSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h VQRSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d VQRSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s VQRSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h + +VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s +VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h +VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b + +VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s +VSHLL_U_2sh 1111 001 1 1 . ...... .... 
1010 . 0 . 1 .... @2reg_shll_h +VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 562470ca08..3d566044f3 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1578,3 +1578,84 @@ DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) + +static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenWidenFn *widenfn, bool u) +{ + TCGv_i64 tmp; + TCGv_i32 rm0, rm1; + uint64_t widen_mask = 0; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is a widen-and-shift operation. The shift is always less + * than the width of the source type, so after widening the input + * vector we can simply shift the whole 64-bit widened register, + * and then clear the potential overflow bits resulting from left + * bits of the narrow input appearing as right bits of the left + * neighbour narrow input. Calculate a mask of bits to clear. 
+ */ + if ((a->shift != 0) && (a->size < 2 || u)) { + int esize = 8 << a->size; + widen_mask = MAKE_64BIT_MASK(0, esize); + widen_mask >>= esize - a->shift; + widen_mask = dup_const(a->size + 1, widen_mask); + } + + rm0 = neon_load_reg(a->vm, 0); + rm1 = neon_load_reg(a->vm, 1); + tmp = tcg_temp_new_i64(); + + widenfn(tmp, rm0); + if (a->shift != 0) { + tcg_gen_shli_i64(tmp, tmp, a->shift); + tcg_gen_andi_i64(tmp, tmp, ~widen_mask); + } + neon_store_reg64(tmp, a->vd); + + widenfn(tmp, rm1); + if (a->shift != 0) { + tcg_gen_shli_i64(tmp, tmp, a->shift); + tcg_gen_andi_i64(tmp, tmp, ~widen_mask); + } + neon_store_reg64(tmp, a->vd + 1); + tcg_temp_free_i64(tmp); + return true; +} + +static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + NeonGenWidenFn *widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], false); +} + +static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + NeonGenWidenFn *widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], true); +} diff --git a/target/arm/translate.c b/target/arm/translate.c index 883c1a29c7..a9f52049e7 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5248,6 +5248,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 7: /* VQSHL */ case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */ case 9: /* VQSHRN, VQRSHRN */ + case 10: /* VSHLL, including VMOVL */ return 1; /* handled by decodetree */ default: break; @@ -5265,50 +5266,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) size--; } shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op == 10) { - /* VSHLL, VMOVL */ - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(rm, 0); - tmp2 = neon_load_reg(rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - - gen_neon_widen(cpu_V0, tmp, size, 
u); - - if (shift != 0) { - /* The shift is less than the width of the source - type, so we can just shift the whole register. */ - tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); - /* Widen the result of shift: we need to clear - * the potential overflow bits resulting from - * left bits of the narrow input appearing as - * right bits of left the neighbour narrow - * input. */ - if (size < 2 || !u) { - uint64_t imm64; - if (size == 0) { - imm = (0xffu >> (8 - shift)); - imm |= imm << 16; - } else if (size == 1) { - imm = 0xffff >> (16 - shift); - } else { - /* size == 2 */ - imm = 0xffffffff >> (32 - shift); - } - if (size < 2) { - imm64 = imm | (((uint64_t)imm) << 32); - } else { - imm64 = imm; - } - tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64); - } - } - neon_store_reg64(cpu_V0, rd + pass); - } - } else if (op >= 14) { + if (op >= 14) { /* VCVT fixed-point. */ TCGv_ptr fpst; TCGv_i32 shiftv; From 3da26f11711caeaa18318b6afa14dfb81d7650ab Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:19 +0100 Subject: [PATCH 28/29] target/arm: Convert VCVT fixed-point ops to decodetree Convert the VCVT fixed-point conversion operations in the Neon 2-regs-and-shift group to decodetree. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-9-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 11 +++++ target/arm/translate-neon.inc.c | 49 +++++++++++++++++++++ target/arm/translate.c | 75 +-------------------------------- 3 files changed, 62 insertions(+), 73 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 3dde699e97..47a5c90b5d 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -251,6 +251,10 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp @2reg_shll_b .... ... . . . 001 shift:3 .... .... 0 . . . .... \ &2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0 +# We use size=0 for fp32 and size=1 for fp16 to match the 3-same encodings. +@2reg_vcvt .... ... 
. . . 1 ..... .... .... . q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i5 + VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h @@ -364,3 +368,10 @@ VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b + +# VCVT fixed<->float conversions +# TODO: FP16 fixed<->float conversions are opc==0b1100 and 0b1101 +VCVT_SF_2sh 1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt +VCVT_UF_2sh 1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt +VCVT_FS_2sh 1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt +VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 3d566044f3..2a445c7589 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1659,3 +1659,52 @@ static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) }; return do_vshll_2sh(s, a, widenfn[a->size], true); } + +static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoSingleOPFn *fn) +{ + /* FP operations in 2-reg-and-shift group */ + TCGv_i32 tmp, shiftv; + TCGv_ptr fpstatus; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpstatus = get_fpstatus_ptr(1); + shiftv = tcg_const_i32(a->shift); + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + tmp = neon_load_reg(a->vm, pass); + fn(tmp, tmp, shiftv, fpstatus); + neon_store_reg(a->vd, pass, tmp); + } + tcg_temp_free_ptr(fpstatus); + tcg_temp_free_i32(shiftv); + return true; +} + +#define DO_FP_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_fp_2sh(s, a, FUNC); \ + } + +DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos) +DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos) +DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero) +DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero) diff --git a/target/arm/translate.c b/target/arm/translate.c index a9f52049e7..166349ee20 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5193,7 +5193,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) int q; int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; int size; - int shift; int pass; int u; int vec_size; @@ -5234,78 +5233,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } else if (insn & (1 << 4)) { if ((insn & 0x00380080) != 0) { - /* Two registers and shift. */ - op = (insn >> 8) & 0xf; - - switch (op) { - case 0: /* VSHR */ - case 1: /* VSRA */ - case 2: /* VRSHR */ - case 3: /* VRSRA */ - case 4: /* VSRI */ - case 5: /* VSHL, VSLI */ - case 6: /* VQSHLU */ - case 7: /* VQSHL */ - case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */ - case 9: /* VQSHRN, VQRSHRN */ - case 10: /* VSHLL, including VMOVL */ - return 1; /* handled by decodetree */ - default: - break; - } - - if (insn & (1 << 7)) { - /* 64-bit shift. */ - if (op > 7) { - return 1; - } - size = 3; - } else { - size = 2; - while ((insn & (1 << (size + 19))) == 0) - size--; - } - shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op >= 14) { - /* VCVT fixed-point. 
*/ - TCGv_ptr fpst; - TCGv_i32 shiftv; - VFPGenFixPointFn *fn; - - if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { - return 1; - } - - if (!(op & 1)) { - if (u) { - fn = gen_helper_vfp_ultos; - } else { - fn = gen_helper_vfp_sltos; - } - } else { - if (u) { - fn = gen_helper_vfp_touls_round_to_zero; - } else { - fn = gen_helper_vfp_tosls_round_to_zero; - } - } - - /* We have already masked out the must-be-1 top bit of imm6, - * hence this 32-shift where the ARM ARM has 64-imm6. - */ - shift = 32 - shift; - fpst = get_fpstatus_ptr(1); - shiftv = tcg_const_i32(shift); - for (pass = 0; pass < (q ? 4 : 2); pass++) { - TCGv_i32 tmpf = neon_load_reg(rm, pass); - fn(tmpf, tmpf, shiftv, fpst); - neon_store_reg(rd, pass, tmpf); - } - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(shiftv); - } else { - return 1; - } + /* Two registers and shift: handled by decodetree */ + return 1; } else { /* (insn & 0x00380080) == 0 */ int invert, reg_ofs, vec_size; From 2c35a39eda0b16c2ed85c94cec204bf5efb97812 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 22 May 2020 15:55:20 +0100 Subject: [PATCH 29/29] target/arm: Convert Neon one-register-and-immediate insns to decodetree Convert the insns in the one-register-and-immediate group to decodetree. In the new decode, our asimd_imm_const() function returns a 64-bit value rather than a 32-bit one, which means we don't need to treat cmode=14 op=1 as a special case in the decoder (it is the only encoding where the two halves of the 64-bit value are different). 
Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20200522145520.6778-10-peter.maydell@linaro.org --- target/arm/neon-dp.decode | 22 ++++++ target/arm/translate-neon.inc.c | 118 ++++++++++++++++++++++++++++++++ target/arm/translate.c | 101 +-------------------------- 3 files changed, 142 insertions(+), 99 deletions(-) diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 47a5c90b5d..bd1b0e13f7 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -375,3 +375,25 @@ VCVT_SF_2sh 1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt VCVT_UF_2sh 1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt VCVT_FS_2sh 1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt + +###################################################################### +# 1-reg-and-modified-immediate grouping: +# 1111 001 i 1 D 000 imm:3 Vd:4 cmode:4 0 Q op 1 Vm:4 +###################################################################### + +&1reg_imm vd q imm cmode op + +%asimd_imm_value 24:1 16:3 0:4 + +@1reg_imm .... ... . . . ... ... .... .... . q:1 . . .... \ + &1reg_imm imm=%asimd_imm_value vd=%vd_dp + +# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but +# not in a way we can conveniently represent in decodetree without +# a lot of repetition: +# VORR: op=0, (cmode & 1) && cmode < 12 +# VBIC: op=1, (cmode & 1) && cmode < 12 +# VMOV: everything else +# So we have a single decode line and check the cmode/op in the +# trans function. +Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... 
@1reg_imm diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 2a445c7589..664d361260 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1708,3 +1708,121 @@ DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos) DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos) DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero) DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero) + +static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) +{ + /* + * Expand the encoded constant. + * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + * cmode = 15 op = 1 must UNDEF; we assume decode has handled that. + */ + switch (cmode) { + case 0: case 1: + /* no-op */ + break; + case 2: case 3: + imm <<= 8; + break; + case 4: case 5: + imm <<= 16; + break; + case 6: case 7: + imm <<= 24; + break; + case 8: case 9: + imm |= imm << 16; + break; + case 10: case 11: + imm = (imm << 8) | (imm << 24); + break; + case 12: + imm = (imm << 8) | 0xff; + break; + case 13: + imm = (imm << 16) | 0xffff; + break; + case 14: + if (op) { + /* + * This is the only case where the top and bottom 32 bits + * of the encoded constant differ. + */ + uint64_t imm64 = 0; + int n; + + for (n = 0; n < 8; n++) { + if (imm & (1 << n)) { + imm64 |= (0xffULL << (n * 8)); + } + } + return imm64; + } + imm |= (imm << 8) | (imm << 16) | (imm << 24); + break; + case 15: + imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) + | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); + break; + } + if (op) { + imm = ~imm; + } + return dup_const(MO_32, imm); +} + +static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, + GVecGen2iFn *fn) +{ + uint64_t imm; + int reg_ofs, vec_size; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + reg_ofs = neon_reg_offset(a->vd, 0); + vec_size = a->q ? 16 : 8; + imm = asimd_imm_const(a->imm, a->cmode, a->op); + + fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); + return true; +} + +static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); +} + +static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) +{ + /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ + GVecGen2iFn *fn; + + if ((a->cmode & 1) && a->cmode < 12) { + /* for op=1, the imm will be inverted, so BIC becomes AND. */ + fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; + } else { + /* There is one unallocated cmode/op combination in this space */ + if (a->cmode == 15 && a->op == 1) { + return false; + } + fn = gen_VMOV_1r; + } + return do_1reg_imm(s, a, fn); +} diff --git a/target/arm/translate.c b/target/arm/translate.c index 166349ee20..bcdfec34d2 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -5232,105 +5232,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) /* Three register same length: handled by decodetree */ return 1; } else if (insn & (1 << 4)) { - if ((insn & 0x00380080) != 0) { - /* Two registers and shift: handled by decodetree */ - return 1; - } else { /* (insn & 0x00380080) == 0 */ - int invert, reg_ofs, vec_size; - - if (q && (rd & 1)) { - return 1; - } - - op = (insn >> 8) & 0xf; - /* One register and immediate. */ - imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); - invert = (insn & (1 << 5)) != 0; - /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. - * We choose to not special-case this and will behave as if a - * valid constant encoding of 0 had been given. 
- */ - switch (op) { - case 0: case 1: - /* no-op */ - break; - case 2: case 3: - imm <<= 8; - break; - case 4: case 5: - imm <<= 16; - break; - case 6: case 7: - imm <<= 24; - break; - case 8: case 9: - imm |= imm << 16; - break; - case 10: case 11: - imm = (imm << 8) | (imm << 24); - break; - case 12: - imm = (imm << 8) | 0xff; - break; - case 13: - imm = (imm << 16) | 0xffff; - break; - case 14: - imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) { - imm = ~imm; - } - break; - case 15: - if (invert) { - return 1; - } - imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) - | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); - break; - } - if (invert) { - imm = ~imm; - } - - reg_ofs = neon_reg_offset(rd, 0); - vec_size = q ? 16 : 8; - - if (op & 1 && op < 12) { - if (invert) { - /* The immediate value has already been inverted, - * so BIC becomes AND. - */ - tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } else { - tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } - } else { - /* VMOV, VMVN. */ - if (op == 14 && invert) { - TCGv_i64 t64 = tcg_temp_new_i64(); - - for (pass = 0; pass <= q; ++pass) { - uint64_t val = 0; - int n; - - for (n = 0; n < 8; n++) { - if (imm & (1 << (n + pass * 8))) { - val |= 0xffull << (n * 8); - } - } - tcg_gen_movi_i64(t64, val); - neon_store_reg64(t64, rd + pass); - } - tcg_temp_free_i64(t64); - } else { - tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size, - vec_size, imm); - } - } - } + /* Two registers and shift or reg and imm: handled by decodetree */ + return 1; } else { /* (insn & 0x00800010 == 0x00800000) */ if (size != 3) { op = (insn >> 8) & 0xf;