From bed04a2b9caec96cf339980b64d2939ee9541328 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 2 Sep 2019 09:49:46 +0200 Subject: [PATCH 01/30] s390x/tcg: Reset exception_index to -1 instead of 0 We use the marker "-1" for "no exception". s390_cpu_do_interrupt() might get confused by that. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 29fcce426e..39ee9b3175 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -1747,7 +1747,7 @@ uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2) if (env->int_pgm_code == PGM_PROTECTION) { /* retry if reading is possible */ - cs->exception_index = 0; + cs->exception_index = -1; if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) { /* Fetching permitted; storing not permitted */ return 1; @@ -1757,7 +1757,7 @@ uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2) switch (env->int_pgm_code) { case PGM_PROTECTION: /* Fetching not permitted; storing not permitted */ - cs->exception_index = 0; + cs->exception_index = -1; return 2; case PGM_ADDRESSING: case PGM_TRANS_SPEC: @@ -1767,7 +1767,7 @@ uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2) } /* Translation not available */ - cs->exception_index = 0; + cs->exception_index = -1; return 3; } From d292671ade5408197e36887dc554427a91edc498 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 14:44:31 +0200 Subject: [PATCH 02/30] s390x/tcg: MVCL: Zero out unused bits of address We have to zero out unused bits in 24 and 31-bit addressing mode. Provide a new helper. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 39ee9b3175..b02ad148e5 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -469,6 +469,25 @@ static inline uint64_t get_address(CPUS390XState *env, int reg) return wrap_address(env, env->regs[reg]); } +/* + * Store the address to the given register, zeroing out unused leftmost + * bits in bit positions 32-63 (24-bit and 31-bit mode only). + */ +static inline void set_address_zero(CPUS390XState *env, int reg, + uint64_t address) +{ + if (env->psw.mask & PSW_MASK_64) { + env->regs[reg] = address; + } else { + if (!(env->psw.mask & PSW_MASK_32)) { + address &= 0x00ffffff; + } else { + address &= 0x7fffffff; + } + env->regs[reg] = deposit64(env->regs[reg], 0, 32, address); + } +} + static inline void set_address(CPUS390XState *env, int reg, uint64_t address) { if (env->psw.mask & PSW_MASK_64) { @@ -772,8 +791,8 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen); env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen); - set_address(env, r1, dest); - set_address(env, r2, src); + set_address_zero(env, r1, dest); + set_address_zero(env, r2, src); return cc; } From fbc17598d509be10d5d2349090bbb889ccb4f055 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 14:52:25 +0200 Subject: [PATCH 03/30] s390x/tcg: MVCL: Detect destructive overlaps We'll have to zero-out unused bit positions, so make sure to write the addresses back. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index b02ad148e5..223312a4b1 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -52,6 +52,19 @@ static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key) return true; } +static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest, + uint64_t src, uint32_t len) +{ + if (!len || src == dest) { + return false; + } + /* Take care of wrapping at the end of address space. */ + if (unlikely(wrap_address(env, src + len - 1) < src)) { + return dest > src || dest <= wrap_address(env, src + len - 1); + } + return dest > src && dest <= src + len - 1; +} + /* Reduce the length so that addr + len doesn't cross a page boundary. */ static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr) { @@ -787,7 +800,11 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) uint8_t pad = env->regs[r2 + 1] >> 24; uint32_t cc; - cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra); + if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) { + cc = 3; + } else { + cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra); + } env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen); env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen); From f1c2e27cb5140b385e87f0d8be7606fb79aaa1f2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 14:56:19 +0200 Subject: [PATCH 04/30] s390x/tcg: MVCL: Process max 4k bytes at a time Process max 4k bytes at a time, writing back registers between the accesses. The instruction is interruptible. "For operands longer than 2K bytes, access exceptions are not recognized for locations more than 2K bytes beyond the current location being processed." Note that on z/Architecture, 2k vs. 4k access cannot get differentiated as long as pages are not crossed. This seems to be a leftover from ESA/390. Simply stay within single pages. MVCL handling is quite different than MVCLE/MVCLU handling, so split up the handlers. Defer interrupt handling, as that will require more thought, add a TODO for that. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 44 +++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 223312a4b1..58ab2e48e3 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -798,19 +798,51 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) uint64_t srclen = env->regs[r2 + 1] & 0xffffff; uint64_t src = get_address(env, r2); uint8_t pad = env->regs[r2 + 1] >> 24; - uint32_t cc; + uint32_t cc, cur_len; if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) { cc = 3; + } else if (srclen == destlen) { + cc = 0; + } else if (destlen < srclen) { + cc = 1; } else { - cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra); + cc = 2; } - env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen); - env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen); - set_address_zero(env, r1, dest); - set_address_zero(env, r2, src); + /* We might have to zero-out some bits even if there was no action. */ + if (unlikely(!destlen || cc == 3)) { + set_address_zero(env, r2, src); + set_address_zero(env, r1, dest); + return cc; + } else if (!srclen) { + set_address_zero(env, r2, src); + } + /* + * Only perform one type of type of operation (move/pad) in one step. + * Stay within single pages. + */ + while (destlen) { + cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK)); + if (!srclen) { + fast_memset(env, dest, pad, cur_len, ra); + } else { + cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len); + + fast_memmove(env, dest, src, cur_len, ra); + src = wrap_address(env, src + cur_len); + srclen -= cur_len; + env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen); + set_address_zero(env, r2, src); + } + dest = wrap_address(env, dest + cur_len); + destlen -= cur_len; + env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen); + set_address_zero(env, r1, dest); + + /* TODO: Deliver interrupts. */ + } return cc; } From d573ffde0ced387b90283a20e45471f38b9605f8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 21 Aug 2019 10:04:19 +0200 Subject: [PATCH 05/30] s390x/tcg: MVC: Increment the length once Let's increment the length once. While at it, cleanup the comment. The memset() example is given as a programming note in the PoP, so drop the description. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 58ab2e48e3..013e8d6045 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -320,16 +320,20 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", __func__, l, dest, src); - /* mvc and memmove do not behave the same when areas overlap! */ - /* mvc with source pointing to the byte after the destination is the - same as memset with the first source byte */ + /* MVC always copies one more byte than specified - maximum is 256 */ + l++; + + /* + * "When the operands overlap, the result is obtained as if the operands + * were processed one byte at a time". Only non-destructive overlaps + * behave like memmove(). + */ if (dest == src + 1) { - fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l + 1, ra); - } else if (dest < src || src + l < dest) { - fast_memmove(env, dest, src, l + 1, ra); + fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l, ra); + } else if (dest < src || src + l <= dest) { + fast_memmove(env, dest, src, l, ra); } else { - /* slow version with byte accesses which always work */ - for (i = 0; i <= l; i++) { + for (i = 0; i < l; i++) { uint8_t x = cpu_ldub_data_ra(env, src + i, ra); cpu_stb_data_ra(env, dest + i, x, ra); } From b7dd1f7fd4568326027e8fdf6dcca656e1ce95b9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 13:38:12 +0200 Subject: [PATCH 06/30] s390x/tcg: MVC: Use is_destructive_overlap() Let's use the new helper, that also detects destructive overlaps when wrapping. We'll make the remaining code (e.g., fast_memmove()) aware of wrapping later. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 013e8d6045..c31cf49593 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -330,7 +330,7 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, */ if (dest == src + 1) { fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l, ra); - } else if (dest < src || src + l <= dest) { + } else if (!is_destructive_overlap(env, dest, src, l)) { fast_memmove(env, dest, src, l, ra); } else { for (i = 0; i < l; i++) { From bf349f1a0daf86481ad62df405f84ad70eaa7e68 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 15:09:23 +0200 Subject: [PATCH 07/30] s390x/tcg: MVPG: Check for specification exceptions Perform the checks documented in the PoP. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index c31cf49593..7dfa848744 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -672,6 +672,13 @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2) /* move page */ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2) { + const bool f = extract64(r0, 11, 1); + const bool s = extract64(r0, 10, 1); + + if ((f && s) || extract64(r0, 12, 4)) { + s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC()); + } + /* ??? missing r0 handling, which includes access keys, but more importantly optional suppression of the exception! */ fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC()); From a3910396baaf2baa569e361e1a4fdcc487abf032 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 15:09:47 +0200 Subject: [PATCH 08/30] s390x/tcg: MVPG: Properly wrap the addresses We have to mask of any unused bits. While at it, document what exactly is missing. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 7dfa848744..746f647303 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -679,8 +679,15 @@ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2) s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC()); } - /* ??? missing r0 handling, which includes access keys, but more - importantly optional suppression of the exception! */ + r1 = wrap_address(env, r1 & TARGET_PAGE_MASK); + r2 = wrap_address(env, r2 & TARGET_PAGE_MASK); + + /* + * TODO: + * - Access key handling + * - CC-option with surpression of page-translation exceptions + * - Store r1/r2 register identifiers at real location 162 + */ fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC()); return 0; /* data moved */ } From 86678418b266efcf2a8539e625a98d3ffc5018c4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 2 Sep 2019 16:43:21 +0200 Subject: [PATCH 09/30] s390x/tcg: MVCLU/MVCLE: Process max 4k bytes at a time Let's stay within single pages. ... and indicate cc=3 in case there is work remaining. Keep unicode padding simple. While reworking, properly wrap the addresses. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 54 ++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 746f647303..86238e0163 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -768,8 +768,8 @@ static inline uint32_t do_mvcl(CPUS390XState *env, uint64_t *src, uint64_t *srclen, uint16_t pad, int wordsize, uintptr_t ra) { - uint64_t len = MIN(*srclen, *destlen); - uint32_t cc; + int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK)); + int i, cc; if (*destlen == *srclen) { cc = 0; @@ -779,32 +779,40 @@ static inline uint32_t do_mvcl(CPUS390XState *env, cc = 2; } - /* Copy the src array */ - fast_memmove(env, *dest, *src, len, ra); - *src += len; - *srclen -= len; - *dest += len; - *destlen -= len; + if (!*destlen) { + return cc; + } - /* Pad the remaining area */ - if (wordsize == 1) { - fast_memset(env, *dest, pad, *destlen, ra); - *dest += *destlen; - *destlen = 0; + /* + * Only perform one type of type of operation (move/pad) at a time. + * Stay within single pages. + */ + if (*srclen) { + /* Copy the src array */ + len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len); + *destlen -= len; + *srclen -= len; + fast_memmove(env, *dest, *src, len, ra); + *src = wrap_address(env, *src + len); + *dest = wrap_address(env, *dest + len); + } else if (wordsize == 1) { + /* Pad the remaining area */ + *destlen -= len; + fast_memset(env, *dest, pad, len, ra); + *dest = wrap_address(env, *dest + len); } else { - /* If remaining length is odd, pad with odd byte first. */ - if (*destlen & 1) { - cpu_stb_data_ra(env, *dest, pad & 0xff, ra); - *dest += 1; - *destlen -= 1; - } - /* The remaining length is even, pad using words. */ - for (; *destlen; *dest += 2, *destlen -= 2) { - cpu_stw_data_ra(env, *dest, pad, ra); + /* The remaining length selects the padding byte. */ + for (i = 0; i < len; (*destlen)--, i++) { + if (*destlen & 1) { + cpu_stb_data_ra(env, *dest, pad, ra); + } else { + cpu_stb_data_ra(env, *dest, pad >> 8, ra); + } + *dest = wrap_address(env, *dest + 1); } } - return cc; + return *destlen ? 3 : cc; } /* move long */ From 43df3e71e3c3cc53d294b3c3bed0e860df91ea13 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 13:44:55 +0200 Subject: [PATCH 10/30] s390x/tcg: MVCS/MVCP: Check for special operation exceptions Let's perform the documented checks. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 86238e0163..20e1ac0ea9 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -1960,12 +1960,18 @@ uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2) uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) { + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; uintptr_t ra = GETPC(); int cc = 0, i; HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", __func__, l, a1, a2); + if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) || + psw_as == AS_HOME || psw_as == AS_ACCREG) { + s390_program_interrupt(env, PGM_SPECIAL_OP, ILEN_AUTO, ra); + } + if (l > 256) { /* max 256 */ l = 256; @@ -1983,12 +1989,18 @@ uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) { + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; uintptr_t ra = GETPC(); int cc = 0, i; HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", __func__, l, a1, a2); + if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) || + psw_as == AS_HOME || psw_as == AS_ACCREG) { + s390_program_interrupt(env, PGM_SPECIAL_OP, ILEN_AUTO, ra); + } + if (l > 256) { /* max 256 */ l = 256; From a7627565ae52a6806640906c6e9872443899fca8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 13 Sep 2019 12:27:21 +0200 Subject: [PATCH 11/30] s390x/tcg: MVCOS: Lengths are 32 bit in 24/31-bit mode Triggered by a review comment from Richard, also MVCOS has a 32-bit length in 24/31-bit addressing mode. Add a new helper. Rename wrap_length() to wrap_length31(). Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 20e1ac0ea9..320e9ee65c 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -528,7 +528,15 @@ static inline void set_address(CPUS390XState *env, int reg, uint64_t address) } } -static inline uint64_t wrap_length(CPUS390XState *env, uint64_t length) +static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length) +{ + if (!(env->psw.mask & PSW_MASK_64)) { + return (uint32_t)length; + } + return length; +} + +static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length) { if (!(env->psw.mask & PSW_MASK_64)) { /* 24-Bit and 31-Bit mode */ @@ -539,7 +547,7 @@ static inline uint64_t wrap_length(CPUS390XState *env, uint64_t length) static inline uint64_t get_length(CPUS390XState *env, int reg) { - return wrap_length(env, env->regs[reg]); + return wrap_length31(env, env->regs[reg]); } static inline void set_length(CPUS390XState *env, int reg, uint64_t length) @@ -2378,7 +2386,7 @@ uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src, s390_program_interrupt(env, PGM_PRIVILEGED, 6, ra); } - len = wrap_length(env, len); + len = wrap_length32(env, len); if (len > 4096) { cc = 3; len = 4096; From 373290d8a8bd9457c40cb2ddcc8a87dd7f03bfe6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 13:46:24 +0200 Subject: [PATCH 12/30] s390x/tcg: MVCS/MVCP: Properly wrap the length ... and don't perform any move in case the length is zero. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 320e9ee65c..41d7336a1a 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -1980,10 +1980,13 @@ uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) s390_program_interrupt(env, PGM_SPECIAL_OP, ILEN_AUTO, ra); } + l = wrap_length32(env, l); if (l > 256) { /* max 256 */ l = 256; cc = 3; + } else if (!l) { + return cc; } /* XXX replace w/ memcpy */ @@ -2009,10 +2012,13 @@ uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) s390_program_interrupt(env, PGM_SPECIAL_OP, ILEN_AUTO, ra); } + l = wrap_length32(env, l); if (l > 256) { /* max 256 */ l = 256; cc = 3; + } else if (!l) { + return cc; } /* XXX replace w/ memcpy */ From 087b8193edea9ba74401f9f27ad9ab79598a113a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 12:05:53 +0200 Subject: [PATCH 13/30] s390x/tcg: MVST: Check for specification exceptions Bit position 32-55 of general register 0 must be zero. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 41d7336a1a..ec27be174b 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -706,6 +706,9 @@ uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s) uintptr_t ra = GETPC(); uint32_t len; + if (c & 0xffffff00ull) { + s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, ra); + } c = c & 0xff; d = wrap_address(env, d); s = wrap_address(env, s); From 2bb525e20d72386f49a3b0aacb88a65d5446baac Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 14:13:51 +0200 Subject: [PATCH 14/30] s390x/tcg: MVST: Fix storing back the addresses to registers 24 and 31-bit address space handling is wrong when it comes to storing back the addresses to the register. While at it, read gprs 0 implicitly. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/helper.h | 2 +- target/s390x/insn-data.def | 2 +- target/s390x/mem_helper.c | 26 +++++++++++--------------- target/s390x/translate.c | 8 ++++++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index e9aff83b05..56e8149866 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -20,7 +20,7 @@ DEF_HELPER_FLAGS_4(mvn, TCG_CALL_NO_WG, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(mvo, TCG_CALL_NO_WG, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(mvpg, TCG_CALL_NO_WG, i32, env, i64, i64, i64) DEF_HELPER_FLAGS_4(mvz, TCG_CALL_NO_WG, void, env, i32, i64, i64) -DEF_HELPER_4(mvst, i64, env, i64, i64, i64) +DEF_HELPER_3(mvst, i32, env, i32, i32) DEF_HELPER_4(ex, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(stam, TCG_CALL_NO_WG, void, env, i32, i64, i32) DEF_HELPER_FLAGS_4(lam, TCG_CALL_NO_WG, void, env, i32, i64, i32) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index f421184fcd..449eee1662 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -637,7 +637,7 @@ /* MOVE PAGE */ C(0xb254, MVPG, RRE, Z, r1_o, r2_o, 0, 0, mvpg, 0) /* MOVE STRING */ - C(0xb255, MVST, RRE, Z, r1_o, r2_o, 0, 0, mvst, 0) + C(0xb255, MVST, RRE, Z, 0, 0, 0, 0, mvst, 0) /* MOVE WITH OPTIONAL SPECIFICATION */ C(0xc800, MVCOS, SSF, MVCOS, la1, a2, 0, 0, mvcos, 0) /* MOVE WITH OFFSET */ diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index ec27be174b..a24506676b 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -700,18 +700,18 @@ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2) return 0; /* data moved */ } -/* string copy (c is string terminator) */ -uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s) +/* string copy */ +uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2) { + const uint64_t d = get_address(env, r1); + const uint64_t s = get_address(env, r2); + const uint8_t c = env->regs[0]; uintptr_t ra = GETPC(); uint32_t len; - if (c & 0xffffff00ull) { + if (env->regs[0] & 0xffffff00ull) { s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, ra); } - c = c & 0xff; - d = wrap_address(env, d); - s = wrap_address(env, s); /* Lest we fail to service interrupts in a timely manner, limit the amount of work we're willing to do. For now, let's cap at 8k. */ @@ -719,17 +719,13 @@ uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s) uint8_t v = cpu_ldub_data_ra(env, s + len, ra); cpu_stb_data_ra(env, d + len, v, ra); if (v == c) { - /* Complete. Set CC=1 and advance R1. */ - env->cc_op = 1; - env->retxl = s; - return d + len; + set_address_zero(env, r1, d + len); + return 1; } } - - /* Incomplete. Set CC=3 and signal to advance R1 and R2. */ - env->cc_op = 3; - env->retxl = s + len; - return d + len; + set_address_zero(env, r1, d + len); + set_address_zero(env, r2, s + len); + return 3; } /* load access registers r1 to r3 from memory at a2 */ diff --git a/target/s390x/translate.c b/target/s390x/translate.c index 2927247c82..b0a2500e5f 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -3488,9 +3488,13 @@ static DisasJumpType op_mvpg(DisasContext *s, DisasOps *o) static DisasJumpType op_mvst(DisasContext *s, DisasOps *o) { - gen_helper_mvst(o->in1, cpu_env, regs[0], o->in1, o->in2); + TCGv_i32 t1 = tcg_const_i32(get_field(s->fields, r1)); + TCGv_i32 t2 = tcg_const_i32(get_field(s->fields, r2)); + + gen_helper_mvst(cc_op, cpu_env, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); set_cc_static(s); - return_low128(o->in2); return DISAS_NEXT; } From 817791e83994deee2e5a59e3847f16673c8941bc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 13:26:31 +0200 Subject: [PATCH 15/30] s390x/tcg: Always use MMU_USER_IDX for CONFIG_USER_ONLY Although we basically ignore the index all the time for CONFIG_USER_ONLY, let's simply skip all the checks and always return MMU_USER_IDX in cpu_mmu_index() and get_mem_index(). Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/cpu.h | 4 ++++ target/s390x/translate.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 79202c0980..163dae13d7 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -328,6 +328,9 @@ extern const VMStateDescription vmstate_s390_cpu; static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch) { +#ifdef CONFIG_USER_ONLY + return MMU_USER_IDX; +#else if (!(env->psw.mask & PSW_MASK_DAT)) { return MMU_REAL_IDX; } @@ -351,6 +354,7 @@ static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch) default: abort(); } +#endif } static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc, diff --git a/target/s390x/translate.c b/target/s390x/translate.c index b0a2500e5f..a3e43ff9ec 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -318,6 +318,9 @@ static inline uint64_t ld_code4(CPUS390XState *env, uint64_t pc) static int get_mem_index(DisasContext *s) { +#ifdef CONFIG_USER_ONLY + return MMU_USER_IDX; +#else if (!(s->base.tb->flags & FLAG_MASK_DAT)) { return MMU_REAL_IDX; } @@ -333,6 +336,7 @@ static int get_mem_index(DisasContext *s) tcg_abort(); break; } +#endif } static void gen_exception(int excp) From 70ebd9ce1cb08ffb0ec0d7185f16520998545a7a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 09:50:28 +0200 Subject: [PATCH 16/30] s390x/tcg: Fault-safe memset Replace fast_memset() by access_memset(), that first tries to probe access to all affected pages (maximum is two). We'll use the same mechanism for other types of accesses soon. Only in very rare cases (especially TLB_NOTDIRTY), we'll have to fallback to ld/st helpers. Try to speed up that case as suggested by Richard. We'll rework most involved handlers soon to do all accesses via new fault-safe helpers, especially MVC. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 125 +++++++++++++++++++++++++++++++------- 1 file changed, 104 insertions(+), 21 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index a24506676b..dd5da70746 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -117,27 +117,95 @@ static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr, } } -static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte, - uint32_t l, uintptr_t ra) -{ - int mmu_idx = cpu_mmu_index(env, false); +/* An access covers at most 4096 bytes and therefore at most two pages. */ +typedef struct S390Access { + target_ulong vaddr1; + target_ulong vaddr2; + char *haddr1; + char *haddr2; + uint16_t size1; + uint16_t size2; + /* + * If we can't access the host page directly, we'll have to do I/O access + * via ld/st helpers. These are internal details, so we store the + * mmu idx to do the access here instead of passing it around in the + * helpers. Maybe, one day we can get rid of ld/st access - once we can + * handle TLB_NOTDIRTY differently. We don't expect these special accesses + * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP + * pages, we might trigger a new MMU translation - very unlikely that + * the mapping changes in between and we would trigger a fault. + */ + int mmu_idx; +} S390Access; - while (l > 0) { - void *p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx); - if (p) { - /* Access to the whole page in write mode granted. */ - uint32_t l_adj = adj_len_to_page(l, dest); - memset(p, byte, l_adj); - dest += l_adj; - l -= l_adj; +static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size, + MMUAccessType access_type, int mmu_idx, + uintptr_t ra) +{ + S390Access access = { + .vaddr1 = vaddr, + .size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)), + .mmu_idx = mmu_idx, + }; + + g_assert(size > 0 && size <= 4096); + access.haddr1 = probe_access(env, access.vaddr1, access.size1, access_type, + mmu_idx, ra); + + if (unlikely(access.size1 != size)) { + /* The access crosses page boundaries. */ + access.vaddr2 = wrap_address(env, vaddr + access.size1); + access.size2 = size - access.size1; + access.haddr2 = probe_access(env, access.vaddr2, access.size2, + access_type, mmu_idx, ra); + } + return access; +} + +/* Helper to handle memset on a single page. */ +static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr, + uint8_t byte, uint16_t size, int mmu_idx, + uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + g_assert(haddr); + memset(haddr, byte, size); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + int i; + + if (likely(haddr)) { + memset(haddr, byte, size); + } else { + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + g_assert(size > 0); + helper_ret_stb_mmu(env, vaddr, byte, oi, ra); + haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); + if (likely(haddr)) { + memset(haddr + 1, byte, size - 1); } else { - /* We failed to get access to the whole page. The next write - access will likely fill the QEMU TLB for the next iteration. */ - cpu_stb_data_ra(env, dest, byte, ra); - dest++; - l--; + for (i = 1; i < size; i++) { + helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra); + } } } +#endif +} + +static void access_memset(CPUS390XState *env, S390Access *desta, + uint8_t byte, uintptr_t ra) +{ + + do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1, + desta->mmu_idx, ra); + if (likely(!desta->size2)) { + return; + } + do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2, + desta->mmu_idx, ra); } #ifndef CONFIG_USER_ONLY @@ -259,15 +327,19 @@ uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest, static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src, uintptr_t ra) { + const int mmu_idx = cpu_mmu_index(env, false); + S390Access desta; uint32_t i; uint8_t c = 0; HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", __func__, l, dest, src); + desta = access_prepare(env, dest, l + 1, MMU_DATA_STORE, mmu_idx, ra); + /* xor with itself is the same as memset(0) */ if (src == dest) { - fast_memset(env, dest, 0, l + 1, ra); + access_memset(env, &desta, 0, ra); return 0; } @@ -315,6 +387,8 @@ uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest, static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src, uintptr_t ra) { + const int mmu_idx = cpu_mmu_index(env, false); + S390Access desta; uint32_t i; HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", @@ -323,13 +397,15 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, /* MVC always copies one more byte than specified - maximum is 256 */ l++; + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + /* * "When the operands overlap, the result is obtained as if the operands * were processed one byte at a time". Only non-destructive overlaps * behave like memmove(). */ if (dest == src + 1) { - fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l, ra); + access_memset(env, &desta, cpu_ldub_data_ra(env, src, ra), ra); } else if (!is_destructive_overlap(env, dest, src, l)) { fast_memmove(env, dest, src, l, ra); } else { @@ -775,7 +851,9 @@ static inline uint32_t do_mvcl(CPUS390XState *env, uint64_t *src, uint64_t *srclen, uint16_t pad, int wordsize, uintptr_t ra) { + const int mmu_idx = cpu_mmu_index(env, false); int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK)); + S390Access desta; int i, cc; if (*destlen == *srclen) { @@ -805,7 +883,8 @@ static inline uint32_t do_mvcl(CPUS390XState *env, } else if (wordsize == 1) { /* Pad the remaining area */ *destlen -= len; - fast_memset(env, *dest, pad, len, ra); + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + access_memset(env, &desta, pad, ra); *dest = wrap_address(env, *dest + len); } else { /* The remaining length selects the padding byte. */ @@ -825,6 +904,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env, /* move long */ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) { + const int mmu_idx = cpu_mmu_index(env, false); uintptr_t ra = GETPC(); uint64_t destlen = env->regs[r1 + 1] & 0xffffff; uint64_t dest = get_address(env, r1); @@ -832,6 +912,7 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) uint64_t src = get_address(env, r2); uint8_t pad = env->regs[r2 + 1] >> 24; uint32_t cc, cur_len; + S390Access desta; if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) { cc = 3; @@ -859,7 +940,9 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) while (destlen) { cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK)); if (!srclen) { - fast_memset(env, dest, pad, cur_len, ra); + desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx, + ra); + access_memset(env, &desta, pad, ra); } else { cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len); From b6c636f2cd6c50cdc558e3c02fbc7f2d102955b2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 10:39:49 +0200 Subject: [PATCH 17/30] s390x/tcg: Fault-safe memmove Replace fast_memmove() variants by access_memmove() variants, that first try to probe access to all affected pages (maximum is two pages). Introduce access_get_byte()/access_set_byte(). We might be able to speed up memmove in special cases even further (do single-byte access, use memmove() for remaining bytes in page), however, we'll skip that for now. In MVCOS, simply always call access_memmove_as() and drop the TODO about LAP. LAP is already handled in the MMU. Get rid of adj_len_to_page(), which is now unused. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 234 ++++++++++++++++++++++---------------- 1 file changed, 137 insertions(+), 97 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index dd5da70746..12ffe72c88 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -65,17 +65,6 @@ static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest, return dest > src && dest <= src + len - 1; } -/* Reduce the length so that addr + len doesn't cross a page boundary. */ -static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr) -{ -#ifndef CONFIG_USER_ONLY - if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) { - return -(addr | TARGET_PAGE_MASK); - } -#endif - return len; -} - /* Trigger a SPECIFICATION exception if an address or a length is not naturally aligned. */ static inline void check_alignment(CPUS390XState *env, uint64_t v, @@ -208,39 +197,116 @@ static void access_memset(CPUS390XState *env, S390Access *desta, desta->mmu_idx, ra); } -#ifndef CONFIG_USER_ONLY -static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src, - uint32_t len, int dest_idx, int src_idx, - uintptr_t ra) +static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr, + int offset, int mmu_idx, uintptr_t ra) { - TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx); - TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx); - uint32_t len_adj; - void *src_p; - void *dest_p; - uint8_t x; +#ifdef CONFIG_USER_ONLY + return ldub_p(*haddr + offset); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + uint8_t byte; - while (len > 0) { - src = wrap_address(env, src); - dest = wrap_address(env, dest); - src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx); - dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx); + if (likely(*haddr)) { + return ldub_p(*haddr + offset); + } + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra); + *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx); + return byte; +#endif +} - if (src_p && dest_p) { - /* Access to both whole pages granted. */ - len_adj = adj_len_to_page(adj_len_to_page(len, src), dest); - memmove(dest_p, src_p, len_adj); - } else { - /* We failed to get access to one or both whole pages. The next - read or write access will likely fill the QEMU TLB for the - next iteration. */ - len_adj = 1; - x = helper_ret_ldub_mmu(env, src, oi_src, ra); - helper_ret_stb_mmu(env, dest, x, oi_dest, ra); +static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, + int offset, uintptr_t ra) +{ + if (offset < access->size1) { + return do_access_get_byte(env, access->vaddr1, &access->haddr1, + offset, access->mmu_idx, ra); + } + return do_access_get_byte(env, access->vaddr2, &access->haddr2, + offset - access->size1, access->mmu_idx, ra); +} + +static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr, + int offset, uint8_t byte, int mmu_idx, + uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + stb_p(*haddr + offset, byte); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + + if (likely(*haddr)) { + stb_p(*haddr + offset, byte); + return; + } + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra); + *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); +#endif +} + +static void access_set_byte(CPUS390XState *env, S390Access *access, + int offset, uint8_t byte, uintptr_t ra) +{ + if (offset < access->size1) { + do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte, + access->mmu_idx, ra); + } else { + do_access_set_byte(env, access->vaddr2, &access->haddr2, + offset - access->size1, byte, access->mmu_idx, ra); + } +} + +/* + * Move data with the same semantics as memmove() in case ranges don't overlap + * or src > dest. Undefined behavior on destructive overlaps. + */ +static void access_memmove(CPUS390XState *env, S390Access *desta, + S390Access *srca, uintptr_t ra) +{ + int diff; + + g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2); + + /* Fallback to slow access in case we don't have access to all host pages */ + if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) || + !srca->haddr1 || (srca->size2 && !srca->haddr2))) { + int i; + + for (i = 0; i < desta->size1 + desta->size2; i++) { + uint8_t byte = access_get_byte(env, srca, i, ra); + + access_set_byte(env, desta, i, byte, ra); + } + return; + } + + if (srca->size1 == desta->size1) { + memmove(desta->haddr1, srca->haddr1, srca->size1); + if (unlikely(srca->size2)) { + memmove(desta->haddr2, srca->haddr2, srca->size2); + } + } else if (srca->size1 < desta->size1) { + diff = desta->size1 - srca->size1; + memmove(desta->haddr1, srca->haddr1, srca->size1); + memmove(desta->haddr1 + srca->size1, srca->haddr2, diff); + if (likely(desta->size2)) { + memmove(desta->haddr2, srca->haddr2 + diff, desta->size2); + } + } else { + diff = srca->size1 - desta->size1; + memmove(desta->haddr1, srca->haddr1, desta->size1); + memmove(desta->haddr2, srca->haddr1 + desta->size1, diff); + if (likely(srca->size2)) { + memmove(desta->haddr2 + diff, srca->haddr2, srca->size2); } - src += len_adj; - dest += len_adj; - len -= len_adj; } } @@ -259,45 +325,6 @@ static int mmu_idx_from_as(uint8_t as) } } -static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src, - uint32_t len, uint8_t dest_as, uint8_t src_as, - uintptr_t ra) -{ - int src_idx = mmu_idx_from_as(src_as); - int dest_idx = mmu_idx_from_as(dest_as); - - fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra); -} -#endif - -static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src, - uint32_t l, uintptr_t ra) -{ - int mmu_idx = cpu_mmu_index(env, false); - - while (l > 0) { - void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx); - void *dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx); - if (src_p && dest_p) { - /* Access to both whole pages granted. */ - uint32_t l_adj = adj_len_to_page(l, src); - l_adj = adj_len_to_page(l_adj, dest); - memmove(dest_p, src_p, l_adj); - src += l_adj; - dest += l_adj; - l -= l_adj; - } else { - /* We failed to get access to one or both whole pages. The next - read or write access will likely fill the QEMU TLB for the - next iteration. */ - cpu_stb_data_ra(env, dest, cpu_ldub_data_ra(env, src, ra), ra); - src++; - dest++; - l--; - } - } -} - /* and on array */ static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src, uintptr_t ra) @@ -388,7 +415,7 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src, uintptr_t ra) { const int mmu_idx = cpu_mmu_index(env, false); - S390Access desta; + S390Access srca, desta; uint32_t i; HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", @@ -397,6 +424,7 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, /* MVC always copies one more byte than specified - maximum is 256 */ l++; + srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); /* @@ -405,9 +433,9 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, * behave like memmove(). */ if (dest == src + 1) { - access_memset(env, &desta, cpu_ldub_data_ra(env, src, ra), ra); + access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra); } else if (!is_destructive_overlap(env, dest, src, l)) { - fast_memmove(env, dest, src, l, ra); + access_memmove(env, &desta, &srca, ra); } else { for (i = 0; i < l; i++) { uint8_t x = cpu_ldub_data_ra(env, src + i, ra); @@ -756,8 +784,11 @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2) /* move page */ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2) { + const int mmu_idx = cpu_mmu_index(env, false); const bool f = extract64(r0, 11, 1); const bool s = extract64(r0, 10, 1); + uintptr_t ra = GETPC(); + S390Access srca, desta; if ((f && s) || extract64(r0, 12, 4)) { s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, GETPC()); @@ -772,7 +803,11 @@ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2) * - CC-option with surpression of page-translation exceptions * - Store r1/r2 register identifiers at real location 162 */ - fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC()); + srca = access_prepare(env, r2, TARGET_PAGE_SIZE, MMU_DATA_LOAD, mmu_idx, + ra); + desta = access_prepare(env, r1, TARGET_PAGE_SIZE, MMU_DATA_STORE, mmu_idx, + ra); + access_memmove(env, &desta, &srca, ra); return 0; /* data moved */ } @@ -853,7 +888,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env, { const int mmu_idx = cpu_mmu_index(env, false); int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK)); - S390Access desta; + S390Access srca, desta; int i, cc; if (*destlen == *srclen) { @@ -877,7 +912,9 @@ static inline uint32_t do_mvcl(CPUS390XState *env, len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len); *destlen -= len; *srclen -= len; - fast_memmove(env, *dest, *src, len, ra); + srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + access_memmove(env, &desta, &srca, ra); *src = wrap_address(env, *src + len); *dest = wrap_address(env, *dest + len); } else if (wordsize == 1) { @@ -911,8 +948,8 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) uint64_t srclen = env->regs[r2 + 1] & 0xffffff; uint64_t src = get_address(env, r2); uint8_t pad = env->regs[r2 + 1] >> 24; + S390Access srca, desta; uint32_t cc, cur_len; - S390Access desta; if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) { cc = 3; @@ -946,7 +983,11 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) } else { cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len); - fast_memmove(env, dest, src, cur_len, ra); + srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx, + ra); + desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx, + ra); + access_memmove(env, &desta, &srca, ra); src = wrap_address(env, src + cur_len); srclen -= cur_len; env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen); @@ -2488,16 +2529,15 @@ uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src, s390_program_interrupt(env, PGM_ADDRESSING, 6, ra); } - /* FIXME: a) LAP - * b) Access using correct keys - * c) AR-mode - */ -#ifdef CONFIG_USER_ONLY - /* psw keys are never valid in user mode, we will never reach this */ - g_assert_not_reached(); -#else - fast_memmove_as(env, dest, src, len, dest_as, src_as, ra); -#endif + /* FIXME: Access using correct keys and AR-mode */ + if (len) { + S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD, + mmu_idx_from_as(src_as), ra); + S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE, + mmu_idx_from_as(dest_as), ra); + + access_memmove(env, &desta, &srca, ra); + } return cc; } From efb1a76ef9e39b4a5628a27f0f1df74336f8d240 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Sep 2019 13:50:49 +0200 Subject: [PATCH 18/30] s390x/tcg: MVCS/MVCP: Use access_memmove() As we are moving between address spaces, we can use access_memmove() without checking for destructive overlaps (especially of real storage locations): "Each storage operand is processed left to right. The storage-operand-consistency rules are the same as for MOVE (MVC), except that when the operands overlap in real storage, the use of the common real- storage locations is not necessarily recognized." Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 12ffe72c88..04c4228f13 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -2092,8 +2092,9 @@ uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2) uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) { const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + S390Access srca, desta; uintptr_t ra = GETPC(); - int cc = 0, i; + int cc = 0; HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", __func__, l, a1, a2); @@ -2112,20 +2113,19 @@ uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) return cc; } - /* XXX replace w/ memcpy */ - for (i = 0; i < l; i++) { - uint8_t x = cpu_ldub_primary_ra(env, a2 + i, ra); - cpu_stb_secondary_ra(env, a1 + i, x, ra); - } - + /* TODO: Access key handling */ + srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra); + desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra); + access_memmove(env, &desta, &srca, ra); return cc; } uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) { const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + S390Access srca, desta; uintptr_t ra = GETPC(); - int cc = 0, i; + int cc = 0; HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", __func__, l, a1, a2); @@ -2144,12 +2144,10 @@ uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) return cc; } - /* XXX replace w/ memcpy */ - for (i = 0; i < l; i++) { - uint8_t x = cpu_ldub_secondary_ra(env, a2 + i, ra); - cpu_stb_primary_ra(env, a1 + i, x, ra); - } - + /* TODO: Access key handling */ + srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra); + desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra); + access_memmove(env, &desta, &srca, ra); return cc; } From b7809f369266d6cc1a253688f80a66bfbce9e6dd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Sep 2019 11:30:29 +0200 Subject: [PATCH 19/30] s390x/tcg: MVC: Fault-safe handling on destructive overlaps The last remaining bit for MVC is handling destructive overlaps in a fault-safe way. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 04c4228f13..aed53a37da 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -438,8 +438,9 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, access_memmove(env, &desta, &srca, ra); } else { for (i = 0; i < l; i++) { - uint8_t x = cpu_ldub_data_ra(env, src + i, ra); - cpu_stb_data_ra(env, dest + i, x, ra); + uint8_t byte = access_get_byte(env, &srca, i, ra); + + access_set_byte(env, &desta, i, byte, ra); } } From b8e7b2fe1d8125cf1283ab4076203f1363c91226 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 14:56:07 +0200 Subject: [PATCH 20/30] s390x/tcg: MVCLU: Fault-safe handling The last remaining bit is padding with two bytes. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index aed53a37da..271b1db664 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -925,15 +925,17 @@ static inline uint32_t do_mvcl(CPUS390XState *env, access_memset(env, &desta, pad, ra); *dest = wrap_address(env, *dest + len); } else { + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + /* The remaining length selects the padding byte. */ for (i = 0; i < len; (*destlen)--, i++) { if (*destlen & 1) { - cpu_stb_data_ra(env, *dest, pad, ra); + access_set_byte(env, &desta, i, pad, ra); } else { - cpu_stb_data_ra(env, *dest, pad >> 8, ra); + access_set_byte(env, &desta, i, pad >> 8, ra); } - *dest = wrap_address(env, *dest + 1); } + *dest = wrap_address(env, *dest + len); } return *destlen ? 3 : cc; From 8c4a732076b2002916777ce1502d9ceb66da9c04 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Sep 2019 16:31:15 +0200 Subject: [PATCH 21/30] s390x/tcg: OC: Fault-safe handling We can process a maximum of 256 bytes, crossing two pages. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 271b1db664..570e995b77 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -389,17 +389,26 @@ uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest, static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src, uintptr_t ra) { + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; uint32_t i; uint8_t c = 0; HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", __func__, l, dest, src); - for (i = 0; i <= l; i++) { - uint8_t x = cpu_ldub_data_ra(env, src + i, ra); - x |= cpu_ldub_data_ra(env, dest + i, ra); + /* OC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) | + access_get_byte(env, &srca2, i, ra); + c |= x; - cpu_stb_data_ra(env, dest + i, x, ra); + access_set_byte(env, &desta, i, x, ra); } return c != 0; } From a8821dd56ea65f73858df0b400702da79608cfeb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Sep 2019 16:48:29 +0200 Subject: [PATCH 22/30] s390x/tcg: XC: Fault-safe handling We can process a maximum of 256 bytes, crossing two pages. While at it, increment the length once. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 570e995b77..0d4e0bc45a 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -355,14 +355,19 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src, uintptr_t ra) { const int mmu_idx = cpu_mmu_index(env, false); - S390Access desta; + S390Access srca1, srca2, desta; uint32_t i; uint8_t c = 0; HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", __func__, l, dest, src); - desta = access_prepare(env, dest, l + 1, MMU_DATA_STORE, mmu_idx, ra); + /* XC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); /* xor with itself is the same as memset(0) */ if (src == dest) { @@ -370,11 +375,12 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest, return 0; } - for (i = 0; i <= l; i++) { - uint8_t x = cpu_ldub_data_ra(env, src + i, ra); - x ^= cpu_ldub_data_ra(env, dest + i, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) ^ + access_get_byte(env, &srca2, i, ra); + c |= x; - cpu_stb_data_ra(env, dest + i, x, ra); + access_set_byte(env, &desta, i, x, ra); } return c != 0; } From 4d78c68bafe4c12d8185a4c33932a1aea35c4921 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Sep 2019 16:51:21 +0200 Subject: [PATCH 23/30] s390x/tcg: NC: Fault-safe handling We can process a maximum of 256 bytes, crossing two pages. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 0d4e0bc45a..a97e4aa535 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -329,17 +329,26 @@ static int mmu_idx_from_as(uint8_t as) static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src, uintptr_t ra) { + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; uint32_t i; uint8_t c = 0; HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", __func__, l, dest, src); - for (i = 0; i <= l; i++) { - uint8_t x = cpu_ldub_data_ra(env, src + i, ra); - x &= cpu_ldub_data_ra(env, dest + i, ra); + /* NC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) & + access_get_byte(env, &srca2, i, ra); + c |= x; - cpu_stb_data_ra(env, dest + i, x, ra); + access_set_byte(env, &desta, i, x, ra); } return c != 0; } From 65a27df92779207c3005e7f0462f4bcc9b6545f7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Sep 2019 17:09:01 +0200 Subject: [PATCH 24/30] s390x/tcg: MVCIN: Fault-safe handling We can process a maximum of 256 bytes, crossing two pages. Calculate the accessed range upfront - src is accessed right-to-left. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index a97e4aa535..eeb2552979 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -479,12 +479,21 @@ void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) /* move inverse */ void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) { + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca, desta; uintptr_t ra = GETPC(); int i; - for (i = 0; i <= l; i++) { - uint8_t v = cpu_ldub_data_ra(env, src - i, ra); - cpu_stb_data_ra(env, dest + i, v, ra); + /* MVCIN always copies one more byte than specified - maximum is 256 */ + l++; + + src = wrap_address(env, src - l + 1); + srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra); + + access_set_byte(env, &desta, i, x, ra); } } From ab8bab68bba3e0c58428249f8b467b93adf98c07 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 11:32:06 +0200 Subject: [PATCH 25/30] s390x/tcg: MVN: Fault-safe handling We can process a maximum of 256 bytes, crossing two pages. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index eeb2552979..7c981f7902 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -500,13 +500,22 @@ void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) /* move numerics */ void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) { + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; uintptr_t ra = GETPC(); int i; - for (i = 0; i <= l; i++) { - uint8_t v = cpu_ldub_data_ra(env, dest + i, ra) & 0xf0; - v |= cpu_ldub_data_ra(env, src + i, ra) & 0x0f; - cpu_stb_data_ra(env, dest + i, v, ra); + /* MVN always copies one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) | + (access_get_byte(env, &srca2, i, ra) & 0xf0); + + access_set_byte(env, &desta, i, x, ra); } } From 6514f42bf8fd03b2d227ae2d0e6bada9b7304573 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 11:37:34 +0200 Subject: [PATCH 26/30] s390x/tcg: MVZ: Fault-safe handling We can process a maximum of 256 bytes, crossing two pages. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 7c981f7902..b781362e16 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -553,13 +553,22 @@ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) /* move zones */ void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) { + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; uintptr_t ra = GETPC(); int i; - for (i = 0; i <= l; i++) { - uint8_t b = cpu_ldub_data_ra(env, dest + i, ra) & 0x0f; - b |= cpu_ldub_data_ra(env, src + i, ra) & 0xf0; - cpu_stb_data_ra(env, dest + i, b, ra); + /* MVZ always copies one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) | + (access_get_byte(env, &srca2, i, ra) & 0x0f); + + access_set_byte(env, &desta, i, x, ra); } } From bb36ed88e91252c8315c151d12df227999604e62 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 14:15:30 +0200 Subject: [PATCH 27/30] s390x/tcg: MVST: Fault-safe handling Access at most single pages and document why. Using the access helpers might over-indicate watchpoints within the same page, I guess we can live with that. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index b781362e16..4a4b4ea0b7 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -866,23 +866,33 @@ uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2) /* string copy */ uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2) { + const int mmu_idx = cpu_mmu_index(env, false); const uint64_t d = get_address(env, r1); const uint64_t s = get_address(env, r2); const uint8_t c = env->regs[0]; + const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK)); + S390Access srca, desta; uintptr_t ra = GETPC(); - uint32_t len; + int i; if (env->regs[0] & 0xffffff00ull) { s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, ra); } - /* Lest we fail to service interrupts in a timely manner, limit the - amount of work we're willing to do. For now, let's cap at 8k. */ - for (len = 0; len < 0x2000; ++len) { - uint8_t v = cpu_ldub_data_ra(env, s + len, ra); - cpu_stb_data_ra(env, d + len, v, ra); + /* + * Our access should not exceed single pages, as we must not report access + * exceptions exceeding the actually copied range (which we don't know at + * this point). We might over-indicate watchpoints within the pages + * (if we ever care, we have to limit processing to a single byte). + */ + srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < len; i++) { + const uint8_t v = access_get_byte(env, &srca, i, ra); + + access_set_byte(env, &desta, i, v, ra); if (v == c) { - set_address_zero(env, r1, d + len); + set_address_zero(env, r1, d + i); return 1; } } From ab89acd0b77355888dcbd68725fdcba03b08e466 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 19:36:30 +0200 Subject: [PATCH 28/30] s390x/tcg: MVO: Fault-safe handling Each operand can have a maximum length of 16. Make sure to prepare all reads/writes before writing. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/mem_helper.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 4a4b4ea0b7..44e535856d 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -522,31 +522,34 @@ void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) /* move with offset */ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) { + const int mmu_idx = cpu_mmu_index(env, false); + /* MVO always processes one more byte than specified - maximum is 16 */ + const int len_dest = (l >> 4) + 1; + const int len_src = (l & 0xf) + 1; uintptr_t ra = GETPC(); - int len_dest = l >> 4; - int len_src = l & 0xf; uint8_t byte_dest, byte_src; - int i; + S390Access srca, desta; + int i, j; - src += len_src; - dest += len_dest; + srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra); /* Handle rightmost byte */ - byte_src = cpu_ldub_data_ra(env, src, ra); - byte_dest = cpu_ldub_data_ra(env, dest, ra); + byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra); + byte_src = access_get_byte(env, &srca, len_src - 1, ra); byte_dest = (byte_dest & 0x0f) | (byte_src << 4); - cpu_stb_data_ra(env, dest, byte_dest, ra); + access_set_byte(env, &desta, len_dest - 1, byte_dest, ra); /* Process remaining bytes from right to left */ - for (i = 1; i <= len_dest; i++) { + for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) { byte_dest = byte_src >> 4; - if (len_src - i >= 0) { - byte_src = cpu_ldub_data_ra(env, src - i, ra); + if (j >= 0) { + byte_src = access_get_byte(env, &srca, j, ra); } else { byte_src = 0; } byte_dest |= byte_src << 4; - cpu_stb_data_ra(env, dest - i, byte_dest, ra); + access_set_byte(env, &desta, i, byte_dest, ra); } } From d9e13c341995537cf6bd18c0c163ce5f93d4c0a2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Sep 2019 20:18:51 +0200 Subject: [PATCH 29/30] tests/tcg: target/s390x: Test MVO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add the simple test based on the example from the PoP. Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Signed-off-by: David Hildenbrand --- tests/tcg/s390x/Makefile.target | 1 + tests/tcg/s390x/mvo.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/tcg/s390x/mvo.c diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 151dc075aa..6a3bfa8b29 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -6,3 +6,4 @@ TESTS+=ipm TESTS+=exrl-trt TESTS+=exrl-trtr TESTS+=pack +TESTS+=mvo diff --git a/tests/tcg/s390x/mvo.c b/tests/tcg/s390x/mvo.c new file mode 100644 index 0000000000..5546fe2a97 --- /dev/null +++ b/tests/tcg/s390x/mvo.c @@ -0,0 +1,25 @@ +#include +#include + +int main(void) +{ + uint8_t dest[6] = {0xff, 0x77, 0x88, 0x99, 0x0c, 0xff}; + uint8_t src[5] = {0xee, 0x12, 0x34, 0x56, 0xee}; + uint8_t expected[6] = {0xff, 0x01, 0x23, 0x45, 0x6c, 0xff}; + int i; + + asm volatile ( + " mvo 0(4,%[dest]),0(3,%[src])\n" + : + : [dest] "d" (dest + 1), + [src] "d" (src + 1) + : "memory"); + + for (i = 0; i < sizeof(expected); i++) { + if (dest[i] != expected[i]) { + fprintf(stderr, "bad data\n"); + return 1; + } + } + return 0; +} From 5d69cbdfdd5cd6dadc9f0c986899844a0e4de703 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 19 Sep 2019 11:21:48 +0200 Subject: [PATCH 30/30] tests/tcg: target/s390x: Test MVC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add a test that especially verifies that no data will be touched in case we cross page boundaries and one page access triggers a fault. Before the fault-safe handling fixes, the test failes with: TEST mvc on s390x data modified during a fault make[2]: *** [../Makefile.target:116: run-mvc] Error 1 Acked-by: Alex Bennée Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- tests/tcg/s390x/Makefile.target | 1 + tests/tcg/s390x/mvc.c | 109 ++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 tests/tcg/s390x/mvc.c diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 6a3bfa8b29..241ef28f61 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -7,3 +7,4 @@ TESTS+=exrl-trt TESTS+=exrl-trtr TESTS+=pack TESTS+=mvo +TESTS+=mvc diff --git a/tests/tcg/s390x/mvc.c b/tests/tcg/s390x/mvc.c new file mode 100644 index 0000000000..aa552d52e5 --- /dev/null +++ b/tests/tcg/s390x/mvc.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include + +jmp_buf jmp_env; + +static void handle_sigsegv(int sig) +{ + siglongjmp(jmp_env, 1); +} + +#define ALLOC_SIZE (2 * 4096) + +static inline void mvc_256(const char *dst, const char *src) +{ + asm volatile ( + " mvc 0(256,%[dst]),0(%[src])\n" + : + : [dst] "d" (dst), + [src] "d" (src) + : "memory"); +} + +int main(void) +{ + char *src, *dst; + int i; + + /* register the SIGSEGV handler */ + if (signal(SIGSEGV, handle_sigsegv) == SIG_ERR) { + fprintf(stderr, "SIGSEGV not registered\n"); + return 1; + } + + /* prepare the buffers - two consecutive pages */ + src = valloc(ALLOC_SIZE); + dst = valloc(ALLOC_SIZE); + memset(src, 0xff, ALLOC_SIZE); + memset(dst, 0x0, ALLOC_SIZE); + + /* protect the second pages */ + if (mprotect(src + 4096, 4096, PROT_NONE) || + mprotect(dst + 4096, 4096, PROT_NONE)) { + fprintf(stderr, "mprotect failed\n"); + return 1; + } + + /* fault on second destination page */ + if (sigsetjmp(jmp_env, 1) == 0) { + mvc_256(dst + 4096 - 128, src); + fprintf(stderr, "fault not triggered\n"); + return 1; + } + + /* fault on second source page */ + if (sigsetjmp(jmp_env, 1) == 0) { + mvc_256(dst, src + 4096 - 128); + fprintf(stderr, "fault not triggered\n"); + return 1; + } + + /* fault on second source and second destination page */ + if (sigsetjmp(jmp_env, 1) == 0) { + mvc_256(dst + 4096 - 128, src + 4096 - 128); + fprintf(stderr, "fault not triggered\n"); + return 1; + } + + /* restore permissions */ + if (mprotect(src + 4096, 4096, PROT_READ | PROT_WRITE) || + mprotect(dst + 4096, 4096, PROT_READ | PROT_WRITE)) { + fprintf(stderr, "mprotect failed\n"); + return 1; + } + + /* no data must be touched during the faults */ + for (i = 0; i < ALLOC_SIZE; i++) { + if (src[i] != 0xff || dst[i]) { + fprintf(stderr, "data modified during a fault\n"); + return 1; + } + } + + /* test if MVC works now correctly accross page boundaries */ + mvc_256(dst + 4096 - 128, src + 4096 - 128); + for (i = 0; i < ALLOC_SIZE; i++) { + if (src[i] != 0xff) { + fprintf(stderr, "src modified\n"); + return 1; + } + if (i < 4096 - 128 || i >= 4096 + 128) { + if (dst[i]) { + fprintf(stderr, "wrong dst modified\n"); + return 1; + } + } else { + if (dst[i] != 0xff) { + fprintf(stderr, "wrong data moved\n"); + return 1; + } + } + } + + return 0; +}