diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 1266f45fb1f..9bd98eb4f91 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -5246,10 +5246,25 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16))) return false; - if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256) + /* Expand memcpy for constant length operands without a loop if it + is shorter that way. + + With a constant length argument a + memcpy loop (without pfd) is 36 bytes -> 6 * mvc */ + if (GET_CODE (len) == CONST_INT + && INTVAL (len) >= 0 + && INTVAL (len) <= 256 * 6 + && (!TARGET_MVCLE || INTVAL (len) <= 256)) { - if (INTVAL (len) > 0) - emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1))); + HOST_WIDE_INT o, l; + + for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256) + { + rtx newdst = adjust_address (dst, BLKmode, o); + rtx newsrc = adjust_address (src, BLKmode, o); + emit_insn (gen_movmem_short (newdst, newsrc, + GEN_INT (l > 256 ? 255 : l - 1))); + } } else if (TARGET_MVCLE) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 16b27e61d6f..46734e9cab4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-01-05 Andreas Krebbel + + * gcc.target/s390/memcpy-1.c: New test. + 2017-01-04 Jeff Law PR tree-optimization/78812 diff --git a/gcc/testsuite/gcc.target/s390/memcpy-1.c b/gcc/testsuite/gcc.target/s390/memcpy-1.c new file mode 100644 index 00000000000..58c1b490b67 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/memcpy-1.c @@ -0,0 +1,53 @@ +/* Make sure that short memcpy's with constant length are emitted + without loop statements. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch" } */ + +/* 700 bytes (256, 256 and 188): expect 3 MVCs. */ +void +*memcpy1(void *dest, const void *src) +{ + return __builtin_memcpy (dest, src, 700); +} + +/* Zero length: expect a NOP, i.e. no MVC. */ +void +*memcpy2(void *dest, const void *src) +{ + return __builtin_memcpy (dest, src, 0); +} + +/* 256 bytes: expect 1 MVC. */ +void +*memcpy3(void *dest, const void *src) +{ + return __builtin_memcpy (dest, src, 256); +} + +/* 512 bytes (2 * 256): expect 2 MVCs. */ +void +*memcpy4(void *dest, const void *src) +{ + return __builtin_memcpy (dest, src, 512); +} + +/* 768 bytes (3 * 256): expect 3 MVCs. */ +void +*memcpy5(void *dest, const void *src) +{ + return __builtin_memcpy (dest, src, 768); +} + +/* 1537 bytes (6 * 256 plus 1): expect a loop with 2 MVCs. */ +void +*memcpy6(void *dest, const void *src) +{ + return __builtin_memcpy (dest, src, 1537); +} + +/* memcpy6 uses a loop - check for the two load address instructions + used to increment src and dest. */ +/* { dg-final { scan-assembler-times "la" 2 } } */ +/* Expected MVC total over memcpy1..memcpy6: 3, 0, 1, 2, 3 and 2, i.e. 11. */ +/* { dg-final { scan-assembler-times "mvc" 11 } } */