S/390: Unroll mvc loop for memcpy with small constant lengths.
See the memset unrolling patch.  The very same applies to memcpys with
constant lengths.

2017-01-05  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_expand_movmem): Unroll MVC loop for
	small constant length operands.

gcc/testsuite/ChangeLog:

2017-01-05  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/memcpy-1.c: New test.

From-SVN: r244098
This commit is contained in:
parent
8597cd335e
commit
f5a537e390
|
@ -5246,10 +5246,25 @@ s390_expand_movmem (rtx dst, rtx src, rtx len)
|
|||
&& (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
|
||||
return false;
|
||||
|
||||
if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
|
||||
/* Expand memcpy for constant length operands without a loop if it
|
||||
is shorter that way.
|
||||
|
||||
With a constant length argument a
|
||||
memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
|
||||
if (GET_CODE (len) == CONST_INT
|
||||
&& INTVAL (len) >= 0
|
||||
&& INTVAL (len) <= 256 * 6
|
||||
&& (!TARGET_MVCLE || INTVAL (len) <= 256))
|
||||
{
|
||||
if (INTVAL (len) > 0)
|
||||
emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
|
||||
HOST_WIDE_INT o, l;
|
||||
|
||||
for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
|
||||
{
|
||||
rtx newdst = adjust_address (dst, BLKmode, o);
|
||||
rtx newsrc = adjust_address (src, BLKmode, o);
|
||||
emit_insn (gen_movmem_short (newdst, newsrc,
|
||||
GEN_INT (l > 256 ? 255 : l - 1)));
|
||||
}
|
||||
}
|
||||
|
||||
else if (TARGET_MVCLE)
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/s390/memcpy-1.c: New test.
|
||||
|
||||
2017-01-04 Jeff Law <law@redhat.com>
|
||||
|
||||
PR tree-optimization/78812
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/* Make sure that short memcpy's with constant length are emitted
|
||||
without loop statements. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -mzarch" } */
|
||||
|
||||
/* 3 MVCs: 700 = 256 + 256 + 188; the unrolled expansion emits one
   block move per chunk of at most 256 bytes.  */
|
||||
void
|
||||
*memcpy1(void *dest, const void *src)
|
||||
{
|
||||
return __builtin_memcpy (dest, src, 700);
|
||||
}
|
||||
|
||||
/* NOP: the length is 0, so no mvc is expected for this function (it
   contributes nothing to the mvc total checked at the end of the file).  */
|
||||
void
|
||||
*memcpy2(void *dest, const void *src)
|
||||
{
|
||||
return __builtin_memcpy (dest, src, 0);
|
||||
}
|
||||
|
||||
/* 1 MVC: 256 bytes fit exactly into a single chunk of the unrolled
   expansion.  */
|
||||
void
|
||||
*memcpy3(void *dest, const void *src)
|
||||
{
|
||||
return __builtin_memcpy (dest, src, 256);
|
||||
}
|
||||
|
||||
/* 2 MVCs: 512 = 2 * 256, i.e. two full 256-byte chunks.  */
|
||||
void
|
||||
*memcpy4(void *dest, const void *src)
|
||||
{
|
||||
return __builtin_memcpy (dest, src, 512);
|
||||
}
|
||||
|
||||
/* 3 MVCs: 768 = 3 * 256, i.e. three full 256-byte chunks.  */
|
||||
void
|
||||
*memcpy5(void *dest, const void *src)
|
||||
{
|
||||
return __builtin_memcpy (dest, src, 768);
|
||||
}
|
||||
|
||||
/* Loop with 2 MVCs: 1537 = 256 * 6 + 1 is one byte above the unrolling
   limit tested in s390_expand_movmem (INTVAL (len) <= 256 * 6), so the
   copy is no longer unrolled.  */
|
||||
void
|
||||
*memcpy6(void *dest, const void *src)
|
||||
{
|
||||
return __builtin_memcpy (dest, src, 1537);
|
||||
}
|
||||
|
||||
/* memcpy6 uses a loop - check for the two load address instructions
|
||||
used to increment src and dest. */
|
||||
/* { dg-final { scan-assembler-times "la" 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times "mvc" 11 } } */
|
Loading…
Reference in New Issue