rs6000.c (expand_block_clear): Use vector instructions if available.

* config/rs6000/rs6000.c (expand_block_clear): Use vector
	instructions if available.
	(expand_block_move): Likewise.

From-SVN: r86996
This commit is contained in:
Geoffrey Keating 2004-09-02 22:38:18 +00:00 committed by Geoffrey Keating
parent 02f14c3856
commit 5514620adb
5 changed files with 70 additions and 19 deletions

View File

@ -1,3 +1,9 @@
2004-09-02 Geoffrey Keating <geoffk@apple.com>
* config/rs6000/rs6000.c (expand_block_clear): Use vector
instructions if available.
(expand_block_move): Likewise.
2004-09-03 Jan Hubicka <jh@suse.cz>
* rtl.def (RANGE_INFO, RANGE_REG, RANGE_VAR, RANGE_LIVE): Kill.

View File

@ -8319,11 +8319,12 @@ expand_block_clear (rtx operands[])
rtx orig_dest = operands[0];
rtx bytes_rtx = operands[1];
rtx align_rtx = operands[2];
int constp = (GET_CODE (bytes_rtx) == CONST_INT);
int align;
int bytes;
bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
HOST_WIDE_INT align;
HOST_WIDE_INT bytes;
int offset;
int clear_bytes;
int clear_step;
/* If this is not a fixed size clear, just call memset */
if (! constp)
@ -8339,49 +8340,59 @@ expand_block_clear (rtx operands[])
if (bytes <= 0)
return 1;
if (bytes > (TARGET_POWERPC64 && align >= 32 ? 64 : 32))
return 0;
/* Use the builtin memset after a point, to avoid huge code bloat.
When optimize_size, avoid any significant code bloat; calling
memset is about 4 instructions, so allow for one instruction to
load zero and three to do clearing. */
if (TARGET_ALTIVEC && align >= 128)
clear_step = 16;
else if (TARGET_POWERPC64 && align >= 32)
clear_step = 8;
else
clear_step = 4;
if (optimize_size && bytes > 16)
if (optimize_size && bytes > 3 * clear_step)
return 0;
if (! optimize_size && bytes > 8 * clear_step)
return 0;
for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
{
rtx (*mov) (rtx, rtx);
enum machine_mode mode = BLKmode;
rtx dest;
if (bytes >= 8 && TARGET_POWERPC64
/* 64-bit loads and stores require word-aligned
displacements. */
&& (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
{
clear_bytes = 16;
mode = V4SImode;
}
else if (bytes >= 8 && TARGET_POWERPC64
/* 64-bit loads and stores require word-aligned
displacements. */
&& (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
{
clear_bytes = 8;
mode = DImode;
mov = gen_movdi;
}
else if (bytes >= 4 && !STRICT_ALIGNMENT)
else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
{ /* move 4 bytes */
clear_bytes = 4;
mode = SImode;
mov = gen_movsi;
}
else if (bytes == 2 && !STRICT_ALIGNMENT)
else if (bytes == 2 && (align >= 16 || !STRICT_ALIGNMENT))
{ /* move 2 bytes */
clear_bytes = 2;
mode = HImode;
mov = gen_movhi;
}
else /* move 1 byte at a time */
{
clear_bytes = 1;
mode = QImode;
mov = gen_movqi;
}
dest = adjust_address (orig_dest, mode, offset);
emit_insn ((*mov) (dest, const0_rtx));
emit_move_insn (dest, CONST0_RTX (mode));
}
return 1;
@ -8441,7 +8452,15 @@ expand_block_move (rtx operands[])
enum machine_mode mode = BLKmode;
rtx src, dest;
if (TARGET_STRING
/* Altivec first, since it will be faster than a string move
when it applies, and usually not significantly larger. */
if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
{
move_bytes = 16;
mode = V4SImode;
gen_func.mov = gen_movv4si;
}
else if (TARGET_STRING
&& bytes > 24 /* move up to 32 bytes at a time */
&& ! fixed_regs[5]
&& ! fixed_regs[6]

View File

@ -1,3 +1,8 @@
2004-09-02 Geoffrey Keating <geoffk@apple.com>
* gcc.dg/ppc-vector-memcpy.c: New.
* gcc.dg/ppc-vector-memset.c: New.
2004-09-02 Chao-ying Fu <fu@mips.com>
* gcc.target/mips/mips-3d-1.c: New test.

View File

@ -0,0 +1,9 @@
/* { dg-do compile { target powerpc*-*-* } } */
/* { dg-options "-O -maltivec" } */
/* { dg-final { scan-assembler "lvx" } } */
/* External sink for the array.  It is declared here so that the call in
   foo is not an implicit function declaration, which is invalid since C99
   and rejected by current compilers; this is a compile-only test, so no
   definition is needed.  */
extern void bar (int *);

/* Create an over-aligned local array with a non-zero initializer and hand
   it to bar.  The dg-final directive of this test expects the resulting
   assembly to contain "lvx", i.e. the initializer is expected to be copied
   into x with vector loads (presumably via expand_block_move, which selects
   V4SImode when TARGET_ALTIVEC && bytes >= 16 && align >= 128).  */
void foo(void)
{
  /* Eight ints, explicitly over-aligned so the alignment requirement for
     the vector path is met.  Only the first element is non-zero.  */
  int x[8] __attribute__((aligned(128))) = { 1 };

  /* Let the array escape to an external function (presumably so the
     initialization cannot be discarded as dead).  */
  bar (x);
}

View File

@ -0,0 +1,12 @@
/* { dg-do compile { target powerpc*-*-* } } */
/* { dg-options "-O -maltivec" } */
/* { dg-final { scan-assembler "stvx" } } */
#include <string.h>
/* External sink for the array.  It is declared here so that the call in
   foo is not an implicit function declaration, which is invalid since C99
   and rejected by current compilers; this is a compile-only test, so no
   definition is needed.  */
extern void bar (int *);

/* Zero an over-aligned local array with memset and hand it to bar.  The
   dg-final directive of this test expects the resulting assembly to contain
   "stvx", i.e. the clear is expected to be done with vector stores
   (presumably via expand_block_clear, which selects V4SImode when
   bytes >= 16 && TARGET_ALTIVEC && align >= 128).  */
void foo(void)
{
  /* Eight ints, explicitly over-aligned so the alignment requirement for
     the vector path is met.  */
  int x[8] __attribute__((aligned(128)));

  /* Constant-size clear of the whole array.  */
  memset (x, 0, sizeof (x));

  /* Let the array escape to an external function (presumably so the
     clear cannot be discarded as dead).  */
  bar (x);
}