expr.h (move_by_pieces_ninsns): Declare.

gcc/
	* expr.h (move_by_pieces_ninsns): Declare.
	* expr.c (move_by_pieces_ninsns): Make external.
	* config/mips/mips-protos.h (mips_move_by_pieces_p): Declare.
	(mips_store_by_pieces_p): Likewise.
	* config/mips/mips.h (MOVE_BY_PIECES_P): Call mips_move_by_pieces_p.
	(STORE_BY_PIECES_P): Likewise, calling mips_store_by_pieces_p.
	* config/mips/mips.c (mips_move_by_pieces_p): New function.
	(mips_store_by_pieces_p): Likewise.

gcc/testsuite/
	* gcc.dg/memcpy-4.c: Add nomips16 attribute for MIPS targets.
	Increase copy to 5 bytes.  Look for at least two "mem/s/u"s,
	rather than a specific number.

From-SVN: r182801
Author: Richard Sandiford <rdsandiford@googlemail.com>  Date: 2012-01-02 13:56:36 +00:00  (committed by Richard Sandiford)
commit 0d8f5d625f, parent 9f929ce61f
8 changed files with 119 additions and 33 deletions

File: gcc/ChangeLog

@@ -1,3 +1,14 @@
+2012-01-02  Richard Sandiford  <rdsandiford@googlemail.com>
+
+	* expr.h (move_by_pieces_ninsns): Declare.
+	* expr.c (move_by_pieces_ninsns): Make external.
+	* config/mips/mips-protos.h (mips_move_by_pieces_p): Declare.
+	(mips_store_by_pieces_p): Likewise.
+	* config/mips/mips.h (MOVE_BY_PIECES_P): Call mips_move_by_pieces_p.
+	(STORE_BY_PIECES_P): Likewise, calling mips_store_by_pieces_p.
+	* config/mips/mips.c (mips_move_by_pieces_p): New function.
+	(mips_store_by_pieces_p): Likewise.
+
 2012-01-02  Jakub Jelinek  <jakub@redhat.com>
 
 	* passes.c (register_one_dump_file): Free full_name.

File: gcc/config/mips/mips-protos.h

@@ -239,6 +239,8 @@ extern void mips_split_call (rtx, rtx);
 extern bool mips_get_pic_call_symbol (rtx *, int);
 extern void mips_expand_fcc_reload (rtx, rtx, rtx);
 extern void mips_set_return_address (rtx, rtx);
+extern bool mips_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
+extern bool mips_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
 extern bool mips_expand_block_move (rtx, rtx, rtx);
 extern void mips_expand_synci_loop (rtx, rtx);

File: gcc/config/mips/mips.c

@@ -6537,6 +6537,92 @@ mips_expand_fcc_reload (rtx dest, rtx src, rtx scratch)
   emit_insn (gen_slt_sf (dest, fp2, fp1));
 }
 
+/* Implement MOVE_BY_PIECES_P.  */
+
+bool
+mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  if (HAVE_movmemsi)
+    {
+      /* movmemsi is meant to generate code that is at least as good as
+         move_by_pieces.  However, movmemsi effectively uses a by-pieces
+         implementation both for moves smaller than a word and for
+         word-aligned moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT
+         bytes.  We should allow the tree-level optimisers to do such
+         moves by pieces, as it often exposes other optimization
+         opportunities.  We might as well continue to use movmemsi at
+         the rtl level though, as it produces better code when
+         scheduling is disabled (such as at -O).  */
+      if (currently_expanding_to_rtl)
+        return false;
+      if (align < BITS_PER_WORD)
+        return size < UNITS_PER_WORD;
+      return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
+    }
+  /* The default value.  If this becomes a target hook, we should
+     call the default definition instead.  */
+  return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
+          < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()));
+}
+
+/* Implement STORE_BY_PIECES_P.  */
+
+bool
+mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  /* Storing by pieces involves moving constants into registers
+     of size MIN (ALIGN, BITS_PER_WORD), then storing them.
+     We need to decide whether it is cheaper to load the address of
+     constant data into a register and use a block move instead.  */
+
+  /* If the data is only byte aligned, then:
+
+     (a1) A block move of less than 4 bytes would involve 3 LBs and
+          3 SBs.  We might as well use 3 single-instruction LIs and 3 SBs
+          instead.
+
+     (a2) A block move of 4 bytes from aligned source data can use an
+          LW/SWL/SWR sequence.  This is often better than the 4 LIs and
+          4 SBs that we would generate when storing by pieces.  */
+  if (align <= BITS_PER_UNIT)
+    return size < 4;
+
+  /* If the data is 2-byte aligned, then:
+
+     (b1) A block move of less than 4 bytes would use a combination of LBs,
+          LHs, SBs and SHs.  We get better code by using single-instruction
+          LIs, SBs and SHs instead.
+
+     (b2) A block move of 4 bytes from aligned source data would again use
+          an LW/SWL/SWR sequence.  In most cases, loading the address of
+          the source data would require at least one extra instruction.
+          It is often more efficient to use 2 single-instruction LIs and
+          2 SHs instead.
+
+     (b3) A block move of up to 3 additional bytes would be like (b1).
+
+     (b4) A block move of 8 bytes from aligned source data can use two
+          LW/SWL/SWR sequences or a single LD/SDL/SDR sequence.  Both
+          sequences are better than the 4 LIs and 4 SHs that we'd generate
+          when storing by pieces.
+
+     The reasoning for higher alignments is similar:
+
+     (c1) A block move of less than 4 bytes would be the same as (b1).
+
+     (c2) A block move of 4 bytes would use an LW/SW sequence.  Again,
+          loading the address of the source data would typically require
+          at least one extra instruction.  It is generally better to use
+          LUI/ORI/SW instead.
+
+     (c3) A block move of up to 3 additional bytes would be like (b1).
+
+     (c4) A block move of 8 bytes can use two LW/SW sequences or a single
+          LD/SD sequence, and in these cases we've traditionally preferred
+          the memory copy over the more bulky constant moves.  */
+  return size < 8;
+}
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */

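To make the new cut-offs concrete, here is a short standalone sketch (not part of the patch) that tabulates the movmemsi branch of mips_move_by_pieces_p.  The BITS_PER_WORD, UNITS_PER_WORD and MIPS_MAX_MOVE_BYTES_STRAIGHT values are hard-coded stand-ins assumed for a 32-bit target, not the real target macros:

/* Standalone illustration only: mirrors the movmemsi branch of
   mips_move_by_pieces_p with assumed MIPS32 parameters.  */
#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_WORD 32                 /* assumed */
#define UNITS_PER_WORD 4                 /* assumed */
#define MIPS_MAX_MOVE_BYTES_STRAIGHT 16  /* assumed: 4 regs x 4 bytes */

/* The tree-level decision when movmemsi exists: allow by-pieces moves
   only where movmemsi would use an equivalent by-pieces sequence.  */
static bool
sketch_move_by_pieces_p (unsigned long size, unsigned int align)
{
  if (align < BITS_PER_WORD)
    return size < UNITS_PER_WORD;
  return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
}

int
main (void)
{
  static const unsigned int aligns[] = { 8, 16, 32 };
  for (unsigned int i = 0; i < 3; i++)
    for (unsigned long size = 1; size <= 20; size += 3)
      printf ("align %2u bits, size %2lu bytes: %s\n",
              aligns[i], size,
              sketch_move_by_pieces_p (size, aligns[i])
              ? "move by pieces" : "use movmemsi");
  return 0;
}

For word-aligned copies this accepts up to MIPS_MAX_MOVE_BYTES_STRAIGHT bytes, while less-aligned copies are cut off below one word, mirroring the macro logic that the mips.h hunk below deletes.
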
File: gcc/config/mips/mips.h

@@ -2782,23 +2782,8 @@ while (0)
    ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
    : MIPS_CALL_RATIO / 2)
 
-/* movmemsi is meant to generate code that is at least as good as
-   move_by_pieces.  However, movmemsi effectively uses a by-pieces
-   implementation both for moves smaller than a word and for word-aligned
-   moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT bytes.  We should
-   allow the tree-level optimisers to do such moves by pieces, as it
-   often exposes other optimization opportunities.  We might as well
-   continue to use movmemsi at the rtl level though, as it produces
-   better code when scheduling is disabled (such as at -O).  */
-
-#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-  (HAVE_movmemsi \
-   ? (!currently_expanding_to_rtl \
-      && ((ALIGN) < BITS_PER_WORD \
-          ? (SIZE) < UNITS_PER_WORD \
-          : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \
-   : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-      < (unsigned int) MOVE_RATIO (false)))
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+  mips_move_by_pieces_p (SIZE, ALIGN)
 
 /* For CLEAR_RATIO, when optimizing for size, give a better estimate
    of the length of a memset call, but use the default otherwise.  */
@@ -2813,16 +2798,8 @@ while (0)
 #define SET_RATIO(speed) \
   ((speed) ? 15 : MIPS_CALL_RATIO - 2)
 
-/* STORE_BY_PIECES_P can be used when copying a constant string, but
-   in that case each word takes 3 insns (lui, ori, sw), or more in
-   64-bit mode, instead of 2 (lw, sw).  For now we always fail this
-   and let the move_by_pieces code copy the string from read-only
-   memory.  In the future, this could be tuned further for multi-issue
-   CPUs that can issue stores down one pipe and arithmetic instructions
-   down another; in that case, the lui/ori/sw combination would be a
-   win for long enough strings.  */
-
-#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
+#define STORE_BY_PIECES_P(SIZE, ALIGN) \
+  mips_store_by_pieces_p (SIZE, ALIGN)
 
 #ifndef __mips16
 /* Since the bits of the _init and _fini function is spread across
File: gcc/expr.c

@@ -123,9 +123,6 @@ struct store_by_pieces_d
   int reverse;
 };
 
-static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
-                                                     unsigned int,
-                                                     unsigned int);
 static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
                               struct move_by_pieces_d *);
 static bool block_move_libcall_safe_for_call_parm (void);
@@ -1016,7 +1013,7 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
 /* Return number of insns required to move L bytes by pieces.
    ALIGN (in bits) is maximum alignment we can assume.  */
 
-static unsigned HOST_WIDE_INT
+unsigned HOST_WIDE_INT
 move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
                        unsigned int max_size)
 {
File: gcc/expr.h

@@ -367,6 +367,10 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
    succeed.  */
 extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
 
+extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
+                                                     unsigned int,
+                                                     unsigned int);
+
 /* Return nonzero if it is desirable to store LEN bytes generated by
    CONSTFUN with several move instructions by store_by_pieces
    function.  CONSTFUNDATA is a pointer which will be passed as argument

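For comparison, the fallback arm of mips_move_by_pieces_p reproduces the generic MOVE_BY_PIECES_P default: move_by_pieces_ninsns measured against MOVE_RATIO.  A simplified standalone sketch follows; the greedy power-of-two decomposition stands in for the real mode walk in expr.c, and the MOVE_MAX_PIECES and MOVE_RATIO values are assumed, not the target's real ones:

/* Standalone illustration only: approximates the generic
   MOVE_BY_PIECES_P default.  */
#include <stdbool.h>
#include <stdio.h>

#define MOVE_MAX_PIECES 4  /* assumed widest single-move piece, in bytes */
#define MOVE_RATIO 4       /* assumed threshold of piecewise moves */

/* Count the pieces needed to move L bytes at the given alignment,
   taking the widest allowed piece first.  */
static unsigned long
sketch_ninsns (unsigned long l, unsigned int align_bits)
{
  unsigned int max_piece = align_bits / 8;
  if (max_piece > MOVE_MAX_PIECES)
    max_piece = MOVE_MAX_PIECES;
  if (max_piece == 0)
    max_piece = 1;
  unsigned long n = 0;
  for (unsigned int piece = max_piece; piece >= 1; piece /= 2)
    {
      n += l / piece;
      l %= piece;
    }
  return n;
}

static bool
sketch_default_move_by_pieces_p (unsigned long size, unsigned int align)
{
  return sketch_ninsns (size, align) < MOVE_RATIO;
}

int
main (void)
{
  for (unsigned long size = 1; size <= 16; size++)
    printf ("word-aligned size %2lu: %s\n", size,
            sketch_default_move_by_pieces_p (size, 32)
            ? "move by pieces" : "something else (e.g. a libcall)");
  return 0;
}
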
File: gcc/testsuite/ChangeLog

@@ -1,3 +1,9 @@
+2012-01-02  Richard Sandiford  <rdsandiford@googlemail.com>
+
+	* gcc.dg/memcpy-4.c: Add nomips16 attribute for MIPS targets.
+	Increase copy to 5 bytes.  Look for at least two "mem/s/u"s,
+	rather than a specific number.
+
 2012-01-02  Paul Thomas  <pault@gcc.gnu.org>
 
 	PR fortran/46262

File: gcc/testsuite/gcc.dg/memcpy-4.c

@@ -1,11 +1,14 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-rtl-expand" } */
 
+#ifdef __mips
+__attribute__((nomips16))
+#endif
 void
 f1 (char *p)
 {
-  __builtin_memcpy (p, "123", 3);
+  __builtin_memcpy (p, "12345", 5);
 }
 
-/* { dg-final { scan-rtl-dump-times "mem/s/u" 3 "expand" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump "mem/s/u.*mem/s/u" "expand" { target mips*-*-* } } } */
 /* { dg-final { cleanup-rtl-dump "expand" } } */