From 0d8f5d625faf1a8a063bb849770665e743110aaf Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 2 Jan 2012 13:56:36 +0000 Subject: [PATCH] expr.h (move_by_pieces_ninsns): Declare. gcc/ * expr.h (move_by_pieces_ninsns): Declare. * expr.c (move_by_pieces_ninsns): Make external. * config/mips/mips-protos.h (mips_move_by_pieces_p): Declare. (mips_store_by_pieces_p): Likewise. * config/mips/mips.h (MOVE_BY_PIECES_P): Call mips_move_by_pieces_p. (STORE_BY_PIECES_P): Likewise mips_store_by_pieces_p. * config/mips/mips.c (mips_move_by_pieces_p): New function. (mips_store_by_pieces_p): Likewise. gcc/testsuite/ * gcc.dg/memcpy-4.c: Add nomips16 attribute for MIPS targets. Increase copy to 5 bytes. Look for at least two "mem/s/u"s, rather than a specific number. From-SVN: r182801 --- gcc/ChangeLog | 11 +++++ gcc/config/mips/mips-protos.h | 2 + gcc/config/mips/mips.c | 86 +++++++++++++++++++++++++++++++++ gcc/config/mips/mips.h | 31 ++---------- gcc/expr.c | 5 +- gcc/expr.h | 4 ++ gcc/testsuite/ChangeLog | 6 +++ gcc/testsuite/gcc.dg/memcpy-4.c | 7 ++- 8 files changed, 119 insertions(+), 33 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5b0d4a68223..4cdef41ddca 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2012-01-02 Richard Sandiford + + * expr.h (move_by_pieces_ninsns): Declare. + * expr.c (move_by_pieces_ninsns): Make external. + * config/mips/mips-protos.h (mips_move_by_pieces_p): Declare. + (mips_store_by_pieces_p): Likewise. + * config/mips/mips.h (MOVE_BY_PIECES_P): Call mips_move_by_pieces_p. + (STORE_BY_PIECES_P): Likewise mips_store_by_pieces_p. + * config/mips/mips.c (mips_move_by_pieces_p): New function. + (mips_store_by_pieces_p): Likewise. + 2012-01-02 Jakub Jelinek * passes.c (register_one_dump_file): Free full_name. 
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index 1791ce7c143..ca0fb5eba67 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -239,6 +239,8 @@ extern void mips_split_call (rtx, rtx); extern bool mips_get_pic_call_symbol (rtx *, int); extern void mips_expand_fcc_reload (rtx, rtx, rtx); extern void mips_set_return_address (rtx, rtx); +extern bool mips_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); +extern bool mips_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); extern bool mips_expand_block_move (rtx, rtx, rtx); extern void mips_expand_synci_loop (rtx, rtx); diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index bdbf94a48b0..7b3b6852cc9 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -6537,6 +6537,92 @@ mips_expand_fcc_reload (rtx dest, rtx src, rtx scratch) emit_insn (gen_slt_sf (dest, fp2, fp1)); } +/* Implement MOVE_BY_PIECES_P. */ + +bool +mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) +{ + if (HAVE_movmemsi) + { + /* movmemsi is meant to generate code that is at least as good as + move_by_pieces. However, movmemsi effectively uses a by-pieces + implementation both for moves smaller than a word and for + word-aligned moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT + bytes. We should allow the tree-level optimisers to do such + moves by pieces, as it often exposes other optimization + opportunities. We might as well continue to use movmemsi at + the rtl level though, as it produces better code when + scheduling is disabled (such as at -O). */ + if (currently_expanding_to_rtl) + return false; + if (align < BITS_PER_WORD) + return size < UNITS_PER_WORD; + return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT; + } + /* The default value. If this becomes a target hook, we should + call the default definition instead. 
*/ + return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1) + < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())); +} + +/* Implement STORE_BY_PIECES_P. */ + +bool +mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) +{ + /* Storing by pieces involves moving constants into registers + of size MIN (ALIGN, BITS_PER_WORD), then storing them. + We need to decide whether it is cheaper to load the address of + constant data into a register and use a block move instead. */ + + /* If the data is only byte aligned, then: + + (a1) A block move of less than 4 bytes would involve 3 LBs and + 3 SBs. We might as well use 3 single-instruction LIs and 3 SBs + instead. + + (a2) A block move of 4 bytes from aligned source data can use an + LW/SWL/SWR sequence. This is often better than the 4 LIs and + 4 SBs that we would generate when storing by pieces. */ + if (align <= BITS_PER_UNIT) + return size < 4; + + /* If the data is 2-byte aligned, then: + + (b1) A block move of less than 4 bytes would use a combination of LBs, + LHs, SBs and SHs. We get better code by using single-instruction + LIs, SBs and SHs instead. + + (b2) A block move of 4 bytes from aligned source data would again use + an LW/SWL/SWR sequence. In most cases, loading the address of + the source data would require at least one extra instruction. + It is often more efficient to use 2 single-instruction LIs and + 2 SHs instead. + + (b3) A block move of up to 3 additional bytes would be like (b1). + + (b4) A block move of 8 bytes from aligned source data can use two + LW/SWL/SWR sequences or a single LD/SDL/SDR sequence. Both + sequences are better than the 4 LIs and 4 SHs that we'd generate + when storing by pieces. + + The reasoning for higher alignments is similar: + + (c1) A block move of less than 4 bytes would be the same as (b1). + + (c2) A block move of 4 bytes would use an LW/SW sequence. 
Again, + loading the address of the source data would typically require + at least one extra instruction. It is generally better to use + LUI/ORI/SW instead. + + (c3) A block move of up to 3 additional bytes would be like (b1). + + (c4) A block move of 8 bytes can use two LW/SW sequences or a single + LD/SD sequence, and in these cases we've traditionally preferred + the memory copy over the more bulky constant moves. */ + return size < 8; +} + /* Emit straight-line code to move LENGTH bytes from SRC to DEST. Assume that the areas do not overlap. */ diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 55f9b07c416..23d40baf9a6 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -2782,23 +2782,8 @@ while (0) ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ : MIPS_CALL_RATIO / 2) -/* movmemsi is meant to generate code that is at least as good as - move_by_pieces. However, movmemsi effectively uses a by-pieces - implementation both for moves smaller than a word and for word-aligned - moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT bytes. We should - allow the tree-level optimisers to do such moves by pieces, as it - often exposes other optimization opportunities. We might as well - continue to use movmemsi at the rtl level though, as it produces - better code when scheduling is disabled (such as at -O). */ - -#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ - (HAVE_movmemsi \ - ? (!currently_expanding_to_rtl \ - && ((ALIGN) < BITS_PER_WORD \ - ? (SIZE) < UNITS_PER_WORD \ - : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \ - : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ - < (unsigned int) MOVE_RATIO (false))) +#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ + mips_move_by_pieces_p (SIZE, ALIGN) /* For CLEAR_RATIO, when optimizing for size, give a better estimate of the length of a memset call, but use the default otherwise. */ @@ -2813,16 +2798,8 @@ while (0) #define SET_RATIO(speed) \ ((speed) ? 
15 : MIPS_CALL_RATIO - 2) -/* STORE_BY_PIECES_P can be used when copying a constant string, but - in that case each word takes 3 insns (lui, ori, sw), or more in - 64-bit mode, instead of 2 (lw, sw). For now we always fail this - and let the move_by_pieces code copy the string from read-only - memory. In the future, this could be tuned further for multi-issue - CPUs that can issue stores down one pipe and arithmetic instructions - down another; in that case, the lui/ori/sw combination would be a - win for long enough strings. */ - -#define STORE_BY_PIECES_P(SIZE, ALIGN) 0 +#define STORE_BY_PIECES_P(SIZE, ALIGN) \ + mips_store_by_pieces_p (SIZE, ALIGN) #ifndef __mips16 /* Since the bits of the _init and _fini function is spread across diff --git a/gcc/expr.c b/gcc/expr.c index c10f9157687..9825d126df4 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -123,9 +123,6 @@ struct store_by_pieces_d int reverse; }; -static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT, - unsigned int, - unsigned int); static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode, struct move_by_pieces_d *); static bool block_move_libcall_safe_for_call_parm (void); @@ -1016,7 +1013,7 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len, /* Return number of insns required to move L bytes by pieces. ALIGN (in bits) is maximum alignment we can assume. */ -static unsigned HOST_WIDE_INT +unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, unsigned int max_size) { diff --git a/gcc/expr.h b/gcc/expr.h index 7a323bacd6a..0096367a727 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -367,6 +367,10 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int, succeed. 
*/ extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int); +extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT, + unsigned int, + unsigned int); + /* Return nonzero if it is desirable to store LEN bytes generated by CONSTFUN with several move instructions by store_by_pieces function. CONSTFUNDATA is a pointer which will be passed as argument diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e24d96c4522..dd29a88d25e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2012-01-02 Richard Sandiford + + * gcc.dg/memcpy-4.c: Add nomips16 attribute for MIPS targets. + Increase copy to 5 bytes. Look for at least two "mem/s/u"s, + rather than a specific number. + 2012-01-02 Paul Thomas PR fortran/46262 diff --git a/gcc/testsuite/gcc.dg/memcpy-4.c b/gcc/testsuite/gcc.dg/memcpy-4.c index 4fe72ec5b89..80a943bdb78 100644 --- a/gcc/testsuite/gcc.dg/memcpy-4.c +++ b/gcc/testsuite/gcc.dg/memcpy-4.c @@ -1,11 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-rtl-expand" } */ +#ifdef __mips +__attribute__((nomips16)) +#endif void f1 (char *p) { - __builtin_memcpy (p, "123", 3); + __builtin_memcpy (p, "12345", 5); } -/* { dg-final { scan-rtl-dump-times "mem/s/u" 3 "expand" { target mips*-*-* } } } */ +/* { dg-final { scan-rtl-dump "mem/s/u.*mem/s/u" "expand" { target mips*-*-* } } } */ /* { dg-final { cleanup-rtl-dump "expand" } } */