re PR target/11787 (always call memcpy for block move in mips16)

2007-08-24  Sandra Loosemore  <sandra@codesourcery.com>
            Nigel Stephens <nigel@mips.com>

	PR target/11787

	gcc/

	* doc/tm.texi (SET_RATIO, SET_BY_PIECES_P): Document new macros.
	(STORE_BY_PIECES_P): No longer applies to __builtin_memset.
	* expr.c (SET_BY_PIECES_P): Define.
	(can_store_by_pieces, store_by_pieces): Add MEMSETP argument; use
	it to decide whether to use SET_BY_PIECES_P or STORE_BY_PIECES_P.
	(store_expr):  Pass MEMSETP argument to can_store_by_pieces and
	store_by_pieces.
	* expr.h (SET_RATIO): Define.
	(can_store_by_pieces, store_by_pieces):	Update prototypes.
	* builtins.c (expand_builtin_memcpy): Pass MEMSETP argument to
	can_store_by_pieces/store_by_pieces.
	(expand_builtin_memcpy_args): Likewise.
	(expand_builtin_strncpy): Likewise.
	(expand_builtin_memset_args): Likewise.  Also remove special case
	for optimize_size so that can_store_by_pieces/SET_BY_PIECES_P can
	decide what to do instead.
	* value-prof.c (tree_stringops_transform): Pass MEMSETP argument
	to can_store_by_pieces.

	* config/sh/sh.h (SET_BY_PIECES_P): Clone from STORE_BY_PIECES_P.
	* config/s390/s390.h (SET_BY_PIECES_P): Likewise.

	* config/mips/mips.opt (mmemcpy): Change from Var to Mask.
	* config/mips/mips.c (override_options): Make -Os default to -mmemcpy.
	* config/mips/mips.h (MIPS_CALL_RATIO): Define.
	(MOVE_RATIO, CLEAR_RATIO, SET_RATIO): Define.
	(STORE_BY_PIECES_P): Define.

Co-Authored-By: Nigel Stephens <nigel@mips.com>

From-SVN: r127790
This commit is contained in:
Sandra Loosemore 2007-08-24 19:54:05 -04:00 committed by Sandra Loosemore
parent f9837879d5
commit cfa311506c
11 changed files with 173 additions and 35 deletions

View File

@ -1,3 +1,36 @@
2007-08-24 Sandra Loosemore <sandra@codesourcery.com>
Nigel Stephens <nigel@mips.com>
PR target/11787
* doc/tm.texi (SET_RATIO, SET_BY_PIECES_P): Document new macros.
(STORE_BY_PIECES_P): No longer applies to __builtin_memset.
* expr.c (SET_BY_PIECES_P): Define.
(can_store_by_pieces, store_by_pieces): Add MEMSETP argument; use
it to decide whether to use SET_BY_PIECES_P or STORE_BY_PIECES_P.
(store_expr): Pass MEMSETP argument to can_store_by_pieces and
store_by_pieces.
* expr.h (SET_RATIO): Define.
(can_store_by_pieces, store_by_pieces): Update prototypes.
* builtins.c (expand_builtin_memcpy): Pass MEMSETP argument to
can_store_by_pieces/store_by_pieces.
(expand_builtin_memcpy_args): Likewise.
(expand_builtin_strncpy): Likewise.
(expand_builtin_memset_args): Likewise. Also remove special case
for optimize_size so that can_store_by_pieces/SET_BY_PIECES_P can
decide what to do instead.
* value-prof.c (tree_stringops_transform): Pass MEMSETP argument
to can_store_by_pieces.
* config/sh/sh.h (SET_BY_PIECES_P): Clone from STORE_BY_PIECES_P.
* config/s390/s390.h (SET_BY_PIECES_P): Likewise.
* config/mips/mips.opt (mmemcpy): Change from Var to Mask.
* config/mips/mips.c (override_options): Make -Os default to -mmemcpy.
* config/mips/mips.h (MIPS_CALL_RATIO): Define.
(MOVE_RATIO, CLEAR_RATIO, SET_RATIO): Define.
(STORE_BY_PIECES_P): Define.
2007-08-24 Tom Tromey <tromey@redhat.com>
* varpool.c (varpool_last_needed_node): Fix comment typo.

View File

@ -3331,11 +3331,11 @@ expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode)
&& GET_CODE (len_rtx) == CONST_INT
&& (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
&& can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str,
(void *) src_str, dest_align))
(void *) src_str, dest_align, false))
{
dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx),
builtin_memcpy_read_str,
(void *) src_str, dest_align, 0);
(void *) src_str, dest_align, false, 0);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
return dest_mem;
@ -3444,13 +3444,14 @@ expand_builtin_mempcpy_args (tree dest, tree src, tree len, tree type,
&& GET_CODE (len_rtx) == CONST_INT
&& (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
&& can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str,
(void *) src_str, dest_align))
(void *) src_str, dest_align, false))
{
dest_mem = get_memory_rtx (dest, len);
set_mem_align (dest_mem, dest_align);
dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx),
builtin_memcpy_read_str,
(void *) src_str, dest_align, endp);
(void *) src_str, dest_align,
false, endp);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
return dest_mem;
@ -3792,13 +3793,13 @@ expand_builtin_strncpy (tree exp, rtx target, enum machine_mode mode)
if (!p || dest_align == 0 || !host_integerp (len, 1)
|| !can_store_by_pieces (tree_low_cst (len, 1),
builtin_strncpy_read_str,
(void *) p, dest_align))
(void *) p, dest_align, false))
return NULL_RTX;
dest_mem = get_memory_rtx (dest, len);
store_by_pieces (dest_mem, tree_low_cst (len, 1),
builtin_strncpy_read_str,
(void *) p, dest_align, 0);
(void *) p, dest_align, false, 0);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
return dest_mem;
@ -3926,14 +3927,15 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
* We can't pass builtin_memset_gen_str as that emits RTL. */
c = 1;
if (host_integerp (len, 1)
&& !(optimize_size && tree_low_cst (len, 1) > 1)
&& can_store_by_pieces (tree_low_cst (len, 1),
builtin_memset_read_str, &c, dest_align))
builtin_memset_read_str, &c, dest_align,
true))
{
val_rtx = force_reg (TYPE_MODE (unsigned_char_type_node),
val_rtx);
store_by_pieces (dest_mem, tree_low_cst (len, 1),
builtin_memset_gen_str, val_rtx, dest_align, 0);
builtin_memset_gen_str, val_rtx, dest_align,
true, 0);
}
else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
dest_align, expected_align,
@ -3951,11 +3953,11 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
if (c)
{
if (host_integerp (len, 1)
&& !(optimize_size && tree_low_cst (len, 1) > 1)
&& can_store_by_pieces (tree_low_cst (len, 1),
builtin_memset_read_str, &c, dest_align))
builtin_memset_read_str, &c, dest_align,
true))
store_by_pieces (dest_mem, tree_low_cst (len, 1),
builtin_memset_read_str, &c, dest_align, 0);
builtin_memset_read_str, &c, dest_align, true, 0);
else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c),
dest_align, expected_align,
expected_size))

View File

@ -5323,6 +5323,11 @@ override_options (void)
flag_delayed_branch = 0;
}
/* Prefer a call to memcpy over inline code when optimizing for size,
though see MOVE_RATIO in mips.h. */
if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0)
target_flags |= MASK_MEMCPY;
#ifdef MIPS_TFMODE_FORMAT
REAL_MODE_FORMAT (TFmode) = &MIPS_TFMODE_FORMAT;
#endif

View File

@ -2785,6 +2785,57 @@ while (0)
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
values were determined experimentally by benchmarking with CSiBE.
In theory, the call overhead is higher for TARGET_ABICALLS (especially
for o32 where we have to restore $gp afterwards as well as make an
indirect call), but in practice, bumping this up higher for
TARGET_ABICALLS doesn't make much difference to code size. */
#define MIPS_CALL_RATIO 8
/* Define MOVE_RATIO to encourage use of movmemsi when enabled,
since it should always generate code at least as good as
move_by_pieces(). But when inline movmemsi pattern is disabled
(i.e., with -mips16 or -mmemcpy), instead use a value approximating
the length of a memcpy call sequence, so that move_by_pieces will
generate inline code if it is shorter than a function call.
Since move_by_pieces_ninsns() counts memory-to-memory moves, but
we'll have to generate a load/store pair for each, halve the value of
MIPS_CALL_RATIO to take that into account.
The default value for MOVE_RATIO when HAVE_movmemsi is true is 2.
There is no point to setting it to less than this to try to disable
move_by_pieces entirely, because that also disables some desirable
tree-level optimizations, specifically related to optimizing a
one-byte string copy into a simple move byte operation. */
#define MOVE_RATIO \
((TARGET_MIPS16 || TARGET_MEMCPY) ? MIPS_CALL_RATIO / 2 : 2)
/* For CLEAR_RATIO, when optimizing for size, give a better estimate
of the length of a memset call, but use the default otherwise. */
#define CLEAR_RATIO \
(optimize_size ? MIPS_CALL_RATIO : 15)
/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when
optimizing for size adjust the ratio to account for the overhead of
loading the constant and replicating it across the word. */
#define SET_RATIO \
(optimize_size ? MIPS_CALL_RATIO - 2 : 15)
/* STORE_BY_PIECES_P can be used when copying a constant string, but
in that case each word takes 3 insns (lui, ori, sw), or more in
64-bit mode, instead of 2 (lw, sw). For now we always fail this
and let the move_by_pieces code copy the string from read-only
memory. In the future, this could be tuned further for multi-issue
CPUs that can issue stores down one pipe and arithmetic instructions
down another; in that case, the lui/ori/sw combination would be a
win for long enough strings. */
#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
#ifndef __mips16
/* Since the bits of the _init and _fini function is spread across

View File

@ -173,7 +173,7 @@ Target Report RejectNegative Mask(LONG64)
Use a 64-bit long type
mmemcpy
Target Report Var(TARGET_MEMCPY)
Target Report Mask(MEMCPY)
Don't optimize block moves
mmips-tfile

View File

@ -803,10 +803,13 @@ extern struct rtx_def *s390_compare_op0, *s390_compare_op1, *s390_compare_emitte
|| (TARGET_64BIT && (SIZE) == 8) )
/* This macro is used to determine whether store_by_pieces should be
called to "memset" storage with byte values other than zero, or
to "memcpy" storage when the source is a constant string. */
called to "memcpy" storage when the source is a constant string. */
#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN)
/* Likewise to decide whether to "memset" storage with byte values
other than zero. */
#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN)
/* Don't perform CSE on function addresses. */
#define NO_FUNCTION_CSE

View File

@ -2184,6 +2184,8 @@ struct sh_args {
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
< (TARGET_SMALLCODE ? 2 : ((ALIGN >= 32) ? 16 : 2)))
#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN)
/* Macros to check register numbers against specific register classes. */
/* These assume that REGNO is a hard or pseudo reg number.

View File

@ -5897,12 +5897,30 @@ will be used. Defaults to 1 if @code{move_by_pieces_ninsns} returns less
than @code{CLEAR_RATIO}.
@end defmac
@defmac SET_RATIO
The threshold of number of scalar move insns, @emph{below} which a sequence
of insns should be generated to set memory to a constant value, instead of
a block set insn or a library call.
Increasing the value will always make code faster, but
eventually incurs high cost in increased code size.
If you don't define this, it defaults to the value of @code{MOVE_RATIO}.
@end defmac
@defmac SET_BY_PIECES_P (@var{size}, @var{alignment})
A C expression used to determine whether @code{store_by_pieces} will be
used to set a chunk of memory to a constant value, or whether some
other mechanism will be used. Used by @code{__builtin_memset} when
storing values other than constant zero.
Defaults to 1 if @code{move_by_pieces_ninsns} returns less
than @code{SET_RATIO}.
@end defmac
@defmac STORE_BY_PIECES_P (@var{size}, @var{alignment})
A C expression used to determine whether @code{store_by_pieces} will be
used to set a chunk of memory to a constant value, or whether some other
mechanism will be used. Used by @code{__builtin_memset} when storing
values other than constant zero and by @code{__builtin_strcpy} when
when called with a constant source string.
used to set a chunk of memory to a constant string value, or whether some
other mechanism will be used. Used by @code{__builtin_strcpy} when
called with a constant source string.
Defaults to 1 if @code{move_by_pieces_ninsns} returns less
than @code{MOVE_RATIO}.
@end defmac

View File

@ -186,8 +186,15 @@ static bool float_extend_from_mem[NUM_MACHINE_MODES][NUM_MACHINE_MODES];
#endif
/* This macro is used to determine whether store_by_pieces should be
called to "memset" storage with byte values other than zero, or
to "memcpy" storage when the source is a constant string. */
called to "memset" storage with byte values other than zero. */
#ifndef SET_BY_PIECES_P
#define SET_BY_PIECES_P(SIZE, ALIGN) \
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
< (unsigned int) SET_RATIO)
#endif
/* This macro is used to determine whether store_by_pieces should be
called to "memcpy" storage when the source is a constant string. */
#ifndef STORE_BY_PIECES_P
#define STORE_BY_PIECES_P(SIZE, ALIGN) \
(move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
@ -2191,13 +2198,14 @@ use_group_regs (rtx *call_fusage, rtx regs)
/* Determine whether the LEN bytes generated by CONSTFUN can be
stored to memory using several move instructions. CONSTFUNDATA is
a pointer which will be passed as argument in every CONSTFUN call.
ALIGN is maximum alignment we can assume. Return nonzero if a
call to store_by_pieces should succeed. */
ALIGN is maximum alignment we can assume. MEMSETP is true if this is
a memset operation and false if it's a copy of a constant string.
Return nonzero if a call to store_by_pieces should succeed. */
int
can_store_by_pieces (unsigned HOST_WIDE_INT len,
rtx (*constfun) (void *, HOST_WIDE_INT, enum machine_mode),
void *constfundata, unsigned int align)
void *constfundata, unsigned int align, bool memsetp)
{
unsigned HOST_WIDE_INT l;
unsigned int max_size;
@ -2210,7 +2218,9 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
if (len == 0)
return 1;
if (! STORE_BY_PIECES_P (len, align))
if (! (memsetp
? SET_BY_PIECES_P (len, align)
: STORE_BY_PIECES_P (len, align)))
return 0;
tmode = mode_for_size (STORE_MAX_PIECES * BITS_PER_UNIT, MODE_INT, 1);
@ -2285,7 +2295,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
/* Generate several move instructions to store LEN bytes generated by
CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
pointer which will be passed as argument in every CONSTFUN call.
ALIGN is maximum alignment we can assume.
ALIGN is maximum alignment we can assume. MEMSETP is true if this is
a memset operation and false if it's a copy of a constant string.
If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
mempcpy, and if ENDP is 2 return memory the end minus one byte ala
stpcpy. */
@ -2293,7 +2304,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
rtx
store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
rtx (*constfun) (void *, HOST_WIDE_INT, enum machine_mode),
void *constfundata, unsigned int align, int endp)
void *constfundata, unsigned int align, bool memsetp, int endp)
{
struct store_by_pieces data;
@ -2303,7 +2314,9 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
return to;
}
gcc_assert (STORE_BY_PIECES_P (len, align));
gcc_assert (memsetp
? SET_BY_PIECES_P (len, align)
: STORE_BY_PIECES_P (len, align));
data.constfun = constfun;
data.constfundata = constfundata;
data.len = len;
@ -4498,7 +4511,7 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
str_copy_len = MIN (str_copy_len, exp_len);
if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str,
(void *) TREE_STRING_POINTER (exp),
MEM_ALIGN (target)))
MEM_ALIGN (target), false))
goto normal_expr;
dest_mem = target;
@ -4507,7 +4520,8 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal)
str_copy_len, builtin_strncpy_read_str,
(void *) TREE_STRING_POINTER (exp),
MEM_ALIGN (target),
exp_len > str_copy_len ? 1 : 0);
exp_len > str_copy_len ? 1 : 0,
false);
if (exp_len > str_copy_len)
clear_storage (dest_mem, GEN_INT (exp_len - str_copy_len),
BLOCK_OP_NORMAL);

View File

@ -84,6 +84,13 @@ enum expand_modifier {EXPAND_NORMAL = 0, EXPAND_STACK_PARM, EXPAND_SUM,
#define CLEAR_RATIO (optimize_size ? 3 : 15)
#endif
#endif
/* If a memory set (to value other than zero) operation would take
SET_RATIO or more simple move-instruction sequences, we will do a movmem
or libcall instead. */
#ifndef SET_RATIO
#define SET_RATIO MOVE_RATIO
#endif
enum direction {none, upward, downward};
@ -444,20 +451,23 @@ extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
CONSTFUN with several move instructions by store_by_pieces
function. CONSTFUNDATA is a pointer which will be passed as argument
in every CONSTFUN call.
ALIGN is maximum alignment we can assume. */
ALIGN is maximum alignment we can assume.
MEMSETP is true if this is a real memset/bzero, not a copy
of a const string. */
extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
rtx (*) (void *, HOST_WIDE_INT,
enum machine_mode),
void *, unsigned int);
void *, unsigned int, bool);
/* Generate several move instructions to store LEN bytes generated by
CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
pointer which will be passed as argument in every CONSTFUN call.
ALIGN is maximum alignment we can assume.
MEMSETP is true if this is a real memset/bzero, not a copy.
Returns TO + LEN. */
extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT,
rtx (*) (void *, HOST_WIDE_INT, enum machine_mode),
void *, unsigned int, int);
void *, unsigned int, bool, int);
/* Emit insns to set X from Y. */
extern rtx emit_move_insn (rtx, rtx);

View File

@ -1392,13 +1392,13 @@ tree_stringops_transform (block_stmt_iterator *bsi)
case BUILT_IN_MEMSET:
if (!can_store_by_pieces (val, builtin_memset_read_str,
CALL_EXPR_ARG (call, 1),
dest_align))
dest_align, true))
return false;
break;
case BUILT_IN_BZERO:
if (!can_store_by_pieces (val, builtin_memset_read_str,
integer_zero_node,
dest_align))
dest_align, true))
return false;
break;
default: