md.texi (setmem): Document new parameter.

* md.texi (setmem): Document new parameter.
	* optabs.c (maybe_gen_insn): Support 9 operands.
	* builtins.c (determine_block_size): Add probable_max_size;
	support anti-ranges.
	(expand_builtin_memcpy, expand_builtin_memset_args): Pass around
	probable_max_size.
	* expr.c (emit_block_move_via_movmem, emit_block_move_hints,
	emit_block_move, clear_storage_hints, set_storage_via_setmem):
	Likewise.
	* expr.h (emit_block_move_hints, clear_storage_hints,
	set_storage_via_setmem): Update prototype.
	* i386.md (setmem, movmem patterns): Add 9th operand.
	* i386-protos.h (ix86_expand_set_or_movmem): Update prototype.
	* i386.c (ix86_expand_set_or_movmem): Take probable_max_size_exp
	argument; pass it to decide_alg.

	* gcc.target/i386/memcpy-3.c: New testcase.

From-SVN: r204997
This commit is contained in:
Jan Hubicka 2013-11-19 02:42:34 +01:00 committed by Jan Hubicka
parent 0874db6e96
commit 82bb7d4e82
10 changed files with 147 additions and 59 deletions

View File

@ -3096,12 +3096,15 @@ builtin_memcpy_read_str (void *data, HOST_WIDE_INT offset,
}
/* LEN specify length of the block of memcpy/memset operation.
Figure out its range and put it into MIN_SIZE/MAX_SIZE. */
Figure out its range and put it into MIN_SIZE/MAX_SIZE.
In some cases we can make a very likely guess at the maximal size;
we then store it in PROBABLE_MAX_SIZE. */
static void
determine_block_size (tree len, rtx len_rtx,
unsigned HOST_WIDE_INT *min_size,
unsigned HOST_WIDE_INT *max_size)
unsigned HOST_WIDE_INT *max_size,
unsigned HOST_WIDE_INT *probable_max_size)
{
if (CONST_INT_P (len_rtx))
{
@ -3111,28 +3114,47 @@ determine_block_size (tree len, rtx len_rtx,
else
{
double_int min, max;
if (TREE_CODE (len) == SSA_NAME
&& get_range_info (len, &min, &max) == VR_RANGE)
{
if (min.fits_uhwi ())
*min_size = min.to_uhwi ();
else
*min_size = 0;
if (max.fits_uhwi ())
*max_size = max.to_uhwi ();
else
*max_size = (HOST_WIDE_INT)-1;
}
enum value_range_type range_type = VR_UNDEFINED;
/* Determine bounds from the type. */
if (tree_fits_uhwi_p (TYPE_MIN_VALUE (TREE_TYPE (len))))
*min_size = tree_to_uhwi (TYPE_MIN_VALUE (TREE_TYPE (len)));
else
*min_size = 0;
if (tree_fits_uhwi_p (TYPE_MAX_VALUE (TREE_TYPE (len))))
*probable_max_size = *max_size = tree_to_uhwi (TYPE_MAX_VALUE (TREE_TYPE (len)));
else
*probable_max_size = *max_size = GET_MODE_MASK (GET_MODE (len_rtx));
if (TREE_CODE (len) == SSA_NAME)
range_type = get_range_info (len, &min, &max);
if (range_type == VR_RANGE)
{
if (tree_fits_uhwi_p (TYPE_MIN_VALUE (TREE_TYPE (len))))
*min_size = tree_to_uhwi (TYPE_MIN_VALUE (TREE_TYPE (len)));
else
*min_size = 0;
if (tree_fits_uhwi_p (TYPE_MAX_VALUE (TREE_TYPE (len))))
*max_size = tree_to_uhwi (TYPE_MAX_VALUE (TREE_TYPE (len)));
else
*max_size = GET_MODE_MASK (GET_MODE (len_rtx));
if (min.fits_uhwi () && *min_size < min.to_uhwi ())
*min_size = min.to_uhwi ();
if (max.fits_uhwi () && *max_size > max.to_uhwi ())
*probable_max_size = *max_size = max.to_uhwi ();
}
else if (range_type == VR_ANTI_RANGE)
{
/* Anti range 0...N lets us determine that the minimal size is N+1. */
if (min.is_zero ())
{
if ((max + double_int_one).fits_uhwi ())
*min_size = (max + double_int_one).to_uhwi ();
}
/* Code like
int n;
if (n < 100)
memcpy (a,b, n)
produces an anti range allowing negative values of N. We can still
use the information and make a guess that N is not negative.
*/
else if (!max.ule (double_int_one.lshift (30))
&& min.fits_uhwi ())
*probable_max_size = min.to_uhwi () - 1;
}
}
gcc_checking_assert (*max_size <=
@ -3164,6 +3186,7 @@ expand_builtin_memcpy (tree exp, rtx target)
unsigned int expected_align = 0;
unsigned HOST_WIDE_INT min_size;
unsigned HOST_WIDE_INT max_size;
unsigned HOST_WIDE_INT probable_max_size;
/* If DEST is not a pointer type, call the normal function. */
if (dest_align == 0)
@ -3183,7 +3206,8 @@ expand_builtin_memcpy (tree exp, rtx target)
dest_mem = get_memory_rtx (dest, len);
set_mem_align (dest_mem, dest_align);
len_rtx = expand_normal (len);
determine_block_size (len, len_rtx, &min_size, &max_size);
determine_block_size (len, len_rtx, &min_size, &max_size,
&probable_max_size);
src_str = c_getstr (src);
/* If SRC is a string constant and block move would be done
@ -3213,7 +3237,7 @@ expand_builtin_memcpy (tree exp, rtx target)
CALL_EXPR_TAILCALL (exp)
? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
expected_align, expected_size,
min_size, max_size);
min_size, max_size, probable_max_size);
if (dest_addr == 0)
{
@ -3629,6 +3653,7 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
unsigned int expected_align = 0;
unsigned HOST_WIDE_INT min_size;
unsigned HOST_WIDE_INT max_size;
unsigned HOST_WIDE_INT probable_max_size;
dest_align = get_pointer_alignment (dest);
@ -3657,7 +3682,8 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
len = builtin_save_expr (len);
len_rtx = expand_normal (len);
determine_block_size (len, len_rtx, &min_size, &max_size);
determine_block_size (len, len_rtx, &min_size, &max_size,
&probable_max_size);
dest_mem = get_memory_rtx (dest, len);
val_mode = TYPE_MODE (unsigned_char_type_node);
@ -3684,7 +3710,8 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
}
else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
dest_align, expected_align,
expected_size, min_size, max_size))
expected_size, min_size, max_size,
probable_max_size))
goto do_libcall;
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
@ -3706,7 +3733,8 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
else if (!set_storage_via_setmem (dest_mem, len_rtx,
gen_int_mode (c, val_mode),
dest_align, expected_align,
expected_size, min_size, max_size))
expected_size, min_size, max_size,
probable_max_size))
goto do_libcall;
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
@ -3719,7 +3747,8 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
CALL_EXPR_TAILCALL (orig_exp)
? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
expected_align, expected_size,
min_size, max_size);
min_size, max_size,
probable_max_size);
if (dest_addr == 0)
{

View File

@ -60,7 +60,7 @@ extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode);
extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx,
rtx, rtx, rtx, bool);
rtx, rtx, rtx, rtx, bool);
extern bool constant_address_p (rtx);
extern bool legitimate_pic_operand_p (rtx);

View File

@ -23715,7 +23715,8 @@ bool
ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
rtx align_exp, rtx expected_align_exp,
rtx expected_size_exp, rtx min_size_exp,
rtx max_size_exp, bool issetmem)
rtx max_size_exp, rtx probable_max_size_exp,
bool issetmem)
{
rtx destreg;
rtx srcreg = NULL;
@ -23739,6 +23740,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
/* TODO: Once value ranges are available, fill in proper data. */
unsigned HOST_WIDE_INT min_size = 0;
unsigned HOST_WIDE_INT max_size = -1;
unsigned HOST_WIDE_INT probable_max_size = -1;
bool misaligned_prologue_used = false;
if (CONST_INT_P (align_exp))
@ -23754,13 +23756,19 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
align = MEM_ALIGN (dst) / BITS_PER_UNIT;
if (CONST_INT_P (count_exp))
min_size = max_size = count = expected_size = INTVAL (count_exp);
if (min_size_exp)
min_size = INTVAL (min_size_exp);
if (max_size_exp)
max_size = INTVAL (max_size_exp);
if (CONST_INT_P (expected_size_exp) && count == 0)
expected_size = INTVAL (expected_size_exp);
min_size = max_size = probable_max_size = count = expected_size
= INTVAL (count_exp);
else
{
if (min_size_exp)
min_size = INTVAL (min_size_exp);
if (max_size_exp)
max_size = INTVAL (max_size_exp);
if (probable_max_size_exp)
probable_max_size = INTVAL (probable_max_size_exp);
if (CONST_INT_P (expected_size_exp) && count == 0)
expected_size = INTVAL (expected_size_exp);
}
/* Make sure we don't need to care about overflow later on. */
if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
@ -23768,7 +23776,8 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
alg = decide_alg (count, expected_size, min_size, max_size, issetmem,
alg = decide_alg (count, expected_size, min_size, probable_max_size,
issetmem,
issetmem && val_exp == const0_rtx,
&dynamic_check, &noalign);
if (alg == libcall)

View File

@ -15506,13 +15506,15 @@
(use (match_operand:SI 4 "const_int_operand"))
(use (match_operand:SI 5 "const_int_operand"))
(use (match_operand:SI 6 ""))
(use (match_operand:SI 7 ""))]
(use (match_operand:SI 7 ""))
(use (match_operand:SI 8 ""))]
""
{
if (ix86_expand_set_or_movmem (operands[0], operands[1],
operands[2], NULL, operands[3],
operands[4], operands[5],
operands[6], operands[7], false))
operands[6], operands[7],
operands[8], false))
DONE;
else
FAIL;
@ -15702,14 +15704,15 @@
(use (match_operand:SI 4 "const_int_operand"))
(use (match_operand:SI 5 "const_int_operand"))
(use (match_operand:SI 6 ""))
(use (match_operand:SI 7 ""))]
(use (match_operand:SI 7 ""))
(use (match_operand:SI 8 ""))]
""
{
if (ix86_expand_set_or_movmem (operands[0], NULL,
operands[1], operands[2],
operands[3], operands[4],
operands[5], operands[6],
operands[7], true))
operands[7], operands[8], true))
DONE;
else
FAIL;

View File

@ -5352,6 +5352,8 @@ all cases. This expected alignment is also in bytes, just like operand 4.
Expected size, when unknown, is set to @code{(const_int -1)}.
Operand 7 is the minimal size of the block and operand 8 is the
maximal size of the block (NULL if it can not be represented as CONST_INT).
Operand 9 is the probable maximal size (i.e.@: we cannot rely on it for correctness,
but it can be used for choosing the proper code sequence for a given size).
The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}.

View File

@ -129,7 +129,8 @@ static void move_by_pieces_1 (insn_gen_fn, machine_mode,
struct move_by_pieces_d *);
static bool block_move_libcall_safe_for_call_parm (void);
static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT,
unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
static tree emit_block_move_libcall_fn (int);
static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
@ -1131,7 +1132,8 @@ rtx
emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
unsigned int expected_align, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size,
unsigned HOST_WIDE_INT max_size)
unsigned HOST_WIDE_INT max_size,
unsigned HOST_WIDE_INT probable_max_size)
{
bool may_use_call;
rtx retval = 0;
@ -1188,7 +1190,7 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
move_by_pieces (x, y, INTVAL (size), align, 0);
else if (emit_block_move_via_movmem (x, y, size, align,
expected_align, expected_size,
min_size, max_size))
min_size, max_size, probable_max_size))
;
else if (may_use_call
&& ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
@ -1224,7 +1226,7 @@ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
else
max = GET_MODE_MASK (GET_MODE (size));
return emit_block_move_hints (x, y, size, method, 0, -1,
min, max);
min, max, max);
}
/* A subroutine of emit_block_move. Returns true if calling the
@ -1289,7 +1291,8 @@ static bool
emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
unsigned int expected_align, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size,
unsigned HOST_WIDE_INT max_size)
unsigned HOST_WIDE_INT max_size,
unsigned HOST_WIDE_INT probable_max_size)
{
int save_volatile_ok = volatile_ok;
enum machine_mode mode;
@ -1298,8 +1301,8 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
expected_align = align;
if (expected_size != -1)
{
if ((unsigned HOST_WIDE_INT)expected_size > max_size)
expected_size = max_size;
if ((unsigned HOST_WIDE_INT)expected_size > probable_max_size)
expected_size = probable_max_size;
if ((unsigned HOST_WIDE_INT)expected_size < min_size)
expected_size = min_size;
}
@ -1328,7 +1331,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
|| max_size <= (GET_MODE_MASK (mode) >> 1)
|| GET_MODE_BITSIZE (mode) >= GET_MODE_BITSIZE (Pmode)))
{
struct expand_operand ops[8];
struct expand_operand ops[9];
unsigned int nops;
/* ??? When called via emit_block_move_for_call, it'd be
@ -1336,7 +1339,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
that it doesn't fail the expansion because it thinks
emitting the libcall would be more efficient. */
nops = insn_data[(int) code].n_generator_args;
gcc_assert (nops == 4 || nops == 6 || nops == 8);
gcc_assert (nops == 4 || nops == 6 || nops == 8 || nops == 9);
create_fixed_operand (&ops[0], x);
create_fixed_operand (&ops[1], y);
@ -1348,7 +1351,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT);
create_integer_operand (&ops[5], expected_size);
}
if (nops == 8)
if (nops >= 8)
{
create_integer_operand (&ops[6], min_size);
/* If we can not represent the maximal size,
@ -1358,6 +1361,15 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
else
create_fixed_operand (&ops[7], NULL);
}
if (nops == 9)
{
/* If we cannot represent the probable maximal size,
make the parameter NULL. */
if ((HOST_WIDE_INT) probable_max_size != -1)
create_integer_operand (&ops[8], probable_max_size);
else
create_fixed_operand (&ops[8], NULL);
}
if (maybe_expand_insn (code, nops, ops))
{
volatile_ok = save_volatile_ok;
@ -2747,7 +2759,8 @@ rtx
clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
unsigned int expected_align, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size,
unsigned HOST_WIDE_INT max_size)
unsigned HOST_WIDE_INT max_size,
unsigned HOST_WIDE_INT probable_max_size)
{
enum machine_mode mode = GET_MODE (object);
unsigned int align;
@ -2789,7 +2802,7 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
clear_by_pieces (object, INTVAL (size), align);
else if (set_storage_via_setmem (object, size, const0_rtx, align,
expected_align, expected_size,
min_size, max_size))
min_size, max_size, probable_max_size))
;
else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object)))
return set_storage_via_libcall (object, size, const0_rtx,
@ -2808,7 +2821,7 @@ clear_storage (rtx object, rtx size, enum block_op_methods method)
min = max = UINTVAL (size);
else
max = GET_MODE_MASK (GET_MODE (size));
return clear_storage_hints (object, size, method, 0, -1, min, max);
return clear_storage_hints (object, size, method, 0, -1, min, max, max);
}
@ -2907,7 +2920,8 @@ bool
set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
unsigned int expected_align, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size,
unsigned HOST_WIDE_INT max_size)
unsigned HOST_WIDE_INT max_size,
unsigned HOST_WIDE_INT probable_max_size)
{
/* Try the most limited insn first, because there's no point
including more than one in the machine description unless
@ -2942,11 +2956,11 @@ set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
|| max_size <= (GET_MODE_MASK (mode) >> 1)
|| GET_MODE_BITSIZE (mode) >= GET_MODE_BITSIZE (Pmode)))
{
struct expand_operand ops[8];
struct expand_operand ops[9];
unsigned int nops;
nops = insn_data[(int) code].n_generator_args;
gcc_assert (nops == 4 || nops == 6 || nops == 8);
gcc_assert (nops == 4 || nops == 6 || nops == 8 || nops == 9);
create_fixed_operand (&ops[0], object);
/* The check above guarantees that this size conversion is valid. */
@ -2958,7 +2972,7 @@ set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT);
create_integer_operand (&ops[5], expected_size);
}
if (nops == 8)
if (nops >= 8)
{
create_integer_operand (&ops[6], min_size);
/* If we can not represent the maximal size,
@ -2968,6 +2982,15 @@ set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
else
create_fixed_operand (&ops[7], NULL);
}
if (nops == 9)
{
/* If we cannot represent the probable maximal size,
make the parameter NULL. */
if ((HOST_WIDE_INT) probable_max_size != -1)
create_integer_operand (&ops[8], probable_max_size);
else
create_fixed_operand (&ops[8], NULL);
}
if (maybe_expand_insn (code, nops, ops))
return true;
}

View File

@ -302,6 +302,7 @@ extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
unsigned int, HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
extern bool emit_storent_insn (rtx to, rtx from);
@ -365,6 +366,7 @@ extern rtx clear_storage (rtx, rtx, enum block_op_methods);
extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods,
unsigned int, HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
/* The same, but always output an library call. */
rtx set_storage_via_libcall (rtx, rtx, rtx, bool);
@ -373,6 +375,7 @@ rtx set_storage_via_libcall (rtx, rtx, rtx, bool);
extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
unsigned int, HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,

View File

@ -8229,6 +8229,10 @@ maybe_gen_insn (enum insn_code icode, unsigned int nops,
return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
ops[3].value, ops[4].value, ops[5].value,
ops[6].value, ops[7].value);
case 9:
return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
ops[3].value, ops[4].value, ops[5].value,
ops[6].value, ops[7].value, ops[8].value);
}
gcc_unreachable ();
}

View File

@ -1,3 +1,7 @@
2013-11-19 Jan Hubicka <jh@suse.cz>
* gcc.target/i386/memcpy-3.c: New testcase.
2013-11-18 Jan Hubicka <jh@suse.cz>
Uros Bizjak <ubizjak@gmail.com>

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Destination (A) and source (B) of the block copy.  */
void *a;
void *b;

/* Copy C bytes from B to A, but only when C is below 10.  C is a signed
   int, so the guard bounds it from above only.  The function returns
   nothing; it is declared void explicitly because an implicit int return
   type is not valid C since C99.  */
void
t (int c)
{
  if (c < 10)
    __builtin_memcpy (a, b, c);
}
/* Memcpy should be inlined because block size is known. */
/* { dg-final { scan-assembler-not "(jmp|call)\[\\t \]*memcpy" } } */