Generate offset adjusted operation for op_by_pieces operations
Add an overlap_op_by_pieces_p target hook for op_by_pieces operations between two areas of memory to generate one offset adjusted operation in the smallest integer mode for the remaining bytes on the last piece operation of a memory region to avoid doing more than one smaller operation. Pass the RTL information from the previous iteration to m_constfn in op_by_pieces operation so that builtin_memset_[read|gen]_str can generate the new RTL from the previous RTL. Tested on Linux/x86-64. gcc/ PR middle-end/90773 * builtins.c (builtin_memcpy_read_str): Add a dummy argument. (builtin_strncpy_read_str): Likewise. (builtin_memset_read_str): Add an argument for the previous RTL information and generate the new RTL from the previous RTL info. (builtin_memset_gen_str): Likewise. * builtins.h (builtin_strncpy_read_str): Update the prototype. (builtin_memset_read_str): Likewise. * expr.c (by_pieces_ninsns): If targetm.overlap_op_by_pieces_p() returns true, round up size and alignment to the widest integer mode for maximum size. (pieces_addr::adjust): Add a pointer to by_pieces_prev argument and pass it to m_constfn. (op_by_pieces_d): Add m_push and m_overlap_op_by_pieces. (op_by_pieces_d::op_by_pieces_d): Add a bool argument to initialize m_push. Initialize m_overlap_op_by_pieces with targetm.overlap_op_by_pieces_p (). (op_by_pieces_d::run): Pass the previous RTL information to pieces_addr::adjust and generate overlapping operations if m_overlap_op_by_pieces is true. (PUSHG_P): New. (move_by_pieces_d::move_by_pieces_d): Updated for op_by_pieces_d change. (store_by_pieces_d::store_by_pieces_d): Updated for op_by_pieces_d change. (can_store_by_pieces): Use by_pieces_constfn on constfun. (store_by_pieces): Use by_pieces_constfn on constfun. Updated for op_by_pieces_d change. (clear_by_pieces_1): Add a dummy argument. (clear_by_pieces): Updated for op_by_pieces_d change. (compare_by_pieces_d::compare_by_pieces_d): Likewise. (string_cst_read_str): Add a dummy argument. 
* expr.h (by_pieces_constfn): Add a dummy argument. (by_pieces_prev): New. * target.def (overlap_op_by_pieces_p): New target hook. * config/i386/i386.c (TARGET_OVERLAP_OP_BY_PIECES_P): New. * doc/tm.texi.in: Add TARGET_OVERLAP_OP_BY_PIECES_P. * doc/tm.texi: Regenerated. gcc/testsuite/ PR middle-end/90773 * g++.dg/pr90773-1.h: New test. * g++.dg/pr90773-1a.C: Likewise. * g++.dg/pr90773-1b.C: Likewise. * g++.dg/pr90773-1c.C: Likewise. * g++.dg/pr90773-1d.C: Likewise. * gcc.target/i386/pr90773-1.c: Likewise. * gcc.target/i386/pr90773-2.c: Likewise. * gcc.target/i386/pr90773-3.c: Likewise. * gcc.target/i386/pr90773-4.c: Likewise. * gcc.target/i386/pr90773-5.c: Likewise. * gcc.target/i386/pr90773-6.c: Likewise. * gcc.target/i386/pr90773-7.c: Likewise. * gcc.target/i386/pr90773-8.c: Likewise. * gcc.target/i386/pr90773-9.c: Likewise. * gcc.target/i386/pr90773-10.c: Likewise. * gcc.target/i386/pr90773-11.c: Likewise. * gcc.target/i386/pr90773-12.c: Likewise. * gcc.target/i386/pr90773-13.c: Likewise. * gcc.target/i386/pr90773-14.c: Likewise.
This commit is contained in:
parent
af4ccaa751
commit
985b3a6837
@ -128,7 +128,6 @@ static rtx expand_builtin_va_copy (tree);
|
||||
static rtx inline_expand_builtin_bytecmp (tree, rtx);
|
||||
static rtx expand_builtin_strcmp (tree, rtx);
|
||||
static rtx expand_builtin_strncmp (tree, rtx, machine_mode);
|
||||
static rtx builtin_memcpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
|
||||
static rtx expand_builtin_memchr (tree, rtx);
|
||||
static rtx expand_builtin_memcpy (tree, rtx);
|
||||
static rtx expand_builtin_memory_copy_args (tree dest, tree src, tree len,
|
||||
@ -145,7 +144,6 @@ static rtx expand_builtin_stpcpy (tree, rtx, machine_mode);
|
||||
static rtx expand_builtin_stpncpy (tree, rtx);
|
||||
static rtx expand_builtin_strncat (tree, rtx);
|
||||
static rtx expand_builtin_strncpy (tree, rtx);
|
||||
static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, scalar_int_mode);
|
||||
static rtx expand_builtin_memset (tree, rtx, machine_mode);
|
||||
static rtx expand_builtin_memset_args (tree, tree, tree, rtx, machine_mode, tree);
|
||||
static rtx expand_builtin_bzero (tree);
|
||||
@ -3860,7 +3858,7 @@ expand_builtin_strnlen (tree exp, rtx target, machine_mode target_mode)
|
||||
a target constant. */
|
||||
|
||||
static rtx
|
||||
builtin_memcpy_read_str (void *data, HOST_WIDE_INT offset,
|
||||
builtin_memcpy_read_str (void *data, void *, HOST_WIDE_INT offset,
|
||||
scalar_int_mode mode)
|
||||
{
|
||||
/* The REPresentation pointed to by DATA need not be a nul-terminated
|
||||
@ -6373,7 +6371,7 @@ expand_builtin_stpncpy (tree exp, rtx)
|
||||
constant. */
|
||||
|
||||
rtx
|
||||
builtin_strncpy_read_str (void *data, HOST_WIDE_INT offset,
|
||||
builtin_strncpy_read_str (void *data, void *, HOST_WIDE_INT offset,
|
||||
scalar_int_mode mode)
|
||||
{
|
||||
const char *str = (const char *) data;
|
||||
@ -6584,12 +6582,22 @@ expand_builtin_strncpy (tree exp, rtx target)
|
||||
|
||||
/* Callback routine for store_by_pieces. Read GET_MODE_SIZE (MODE)
|
||||
bytes from constant string DATA + OFFSET and return it as target
|
||||
constant. */
|
||||
constant. If PREV isn't nullptr, it has the RTL info from the
|
||||
previous iteration. */
|
||||
|
||||
rtx
|
||||
builtin_memset_read_str (void *data, HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
|
||||
builtin_memset_read_str (void *data, void *prevp,
|
||||
HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
|
||||
scalar_int_mode mode)
|
||||
{
|
||||
by_pieces_prev *prev = (by_pieces_prev *) prevp;
|
||||
if (prev != nullptr && prev->data != nullptr)
|
||||
{
|
||||
/* Use the previous data in the same mode. */
|
||||
if (prev->mode == mode)
|
||||
return prev->data;
|
||||
}
|
||||
|
||||
const char *c = (const char *) data;
|
||||
char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
|
||||
|
||||
@ -6601,16 +6609,28 @@ builtin_memset_read_str (void *data, HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
|
||||
/* Callback routine for store_by_pieces. Return the RTL of a register
|
||||
containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned
|
||||
char value given in the RTL register data. For example, if mode is
|
||||
4 bytes wide, return the RTL for 0x01010101*data. */
|
||||
4 bytes wide, return the RTL for 0x01010101*data. If PREV isn't
|
||||
nullptr, it has the RTL info from the previous iteration. */
|
||||
|
||||
static rtx
|
||||
builtin_memset_gen_str (void *data, HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
|
||||
builtin_memset_gen_str (void *data, void *prevp,
|
||||
HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
|
||||
scalar_int_mode mode)
|
||||
{
|
||||
rtx target, coeff;
|
||||
size_t size;
|
||||
char *p;
|
||||
|
||||
by_pieces_prev *prev = (by_pieces_prev *) prevp;
|
||||
if (prev != nullptr && prev->data != nullptr)
|
||||
{
|
||||
/* Use the previous data in the same mode. */
|
||||
if (prev->mode == mode)
|
||||
return prev->data;
|
||||
|
||||
return simplify_gen_subreg (mode, prev->data, prev->mode, 0);
|
||||
}
|
||||
|
||||
size = GET_MODE_SIZE (mode);
|
||||
if (size == 1)
|
||||
return (rtx) data;
|
||||
|
@ -110,8 +110,10 @@ extern void expand_builtin_update_setjmp_buf (rtx);
|
||||
extern tree mathfn_built_in (tree, enum built_in_function fn);
|
||||
extern tree mathfn_built_in (tree, combined_fn);
|
||||
extern tree mathfn_built_in_type (combined_fn);
|
||||
extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
|
||||
extern rtx builtin_memset_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
|
||||
extern rtx builtin_strncpy_read_str (void *, void *, HOST_WIDE_INT,
|
||||
scalar_int_mode);
|
||||
extern rtx builtin_memset_read_str (void *, void *, HOST_WIDE_INT,
|
||||
scalar_int_mode);
|
||||
extern rtx expand_builtin_saveregs (void);
|
||||
extern tree std_build_builtin_va_list (void);
|
||||
extern tree std_fn_abi_va_list (tree);
|
||||
|
@ -23538,6 +23538,9 @@ ix86_run_selftests (void)
|
||||
#undef TARGET_ADDRESS_COST
|
||||
#define TARGET_ADDRESS_COST ix86_address_cost
|
||||
|
||||
#undef TARGET_OVERLAP_OP_BY_PIECES_P
|
||||
#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
|
||||
|
||||
#undef TARGET_FLAGS_REGNUM
|
||||
#define TARGET_FLAGS_REGNUM FLAGS_REG
|
||||
#undef TARGET_FIXED_CONDITION_CODE_REGS
|
||||
|
@ -6767,6 +6767,13 @@ in code size, for example where the number of insns emitted to perform a
|
||||
move would be greater than that of a library call.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} bool TARGET_OVERLAP_OP_BY_PIECES_P (void)
|
||||
This target hook should return true if when the @code{by_pieces}
|
||||
infrastructure is used, an offset adjusted unaligned memory operation
|
||||
in the smallest integer mode for the last piece operation of a memory
|
||||
region can be generated to avoid doing more than one smaller operation.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_COMPARE_BY_PIECES_BRANCH_RATIO (machine_mode @var{mode})
|
||||
When expanding a block comparison in MODE, gcc can try to reduce the
|
||||
number of branches at the expense of more memory operations. This hook
|
||||
|
@ -4588,6 +4588,8 @@ If you don't define this, a reasonable default is used.
|
||||
|
||||
@hook TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
|
||||
|
||||
@hook TARGET_OVERLAP_OP_BY_PIECES_P
|
||||
|
||||
@hook TARGET_COMPARE_BY_PIECES_BRANCH_RATIO
|
||||
|
||||
@defmac MOVE_MAX_PIECES
|
||||
|
105
gcc/expr.c
105
gcc/expr.c
@ -815,12 +815,27 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
|
||||
unsigned int max_size, by_pieces_operation op)
|
||||
{
|
||||
unsigned HOST_WIDE_INT n_insns = 0;
|
||||
scalar_int_mode mode;
|
||||
|
||||
if (targetm.overlap_op_by_pieces_p () && op != COMPARE_BY_PIECES)
|
||||
{
|
||||
/* NB: Round up L and ALIGN to the widest integer mode for
|
||||
MAX_SIZE. */
|
||||
mode = widest_int_mode_for_size (max_size);
|
||||
if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
|
||||
{
|
||||
unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
|
||||
if (up > l)
|
||||
l = up;
|
||||
align = GET_MODE_ALIGNMENT (mode);
|
||||
}
|
||||
}
|
||||
|
||||
align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
|
||||
|
||||
while (max_size > 1 && l > 0)
|
||||
{
|
||||
scalar_int_mode mode = widest_int_mode_for_size (max_size);
|
||||
mode = widest_int_mode_for_size (max_size);
|
||||
enum insn_code icode;
|
||||
|
||||
unsigned int modesize = GET_MODE_SIZE (mode);
|
||||
@ -888,7 +903,8 @@ class pieces_addr
|
||||
void *m_cfndata;
|
||||
public:
|
||||
pieces_addr (rtx, bool, by_pieces_constfn, void *);
|
||||
rtx adjust (scalar_int_mode, HOST_WIDE_INT);
|
||||
rtx adjust (scalar_int_mode, HOST_WIDE_INT,
|
||||
by_pieces_prev * = nullptr);
|
||||
void increment_address (HOST_WIDE_INT);
|
||||
void maybe_predec (HOST_WIDE_INT);
|
||||
void maybe_postinc (HOST_WIDE_INT);
|
||||
@ -990,10 +1006,12 @@ pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse,
|
||||
but we still modify the MEM's properties. */
|
||||
|
||||
rtx
|
||||
pieces_addr::adjust (scalar_int_mode mode, HOST_WIDE_INT offset)
|
||||
pieces_addr::adjust (scalar_int_mode mode, HOST_WIDE_INT offset,
|
||||
by_pieces_prev *prev)
|
||||
{
|
||||
if (m_constfn)
|
||||
return m_constfn (m_cfndata, offset, mode);
|
||||
/* Pass the previous data to m_constfn. */
|
||||
return m_constfn (m_cfndata, prev, offset, mode);
|
||||
if (m_obj == NULL_RTX)
|
||||
return NULL_RTX;
|
||||
if (m_auto)
|
||||
@ -1051,6 +1069,10 @@ class op_by_pieces_d
|
||||
unsigned int m_align;
|
||||
unsigned int m_max_size;
|
||||
bool m_reverse;
|
||||
/* True if this is a stack push. */
|
||||
bool m_push;
|
||||
/* True if targetm.overlap_op_by_pieces_p () returns true. */
|
||||
bool m_overlap_op_by_pieces;
|
||||
|
||||
/* Virtual functions, overridden by derived classes for the specific
|
||||
operation. */
|
||||
@ -1062,7 +1084,7 @@ class op_by_pieces_d
|
||||
|
||||
public:
|
||||
op_by_pieces_d (rtx, bool, rtx, bool, by_pieces_constfn, void *,
|
||||
unsigned HOST_WIDE_INT, unsigned int);
|
||||
unsigned HOST_WIDE_INT, unsigned int, bool);
|
||||
void run ();
|
||||
};
|
||||
|
||||
@ -1077,10 +1099,11 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
|
||||
by_pieces_constfn from_cfn,
|
||||
void *from_cfn_data,
|
||||
unsigned HOST_WIDE_INT len,
|
||||
unsigned int align)
|
||||
unsigned int align, bool push)
|
||||
: m_to (to, to_load, NULL, NULL),
|
||||
m_from (from, from_load, from_cfn, from_cfn_data),
|
||||
m_len (len), m_max_size (MOVE_MAX_PIECES + 1)
|
||||
m_len (len), m_max_size (MOVE_MAX_PIECES + 1),
|
||||
m_push (push)
|
||||
{
|
||||
int toi = m_to.get_addr_inc ();
|
||||
int fromi = m_from.get_addr_inc ();
|
||||
@ -1109,6 +1132,8 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
|
||||
|
||||
align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
|
||||
m_align = align;
|
||||
|
||||
m_overlap_op_by_pieces = targetm.overlap_op_by_pieces_p ();
|
||||
}
|
||||
|
||||
/* This function returns the largest usable integer mode for LEN bytes
|
||||
@ -1145,6 +1170,9 @@ op_by_pieces_d::run ()
|
||||
scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
|
||||
mode = get_usable_mode (mode, m_len);
|
||||
|
||||
by_pieces_prev to_prev = { nullptr, mode };
|
||||
by_pieces_prev from_prev = { nullptr, mode };
|
||||
|
||||
do
|
||||
{
|
||||
unsigned int size = GET_MODE_SIZE (mode);
|
||||
@ -1155,8 +1183,12 @@ op_by_pieces_d::run ()
|
||||
if (m_reverse)
|
||||
m_offset -= size;
|
||||
|
||||
to1 = m_to.adjust (mode, m_offset);
|
||||
from1 = m_from.adjust (mode, m_offset);
|
||||
to1 = m_to.adjust (mode, m_offset, &to_prev);
|
||||
to_prev.data = to1;
|
||||
to_prev.mode = mode;
|
||||
from1 = m_from.adjust (mode, m_offset, &from_prev);
|
||||
from_prev.data = from1;
|
||||
from_prev.mode = mode;
|
||||
|
||||
m_to.maybe_predec (-(HOST_WIDE_INT)size);
|
||||
m_from.maybe_predec (-(HOST_WIDE_INT)size);
|
||||
@ -1177,9 +1209,32 @@ op_by_pieces_d::run ()
|
||||
if (m_len == 0)
|
||||
return;
|
||||
|
||||
/* NB: widest_int_mode_for_size checks SIZE > 1. */
|
||||
mode = widest_int_mode_for_size (size);
|
||||
mode = get_usable_mode (mode, m_len);
|
||||
if (!m_push && m_overlap_op_by_pieces)
|
||||
{
|
||||
/* NB: Generate overlapping operations if it is not a stack
|
||||
push since stack push must not overlap. Get the smallest
|
||||
integer mode for M_LEN bytes. */
|
||||
mode = smallest_int_mode_for_size (m_len * BITS_PER_UNIT);
|
||||
mode = get_usable_mode (mode, GET_MODE_SIZE (mode));
|
||||
int gap = GET_MODE_SIZE (mode) - m_len;
|
||||
if (gap > 0)
|
||||
{
|
||||
/* If size of MODE > M_LEN, generate the last operation
|
||||
in MODE for the remaining bytes with overlapping memory
|
||||
from the previous operation. */
|
||||
if (m_reverse)
|
||||
m_offset += gap;
|
||||
else
|
||||
m_offset -= gap;
|
||||
m_len += gap;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* NB: widest_int_mode_for_size checks SIZE > 1. */
|
||||
mode = widest_int_mode_for_size (size);
|
||||
mode = get_usable_mode (mode, m_len);
|
||||
}
|
||||
}
|
||||
while (1);
|
||||
|
||||
@ -1190,6 +1245,12 @@ op_by_pieces_d::run ()
|
||||
/* Derived class from op_by_pieces_d, providing support for block move
|
||||
operations. */
|
||||
|
||||
#ifdef PUSH_ROUNDING
|
||||
#define PUSHG_P(to) ((to) == nullptr)
|
||||
#else
|
||||
#define PUSHG_P(to) false
|
||||
#endif
|
||||
|
||||
class move_by_pieces_d : public op_by_pieces_d
|
||||
{
|
||||
insn_gen_fn m_gen_fun;
|
||||
@ -1199,7 +1260,8 @@ class move_by_pieces_d : public op_by_pieces_d
|
||||
public:
|
||||
move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
|
||||
unsigned int align)
|
||||
: op_by_pieces_d (to, false, from, true, NULL, NULL, len, align)
|
||||
: op_by_pieces_d (to, false, from, true, NULL, NULL, len, align,
|
||||
PUSHG_P (to))
|
||||
{
|
||||
}
|
||||
rtx finish_retmode (memop_ret);
|
||||
@ -1294,7 +1356,8 @@ class store_by_pieces_d : public op_by_pieces_d
|
||||
public:
|
||||
store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
|
||||
unsigned HOST_WIDE_INT len, unsigned int align)
|
||||
: op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len, align)
|
||||
: op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len,
|
||||
align, false)
|
||||
{
|
||||
}
|
||||
rtx finish_retmode (memop_ret);
|
||||
@ -1349,7 +1412,7 @@ store_by_pieces_d::finish_retmode (memop_ret retmode)
|
||||
|
||||
int
|
||||
can_store_by_pieces (unsigned HOST_WIDE_INT len,
|
||||
rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
|
||||
by_pieces_constfn constfun,
|
||||
void *constfundata, unsigned int align, bool memsetp)
|
||||
{
|
||||
unsigned HOST_WIDE_INT l;
|
||||
@ -1396,7 +1459,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
|
||||
if (reverse)
|
||||
offset -= size;
|
||||
|
||||
cst = (*constfun) (constfundata, offset, mode);
|
||||
cst = (*constfun) (constfundata, nullptr, offset, mode);
|
||||
if (!targetm.legitimate_constant_p (mode, cst))
|
||||
return 0;
|
||||
|
||||
@ -1426,7 +1489,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
|
||||
|
||||
rtx
|
||||
store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
|
||||
rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
|
||||
by_pieces_constfn constfun,
|
||||
void *constfundata, unsigned int align, bool memsetp,
|
||||
memop_ret retmode)
|
||||
{
|
||||
@ -1454,7 +1517,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
|
||||
Return const0_rtx unconditionally. */
|
||||
|
||||
static rtx
|
||||
clear_by_pieces_1 (void *, HOST_WIDE_INT, scalar_int_mode)
|
||||
clear_by_pieces_1 (void *, void *, HOST_WIDE_INT, scalar_int_mode)
|
||||
{
|
||||
return const0_rtx;
|
||||
}
|
||||
@ -1490,7 +1553,8 @@ class compare_by_pieces_d : public op_by_pieces_d
|
||||
compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
|
||||
void *op1_cfn_data, HOST_WIDE_INT len, int align,
|
||||
rtx_code_label *fail_label)
|
||||
: op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len, align)
|
||||
: op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len,
|
||||
align, false)
|
||||
{
|
||||
m_fail_label = fail_label;
|
||||
}
|
||||
@ -5676,7 +5740,8 @@ emit_storent_insn (rtx to, rtx from)
|
||||
/* Helper function for store_expr storing of STRING_CST. */
|
||||
|
||||
static rtx
|
||||
string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
|
||||
string_cst_read_str (void *data, void *, HOST_WIDE_INT offset,
|
||||
scalar_int_mode mode)
|
||||
{
|
||||
tree str = (tree) data;
|
||||
|
||||
|
10
gcc/expr.h
10
gcc/expr.h
@ -107,7 +107,15 @@ enum block_op_methods
|
||||
BLOCK_OP_NO_LIBCALL_RET
|
||||
};
|
||||
|
||||
typedef rtx (*by_pieces_constfn) (void *, HOST_WIDE_INT, scalar_int_mode);
|
||||
typedef rtx (*by_pieces_constfn) (void *, void *, HOST_WIDE_INT,
|
||||
scalar_int_mode);
|
||||
|
||||
/* The second pointer passed to by_pieces_constfn. */
|
||||
struct by_pieces_prev
|
||||
{
|
||||
rtx data;
|
||||
scalar_int_mode mode;
|
||||
};
|
||||
|
||||
extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
|
||||
extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
|
||||
|
@ -3642,6 +3642,15 @@ move would be greater than that of a library call.",
|
||||
enum by_pieces_operation op, bool speed_p),
|
||||
default_use_by_pieces_infrastructure_p)
|
||||
|
||||
DEFHOOK
|
||||
(overlap_op_by_pieces_p,
|
||||
"This target hook should return true if when the @code{by_pieces}\n\
|
||||
infrastructure is used, an offset adjusted unaligned memory operation\n\
|
||||
in the smallest integer mode for the last piece operation of a memory\n\
|
||||
region can be generated to avoid doing more than one smaller operation.",
|
||||
bool, (void),
|
||||
hook_bool_void_false)
|
||||
|
||||
DEFHOOK
|
||||
(compare_by_pieces_branch_ratio,
|
||||
"When expanding a block comparison in MODE, gcc can try to reduce the\n\
|
||||
|
14
gcc/testsuite/g++.dg/pr90773-1.h
Normal file
14
gcc/testsuite/g++.dg/pr90773-1.h
Normal file
@ -0,0 +1,14 @@
|
||||
class fixed_wide_int_storage {
|
||||
public:
|
||||
long val[10];
|
||||
int len;
|
||||
fixed_wide_int_storage ()
|
||||
{
|
||||
len = sizeof (val) / sizeof (val[0]);
|
||||
for (int i = 0; i < len; i++)
|
||||
val[i] = i;
|
||||
}
|
||||
};
|
||||
|
||||
extern void foo (fixed_wide_int_storage);
|
||||
extern int record_increment(void);
|
13
gcc/testsuite/g++.dg/pr90773-1a.C
Normal file
13
gcc/testsuite/g++.dg/pr90773-1a.C
Normal file
@ -0,0 +1,13 @@
|
||||
// { dg-do compile }
|
||||
// { dg-options "-O2" }
|
||||
// { dg-additional-options "-mno-avx -msse2 -mtune=skylake" { target { i?86-*-* x86_64-*-* } } }
|
||||
|
||||
#include "pr90773-1.h"
|
||||
|
||||
int
|
||||
record_increment(void)
|
||||
{
|
||||
fixed_wide_int_storage x;
|
||||
foo (x);
|
||||
return 0;
|
||||
}
|
5
gcc/testsuite/g++.dg/pr90773-1b.C
Normal file
5
gcc/testsuite/g++.dg/pr90773-1b.C
Normal file
@ -0,0 +1,5 @@
|
||||
// { dg-do compile }
|
||||
// { dg-options "-O2" }
|
||||
// { dg-additional-options "-mno-avx512f -march=skylake" { target { i?86-*-* x86_64-*-* } } }
|
||||
|
||||
#include "pr90773-1a.C"
|
5
gcc/testsuite/g++.dg/pr90773-1c.C
Normal file
5
gcc/testsuite/g++.dg/pr90773-1c.C
Normal file
@ -0,0 +1,5 @@
|
||||
// { dg-do compile }
|
||||
// { dg-options "-O2" }
|
||||
// { dg-additional-options "-march=skylake-avx512" { target { i?86-*-* x86_64-*-* } } }
|
||||
|
||||
#include "pr90773-1a.C"
|
19
gcc/testsuite/g++.dg/pr90773-1d.C
Normal file
19
gcc/testsuite/g++.dg/pr90773-1d.C
Normal file
@ -0,0 +1,19 @@
|
||||
// { dg-do run }
|
||||
// { dg-options "-O2" }
|
||||
// { dg-additional-options "-march=native" { target { i?86-*-* x86_64-*-* } } }
|
||||
// { dg-additional-sources "pr90773-1a.C" }
|
||||
|
||||
#include "pr90773-1.h"
|
||||
|
||||
void
|
||||
foo (fixed_wide_int_storage x)
|
||||
{
|
||||
for (int i = 0; i < x.len; i++)
|
||||
if (x.val[i] != i)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
return record_increment ();
|
||||
}
|
17
gcc/testsuite/gcc.target/i386/pr90773-1.c
Normal file
17
gcc/testsuite/gcc.target/i386/pr90773-1.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
|
||||
extern char *dst, *src;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memcpy (dst, src, 15);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movq\[\\t \]+\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movq\[\\t \]+7\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+4\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+8\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+11\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
13
gcc/testsuite/gcc.target/i386/pr90773-10.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr90773-10.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
|
||||
extern char *dst;
|
||||
|
||||
void
|
||||
foo (int c)
|
||||
{
|
||||
__builtin_memset (dst, c, 5);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+.+, \\(%\[\^,\]+\\)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movb\[\\t \]+.+, 4\\(%\[\^,\]+\\)" 1 } } */
|
13
gcc/testsuite/gcc.target/i386/pr90773-11.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr90773-11.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
|
||||
extern char *dst;
|
||||
|
||||
void
|
||||
foo (int c)
|
||||
{
|
||||
__builtin_memset (dst, c, 6);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+.+, \\(%\[\^,\]+\\)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 4\\(%\[\^,\]+\\)" 1 } } */
|
11
gcc/testsuite/gcc.target/i386/pr90773-12.c
Normal file
11
gcc/testsuite/gcc.target/i386/pr90773-12.c
Normal file
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mno-avx -msse2 -mtune=skylake" } */
|
||||
|
||||
void
|
||||
foo (char *dst, char *src)
|
||||
{
|
||||
__builtin_memcpy (dst, src, 255);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movdqu\[\\t \]+\[0-9\]*\\(%\[\^,\]+\\)," 16 } } */
|
||||
/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */
|
11
gcc/testsuite/gcc.target/i386/pr90773-13.c
Normal file
11
gcc/testsuite/gcc.target/i386/pr90773-13.c
Normal file
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mno-avx -msse2 -mtune=skylake" } */
|
||||
|
||||
void
|
||||
foo (char *dst)
|
||||
{
|
||||
__builtin_memset (dst, 0, 255);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \[0-9\]*\\(%\[\^,\]+\\)" 16 } } */
|
||||
/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */
|
13
gcc/testsuite/gcc.target/i386/pr90773-14.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr90773-14.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
|
||||
|
||||
extern char *dst;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memset (dst, 1, 20);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+\\\$16843009, 16\\(%\[\^,\]+\\)" 1 } } */
|
20
gcc/testsuite/gcc.target/i386/pr90773-2.c
Normal file
20
gcc/testsuite/gcc.target/i386/pr90773-2.c
Normal file
@ -0,0 +1,20 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
/* { dg-additional-options "-mno-avx -msse2" { target { ! ia32 } } } */
|
||||
/* { dg-additional-options "-mno-sse" { target ia32 } } */
|
||||
|
||||
extern char *dst, *src;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memcpy (dst, src, 19);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movdqu\[\\t \]+\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+15\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+4\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+8\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+12\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+15\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
23
gcc/testsuite/gcc.target/i386/pr90773-3.c
Normal file
23
gcc/testsuite/gcc.target/i386/pr90773-3.c
Normal file
@ -0,0 +1,23 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
/* { dg-additional-options "-mno-avx -msse2" { target { ! ia32 } } } */
|
||||
/* { dg-additional-options "-mno-sse" { target ia32 } } */
|
||||
|
||||
extern char *dst, *src;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memcpy (dst, src, 31);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movdqu\[\\t \]+\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movdqu\[\\t \]+15\\(%\[\^,\]+\\)," 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+4\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+8\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+12\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+16\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+20\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+24\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+27\\(%\[\^,\]+\\)," 1 { target ia32 } } } */
|
13
gcc/testsuite/gcc.target/i386/pr90773-4.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr90773-4.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
|
||||
|
||||
extern char *dst;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memset (dst, 0, 31);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, 15\\(%\[\^,\]+\\)" 1 } } */
|
13
gcc/testsuite/gcc.target/i386/pr90773-5.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr90773-5.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
|
||||
|
||||
extern char *dst;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memset (dst, 0, 21);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movq\[\\t \]+\\\$0+, 13\\(%\[\^,\]+\\)" 1 } } */
|
11
gcc/testsuite/gcc.target/i386/pr90773-6.c
Normal file
11
gcc/testsuite/gcc.target/i386/pr90773-6.c
Normal file
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
|
||||
|
||||
void
|
||||
foo (char *dst, char *src)
|
||||
{
|
||||
__builtin_memcpy (dst, src, 255);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movdqu\[\\t \]+\[0-9\]*\\(%\[\^,\]+\\)," 16 } } */
|
||||
/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */
|
11
gcc/testsuite/gcc.target/i386/pr90773-7.c
Normal file
11
gcc/testsuite/gcc.target/i386/pr90773-7.c
Normal file
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mno-avx -msse2 -mtune=skylake" } */
|
||||
|
||||
void
|
||||
foo (char *dst)
|
||||
{
|
||||
__builtin_memset (dst, 0, 255);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movups\[\\t \]+%xmm\[0-9\]+, \[0-9\]*\\(%\[\^,\]+\\)" 16 } } */
|
||||
/* { dg-final { scan-assembler-not "mov\[bwlq\]" } } */
|
13
gcc/testsuite/gcc.target/i386/pr90773-8.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr90773-8.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
|
||||
extern char *dst;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memset (dst, 0, 5);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+.+, \\(%\[\^,\]+\\)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movb\[\\t \]+.+, 4\\(%\[\^,\]+\\)" 1 } } */
|
13
gcc/testsuite/gcc.target/i386/pr90773-9.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr90773-9.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=generic" } */
|
||||
|
||||
extern char *dst;
|
||||
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
__builtin_memset (dst, 0, 6);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "movl\[\\t \]+.+, \\(%\[\^,\]+\\)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 4\\(%\[\^,\]+\\)" 1 } } */
|
Loading…
Reference in New Issue
Block a user