re PR tree-optimization/52171 (memcmp/strcmp/strncmp can be optimized when the result is tested for [in]equality with 0)

PR tree-optimization/52171
        * builtins.c (expand_cmpstrn_or_cmpmem): Delete, moved elsewhere.
        (expand_builtin_memcmp): New arg RESULT_EQ.  All callers changed.
        Look for constant strings.  Move some code to emit_block_cmp_hints
        and use it.
        * builtins.def (BUILT_IN_MEMCMP_EQ): New.
        * defaults.h (COMPARE_MAX_PIECES): New macro.
        * expr.c (move_by_pieces_d, store_by_pieces_d): Remove old structs.
        (move_by_pieces_1, store_by_pieces_1, store_by_pieces_2): Remove.
        (clear_by_pieces_1): Don't declare.  Move definition before use.
        (can_do_by_pieces): New static function.
        (can_move_by_pieces): Use it.  Return bool.
        (by_pieces_ninsns): Renamed from move_by_pieces_ninsns.  New arg
        OP.  All callers changed.  Handle COMPARE_BY_PIECES.
        (class pieces_addr): New.
        (pieces_addr::pieces_addr, pieces_addr::decide_autoinc,
        pieces_addr::adjust, pieces_addr::increment_address,
        pieces_addr::maybe_predec, pieces_addr::maybe_postinc): New member
        functions for it.
        (class op_by_pieces_d): New.
        (op_by_pieces_d::op_by_pieces_d, op_by_pieces_d::run): New member
        functions for it.
        (class move_by_pieces_d, class compare_by_pieces_d,
        class store_by_pieces_d): New subclasses of op_by_pieces_d.
        (move_by_pieces_d::prepare_mode, move_by_pieces_d::generate,
        move_by_pieces_d::finish_endp, store_by_pieces_d::prepare_mode,
        store_by_pieces_d::generate, store_by_pieces_d::finish_endp,
        compare_by_pieces_d::generate, compare_by_pieces_d::prepare_mode,
        compare_by_pieces_d::finish_mode): New member functions.
        (compare_by_pieces, emit_block_cmp_via_cmpmem): New static
        functions.
        (expand_cmpstrn_or_cmpmem): Moved here from builtins.c.
        (emit_block_cmp_hints): New function.
        (move_by_pieces, store_by_pieces, clear_by_pieces): Rewrite to just
        use the newly defined classes.
        * expr.h (by_pieces_constfn): New typedef.
        (can_store_by_pieces, store_by_pieces): Use it in arg declarations.
        (emit_block_cmp_hints, expand_cmpstrn_or_cmpmem): Declare.
        (move_by_pieces_ninsns): Don't declare.
        (can_move_by_pieces): Change return value to bool.
        * target.def (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Update docs.
        (compare_by_pieces_branch_ratio): New hook.
        * target.h (enum by_pieces_operation): Add COMPARE_BY_PIECES.
        (by_pieces_ninsns): Declare.
        * targhooks.c (default_use_by_pieces_infrastructure_p): Handle
        COMPARE_BY_PIECES.
        (default_compare_by_pieces_branch_ratio): New function.
        * targhooks.h (default_compare_by_pieces_branch_ratio): Declare.
        * doc/tm.texi.in (STORE_MAX_PIECES, COMPARE_MAX_PIECES): Document.
        * doc/tm.texi: Regenerate.
        * tree-ssa-strlen.c: Include "builtins.h".
        (handle_builtin_memcmp): New static function.
        (strlen_optimize_stmt): Call it for BUILT_IN_MEMCMP.
        * tree.c (build_common_builtin_nodes): Create __builtin_memcmp_eq.

testsuite/
        PR tree-optimization/52171
        * gcc.dg/pr52171.c: New test.
        * gcc.target/i386/pr52171.c: New test.

From-SVN: r237069
This commit is contained in:
Bernd Schmidt 2016-06-03 14:20:53 +00:00 committed by Bernd Schmidt
parent bfeee8acaa
commit 36b85e4328
17 changed files with 1183 additions and 639 deletions

View File

@ -1,3 +1,60 @@
2016-06-03 Bernd Schmidt <bschmidt@redhat.com>
PR tree-optimization/52171
* builtins.c (expand_cmpstrn_or_cmpmem): Delete, moved elsewhere.
(expand_builtin_memcmp): New arg RESULT_EQ. All callers changed.
Look for constant strings. Move some code to emit_block_cmp_hints
and use it.
* builtins.def (BUILT_IN_MEMCMP_EQ): New.
* defaults.h (COMPARE_MAX_PIECES): New macro.
* expr.c (move_by_pieces_d, store_by_pieces_d): Remove old structs.
(move_by_pieces_1, store_by_pieces_1, store_by_pieces_2): Remove.
(clear_by_pieces_1): Don't declare. Move definition before use.
(can_do_by_pieces): New static function.
(can_move_by_pieces): Use it. Return bool.
(by_pieces_ninsns): Renamed from move_by_pieces_ninsns. New arg
OP. All callers changed. Handle COMPARE_BY_PIECES.
(class pieces_addr): New.
(pieces_addr::pieces_addr, pieces_addr::decide_autoinc,
pieces_addr::adjust, pieces_addr::increment_address,
pieces_addr::maybe_predec, pieces_addr::maybe_postinc): New member
functions for it.
(class op_by_pieces_d): New.
(op_by_pieces_d::op_by_pieces_d, op_by_pieces_d::run): New member
functions for it.
(class move_by_pieces_d, class compare_by_pieces_d,
class store_by_pieces_d): New subclasses of op_by_pieces_d.
(move_by_pieces_d::prepare_mode, move_by_pieces_d::generate,
move_by_pieces_d::finish_endp, store_by_pieces_d::prepare_mode,
store_by_pieces_d::generate, store_by_pieces_d::finish_endp,
compare_by_pieces_d::generate, compare_by_pieces_d::prepare_mode,
compare_by_pieces_d::finish_mode): New member functions.
(compare_by_pieces, emit_block_cmp_via_cmpmem): New static
functions.
(expand_cmpstrn_or_cmpmem): Moved here from builtins.c.
(emit_block_cmp_hints): New function.
(move_by_pieces, store_by_pieces, clear_by_pieces): Rewrite to just
use the newly defined classes.
* expr.h (by_pieces_constfn): New typedef.
(can_store_by_pieces, store_by_pieces): Use it in arg declarations.
(emit_block_cmp_hints, expand_cmpstrn_or_cmpmem): Declare.
(move_by_pieces_ninsns): Don't declare.
(can_move_by_pieces): Change return value to bool.
* target.def (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Update docs.
(compare_by_pieces_branch_ratio): New hook.
* target.h (enum by_pieces_operation): Add COMPARE_BY_PIECES.
(by_pieces_ninsns): Declare.
* targhooks.c (default_use_by_pieces_infrastructure_p): Handle
COMPARE_BY_PIECES.
(default_compare_by_pieces_branch_ratio): New function.
* targhooks.h (default_compare_by_pieces_branch_ratio): Declare.
* doc/tm.texi.in (STORE_MAX_PIECES, COMPARE_MAX_PIECES): Document.
* doc/tm.texi: Regenerate.
* tree-ssa-strlen.c: Include "builtins.h".
(handle_builtin_memcmp): New static function.
(strlen_optimize_stmt): Call it for BUILT_IN_MEMCMP.
* tree.c (build_common_builtin_nodes): Create __builtin_memcmp_eq.
2016-06-03 Alan Hayward <alan.hayward@arm.com>
* tree-vect-stmts.c (vect_stmt_relevant_p): Do not vectorize non live

View File

@ -3671,53 +3671,24 @@ expand_cmpstr (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
return NULL_RTX;
}
/* Emit the cmpstrn or cmpmem pattern ICODE comparing ARG1_RTX with
   ARG2_RTX.  ARG3_RTX is the third operand of the comparison and
   ARG3_TYPE is its tree type; ALIGN is handed to the pattern as an
   integer operand.  TARGET, when usable, suggests where the result
   should go.

   Returns the rtx holding the result, or NULL_RTX if the pattern could
   not be expanded.  */
static rtx
expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
                          rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
                          HOST_WIDE_INT align)
{
  /* Only keep the suggested destination when it is a register that is
     not a hard register; otherwise let the expander choose one.  */
  const bool keep_target
    = target && REG_P (target) && !HARD_REGISTER_P (target);
  rtx dest = keep_target ? target : NULL_RTX;

  /* The result is produced in the mode of the pattern's operand 0.  */
  machine_mode result_mode = insn_data[icode].operand[0].mode;

  struct expand_operand operands[5];
  create_output_operand (&operands[0], dest, result_mode);
  create_fixed_operand (&operands[1], arg1_rtx);
  create_fixed_operand (&operands[2], arg2_rtx);
  create_convert_operand_from (&operands[3], arg3_rtx,
                               TYPE_MODE (arg3_type),
                               TYPE_UNSIGNED (arg3_type));
  create_integer_operand (&operands[4], align);

  if (!maybe_expand_insn (icode, 5, operands))
    return NULL_RTX;

  return operands[0].value;
}
/* Expand expression EXP, which is a call to the memcmp built-in function.
Return NULL_RTX if we failed and the caller should emit a normal call,
otherwise try to get the result in TARGET, if convenient. */
otherwise try to get the result in TARGET, if convenient.
RESULT_EQ is true if we can relax the returned value to be either zero
or nonzero, without caring about the sign. */
static rtx
expand_builtin_memcmp (tree exp, rtx target)
expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
{
if (!validate_arglist (exp,
POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
return NULL_RTX;
/* Note: The cmpstrnsi pattern, if it exists, is not suitable for
implementing memcmp because it will stop if it encounters two
zero bytes. */
insn_code icode = direct_optab_handler (cmpmem_optab, SImode);
if (icode == CODE_FOR_nothing)
return NULL_RTX;
tree arg1 = CALL_EXPR_ARG (exp, 0);
tree arg2 = CALL_EXPR_ARG (exp, 1);
tree len = CALL_EXPR_ARG (exp, 2);
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
location_t loc = EXPR_LOCATION (exp);
unsigned int arg1_align = get_pointer_alignment (arg1) / BITS_PER_UNIT;
unsigned int arg2_align = get_pointer_alignment (arg2) / BITS_PER_UNIT;
@ -3726,22 +3697,38 @@ expand_builtin_memcmp (tree exp, rtx target)
if (arg1_align == 0 || arg2_align == 0)
return NULL_RTX;
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
location_t loc = EXPR_LOCATION (exp);
rtx arg1_rtx = get_memory_rtx (arg1, len);
rtx arg2_rtx = get_memory_rtx (arg2, len);
rtx arg3_rtx = expand_normal (fold_convert_loc (loc, sizetype, len));
rtx len_rtx = expand_normal (fold_convert_loc (loc, sizetype, len));
/* Set MEM_SIZE as appropriate. */
if (CONST_INT_P (arg3_rtx))
if (CONST_INT_P (len_rtx))
{
set_mem_size (arg1_rtx, INTVAL (arg3_rtx));
set_mem_size (arg2_rtx, INTVAL (arg3_rtx));
set_mem_size (arg1_rtx, INTVAL (len_rtx));
set_mem_size (arg2_rtx, INTVAL (len_rtx));
}
rtx result = expand_cmpstrn_or_cmpmem (icode, target, arg1_rtx, arg2_rtx,
TREE_TYPE (len), arg3_rtx,
MIN (arg1_align, arg2_align));
by_pieces_constfn constfn = NULL;
const char *src_str = c_getstr (arg1);
if (src_str == NULL)
src_str = c_getstr (arg2);
else
std::swap (arg1_rtx, arg2_rtx);
/* If SRC is a string constant and block move would be done
by pieces, we can avoid loading the string from memory
and only stored the computed constants. */
if (src_str
&& CONST_INT_P (len_rtx)
&& (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1)
constfn = builtin_memcpy_read_str;
rtx result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx,
TREE_TYPE (len), target,
result_eq, constfn,
CONST_CAST (char *, src_str));
if (result)
{
/* Return the value in the proper mode for this function. */
@ -6073,9 +6060,15 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
case BUILT_IN_BCMP:
case BUILT_IN_MEMCMP:
target = expand_builtin_memcmp (exp, target);
case BUILT_IN_MEMCMP_EQ:
target = expand_builtin_memcmp (exp, target, fcode == BUILT_IN_MEMCMP_EQ);
if (target)
return target;
if (fcode == BUILT_IN_MEMCMP_EQ)
{
tree newdecl = builtin_decl_explicit (BUILT_IN_MEMCMP);
TREE_OPERAND (exp, 1) = build_fold_addr_expr (newdecl);
}
break;
case BUILT_IN_SETJMP:

View File

@ -864,6 +864,10 @@ DEF_BUILTIN_STUB (BUILT_IN_STACK_SAVE, "__builtin_stack_save")
DEF_BUILTIN_STUB (BUILT_IN_STACK_RESTORE, "__builtin_stack_restore")
DEF_BUILTIN_STUB (BUILT_IN_ALLOCA_WITH_ALIGN, "__builtin_alloca_with_align")
/* An internal version of memcmp, used when the result is only tested for
equality with zero. */
DEF_BUILTIN_STUB (BUILT_IN_MEMCMP_EQ, "__builtin_memcmp_eq")
/* Object size checking builtins. */
DEF_GCC_BUILTIN (BUILT_IN_OBJECT_SIZE, "object_size", BT_FN_SIZE_CONST_PTR_INT, ATTR_PURE_NOTHROW_LEAF_LIST)
DEF_EXT_LIB_BUILTIN_CHKP (BUILT_IN_MEMCPY_CHK, "__memcpy_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_RET1_NOTHROW_NONNULL_LEAF)

View File

@ -1039,6 +1039,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
#endif
/* Likewise for block comparisons. */
#ifndef COMPARE_MAX_PIECES
#define COMPARE_MAX_PIECES MOVE_MAX_PIECES
#endif
#ifndef MAX_MOVE_MAX
#define MAX_MOVE_MAX MOVE_MAX
#endif

View File

@ -6315,8 +6315,9 @@ Both @var{size} and @var{alignment} are measured in terms of storage
units.
The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.
These describe the type of memory operation under consideration.
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES} or
@code{COMPARE_BY_PIECES}. These describe the type of memory operation
under consideration.
The parameter @var{speed_p} is true if the code is currently being
optimized for speed rather than size.
@ -6333,11 +6334,33 @@ in code size, for example where the number of insns emitted to perform a
move would be greater than that of a library call.
@end deftypefn
@deftypefn {Target Hook} int TARGET_COMPARE_BY_PIECES_BRANCH_RATIO (machine_mode @var{mode})
When expanding a block comparison in MODE, gcc can try to reduce the
number of branches at the expense of more memory operations. This hook
allows the target to override the default choice. It should return the
factor by which branches should be reduced over the plain expansion with
one comparison per @var{mode}-sized piece. A port can also prevent a
particular mode from being used for block comparisons by returning a
negative number from this hook.
@end deftypefn
@defmac MOVE_MAX_PIECES
A C expression used by @code{move_by_pieces} to determine the largest unit
a load or store used to copy memory is. Defaults to @code{MOVE_MAX}.
@end defmac
@defmac STORE_MAX_PIECES
A C expression used by @code{store_by_pieces} to determine the largest unit
a store used to memory is. Defaults to @code{MOVE_MAX_PIECES}, or two times
the size of @code{HOST_WIDE_INT}, whichever is smaller.
@end defmac
@defmac COMPARE_MAX_PIECES
A C expression used by @code{compare_by_pieces} to determine the largest unit
a load or store used to compare memory is. Defaults to
@code{MOVE_MAX_PIECES}.
@end defmac
@defmac CLEAR_RATIO (@var{speed})
The threshold of number of scalar move insns, @emph{below} which a sequence
of insns should be generated to clear memory instead of a string clear insn

View File

@ -4653,11 +4653,25 @@ If you don't define this, a reasonable default is used.
@hook TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
@hook TARGET_COMPARE_BY_PIECES_BRANCH_RATIO
@defmac MOVE_MAX_PIECES
A C expression used by @code{move_by_pieces} to determine the largest unit
a load or store used to copy memory is. Defaults to @code{MOVE_MAX}.
@end defmac
@defmac STORE_MAX_PIECES
A C expression used by @code{store_by_pieces} to determine the largest unit
a store used to memory is. Defaults to @code{MOVE_MAX_PIECES}, or two times
the size of @code{HOST_WIDE_INT}, whichever is smaller.
@end defmac
@defmac COMPARE_MAX_PIECES
A C expression used by @code{compare_by_pieces} to determine the largest unit
a load or store used to compare memory is. Defaults to
@code{MOVE_MAX_PIECES}.
@end defmac
@defmac CLEAR_RATIO (@var{speed})
The threshold of number of scalar move insns, @emph{below} which a sequence
of insns should be generated to clear memory instead of a string clear insn

1398
gcc/expr.c

File diff suppressed because it is too large Load Diff

View File

@ -103,12 +103,16 @@ enum block_op_methods
BLOCK_OP_TAILCALL
};
typedef rtx (*by_pieces_constfn) (void *, HOST_WIDE_INT, machine_mode);
extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
unsigned int, HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
by_pieces_constfn, void *);
extern bool emit_storent_insn (rtx to, rtx from);
/* Copy all or part of a value X into registers starting at REGNO.
@ -173,6 +177,11 @@ extern void use_regs (rtx *, int, int);
/* Mark a PARALLEL as holding a parameter for the next CALL_INSN. */
extern void use_group_regs (rtx *, rtx);
#ifdef GCC_INSN_CODES_H
extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx,
HOST_WIDE_INT);
#endif
/* Write zeros through the storage of OBJECT.
If OBJECT has BLKmode, SIZE is its length in bytes. */
extern rtx clear_storage (rtx, rtx, enum block_op_methods);
@ -191,10 +200,6 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
unsigned int,
unsigned int);
/* Return nonzero if it is desirable to store LEN bytes generated by
CONSTFUN with several move instructions by store_by_pieces
function. CONSTFUNDATA is a pointer which will be passed as argument
@ -203,8 +208,7 @@ extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
MEMSETP is true if this is a real memset/bzero, not a copy
of a const string. */
extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
rtx (*) (void *, HOST_WIDE_INT,
machine_mode),
by_pieces_constfn,
void *, unsigned int, bool);
/* Generate several move instructions to store LEN bytes generated by
@ -213,8 +217,7 @@ extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
ALIGN is maximum alignment we can assume.
MEMSETP is true if this is a real memset/bzero, not a copy.
Returns TO + LEN. */
extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT,
rtx (*) (void *, HOST_WIDE_INT, machine_mode),
extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT, by_pieces_constfn,
void *, unsigned int, bool, int);
/* Emit insns to set X from Y. */
@ -295,7 +298,7 @@ rtx get_personality_function (tree);
/* Determine whether the LEN bytes can be moved by using several move
instructions. Return nonzero if a call to move_by_pieces should
succeed. */
extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
extern bool can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);

View File

@ -3397,8 +3397,9 @@ Both @var{size} and @var{alignment} are measured in terms of storage\n\
units.\n\
\n\
The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},\n\
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.\n\
These describe the type of memory operation under consideration.\n\
@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES} or\n\
@code{COMPARE_BY_PIECES}. These describe the type of memory operation\n\
under consideration.\n\
\n\
The parameter @var{speed_p} is true if the code is currently being\n\
optimized for speed rather than size.\n\
@ -3417,6 +3418,18 @@ move would be greater than that of a library call.",
enum by_pieces_operation op, bool speed_p),
default_use_by_pieces_infrastructure_p)
DEFHOOK
(compare_by_pieces_branch_ratio,
"When expanding a block comparison in MODE, gcc can try to reduce the\n\
number of branches at the expense of more memory operations. This hook\n\
allows the target to override the default choice. It should return the\n\
factor by which branches should be reduced over the plain expansion with\n\
one comparison per @var{mode}-sized piece. A port can also prevent a\n\
particular mode from being used for block comparisons by returning a\n\
negative number from this hook.",
int, (machine_mode mode),
default_compare_by_pieces_branch_ratio)
DEFHOOK
(optab_supported_p,
"Return true if the optimizers should use optab @var{op} with\n\

View File

@ -79,16 +79,23 @@ enum print_switch_type
};
/* Types of memory operation understood by the "by_pieces" infrastructure.
Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook. */
Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook and
internally by the functions in expr.c. */
enum by_pieces_operation
{
CLEAR_BY_PIECES,
MOVE_BY_PIECES,
SET_BY_PIECES,
STORE_BY_PIECES
STORE_BY_PIECES,
COMPARE_BY_PIECES
};
extern unsigned HOST_WIDE_INT by_pieces_ninsns (unsigned HOST_WIDE_INT,
unsigned int,
unsigned int,
by_pieces_operation);
typedef int (* print_switch_fn_type) (print_switch_type, const char *);
/* An example implementation for ELF targets. Defined in varasm.c */

View File

@ -1482,25 +1482,40 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
switch (op)
{
case CLEAR_BY_PIECES:
max_size = STORE_MAX_PIECES;
ratio = CLEAR_RATIO (speed_p);
break;
case MOVE_BY_PIECES:
max_size = MOVE_MAX_PIECES;
ratio = get_move_ratio (speed_p);
break;
case SET_BY_PIECES:
max_size = STORE_MAX_PIECES;
ratio = SET_RATIO (speed_p);
break;
case STORE_BY_PIECES:
max_size = STORE_MAX_PIECES;
ratio = get_move_ratio (speed_p);
break;
case CLEAR_BY_PIECES:
max_size = STORE_MAX_PIECES;
ratio = CLEAR_RATIO (speed_p);
break;
case MOVE_BY_PIECES:
max_size = MOVE_MAX_PIECES;
ratio = get_move_ratio (speed_p);
break;
case SET_BY_PIECES:
max_size = STORE_MAX_PIECES;
ratio = SET_RATIO (speed_p);
break;
case STORE_BY_PIECES:
max_size = STORE_MAX_PIECES;
ratio = get_move_ratio (speed_p);
break;
case COMPARE_BY_PIECES:
max_size = COMPARE_MAX_PIECES;
/* Pick a likely default, just as in get_move_ratio. */
ratio = speed_p ? 15 : 3;
break;
}
return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
return by_pieces_ninsns (size, alignment, max_size + 1, op) < ratio;
}
/* Default implementation of the compare_by_pieces_branch_ratio hook,
   which controls code generation when a memcmp operation is expanded
   by pieces.  The mode argument is ignored: every mode gets a ratio of
   1, the normal pattern of a compare/jump after each pair of loads.
   A higher number would reduce the number of branches.  */
int
default_compare_by_pieces_branch_ratio (machine_mode)
{
  const int one_branch_per_piece = 1;
  return one_branch_per_piece;
}
bool

View File

@ -199,6 +199,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
unsigned int,
enum by_pieces_operation,
bool);
extern int default_compare_by_pieces_branch_ratio (machine_mode);
extern bool default_profile_before_prologue (void);
extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);

View File

@ -1,3 +1,9 @@
2016-06-03 Bernd Schmidt <bschmidt@redhat.com>
PR tree-optimization/52171
* gcc.dg/pr52171.c: New test.
* gcc.target/i386/pr52171.c: New test.
2016-06-03 Jan Hubicka <jh@suse.cz>
* g++.dg/tree-ssa/pred-1.C: New testcase

View File

@ -0,0 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-not "memcmp" } } */
#include <string.h>
struct A { int x; } a, b;
extern char s[], t[];
/* Compare two int-sized structs with memcmp and use the result only
   through an equality test against zero.  The dg-final directive at
   the top of this file requires that no "memcmp" remains in the
   assembler output for this function.  */
int foo ()
{
return memcmp (&a, &b, sizeof (struct A)) == 0;
}

View File

@ -0,0 +1,23 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-not "memcmp" } } */
/* { dg-final { scan-assembler "1752394086" } } */
/* Compare 16 bytes through two pointers that are asserted to be
   4-byte aligned; only the (in)equality of the memcmp result with
   zero is used.  This should turn into four compare/jump pairs with
   -m32, within the limit of what the tuning considers acceptable
   for -O2.  */
int cmp (char *p, char *q)
{
char *pa = __builtin_assume_aligned (p, 4);
char *qa = __builtin_assume_aligned (q, 4);
if (__builtin_memcmp (pa, qa, 16) != 0)
return 1;
return 0;
}
/* Compare 4 bytes at P, whose alignment is unknown, against the string
   constant "fish".  Since we have fast unaligned access, we should
   make a single constant comparison.  The constant becomes 1752394086,
   i.e. 0x68736966: the bytes 'f' 'i' 's' 'h' read as one little-endian
   32-bit integer.  */
int cmp2 (char *p)
{
if (__builtin_memcmp (p, "fish", 4) != 0)
return 1;
return 0;
}

View File

@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see
#include "params.h"
#include "ipa-chkp.h"
#include "tree-hash-traits.h"
#include "builtins.h"
/* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
is an index into strinfo vector, negative value stands for
@ -1843,6 +1844,88 @@ handle_builtin_memset (gimple_stmt_iterator *gsi)
return false;
}
/* Handle the call to memcmp at *GSI.  We try to handle small
   comparisons by converting them to load and compare, and replacing
   the call to memcmp with a __builtin_memcmp_eq call where possible.

   Nothing is done unless the call has a result and every non-debug use
   of that result is an EQ_EXPR or NE_EXPR test against zero.

   Returns true if the statement was left untouched, false if it was
   replaced by a direct comparison or redirected to
   __builtin_memcmp_eq.  */
static bool
handle_builtin_memcmp (gimple_stmt_iterator *gsi)
{
  gcall *stmt2 = as_a <gcall *> (gsi_stmt (*gsi));
  tree res = gimple_call_lhs (stmt2);
  tree arg1 = gimple_call_arg (stmt2, 0);
  tree arg2 = gimple_call_arg (stmt2, 1);
  tree len = gimple_call_arg (stmt2, 2);
  unsigned HOST_WIDE_INT leni;
  use_operand_p use_p;
  imm_use_iterator iter;

  /* Without a result there is no use whose sign requirement could be
     relaxed.  */
  if (!res)
    return true;

  FOR_EACH_IMM_USE_FAST (use_p, iter, res)
    {
      gimple *ustmt = USE_STMT (use_p);

      /* Debug statements must not influence the decision; otherwise
         the generated code would depend on whether debug info is being
         produced.  */
      if (is_gimple_debug (ustmt))
        continue;

      if (gimple_code (ustmt) == GIMPLE_ASSIGN)
        {
          gassign *asgn = as_a <gassign *> (ustmt);
          tree_code code = gimple_assign_rhs_code (asgn);
          if ((code != EQ_EXPR && code != NE_EXPR)
              || !integer_zerop (gimple_assign_rhs2 (asgn)))
            return true;
        }
      else if (gimple_code (ustmt) == GIMPLE_COND)
        {
          tree_code code = gimple_cond_code (ustmt);
          if ((code != EQ_EXPR && code != NE_EXPR)
              || !integer_zerop (gimple_cond_rhs (ustmt)))
            return true;
        }
      else
        /* Any other kind of use may depend on the sign of the result.  */
        return true;
    }

  /* A constant length that is a power of two and no larger than a word
     can be compared with one load from each operand.  */
  if (tree_fits_uhwi_p (len)
      && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
      && exact_log2 (leni) != -1)
    {
      /* From here on LENI is measured in bits.  */
      leni *= CHAR_TYPE_SIZE;
      unsigned align1 = get_pointer_alignment (arg1);
      unsigned align2 = get_pointer_alignment (arg2);
      unsigned align = MIN (align1, align2);
      machine_mode mode = mode_for_size (leni, MODE_INT, 1);
      if (mode != BLKmode
          && (align >= leni || !SLOW_UNALIGNED_ACCESS (mode, align)))
        {
          location_t loc = gimple_location (stmt2);
          tree type, off;
          type = build_nonstandard_integer_type (leni, 1);
          gcc_assert (GET_MODE_BITSIZE (TYPE_MODE (type)) == leni);
          tree ptrtype = build_pointer_type_for_mode (char_type_node,
                                                      ptr_mode, true);
          off = build_int_cst (ptrtype, 0);
          /* Read both operands as integers of the full width.  */
          arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
          arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
          /* Use the folded form of either load when folding succeeds.  */
          tree tem1 = fold_const_aggregate_ref (arg1);
          if (tem1)
            arg1 = tem1;
          tree tem2 = fold_const_aggregate_ref (arg2);
          if (tem2)
            arg2 = tem2;
          /* The replacement value is zero exactly when the two loaded
             values are equal, which is all the remaining uses test.  */
          res = fold_convert_loc (loc, TREE_TYPE (res),
                                  fold_build2_loc (loc, NE_EXPR,
                                                   boolean_type_node,
                                                   arg1, arg2));
          gimplify_and_update_call_from_tree (gsi, res);
          return false;
        }
    }

  /* Otherwise keep the call, but redirect it to the internal variant
     used when the result is only tested for equality with zero.  */
  gimple_call_set_fndecl (stmt2, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
  return false;
}
/* Handle a POINTER_PLUS_EXPR statement.
For p = "abcd" + 2; compute associated length, or if
p = q + off is pointing to a '\0' character of a string, call
@ -2100,6 +2183,10 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
if (!handle_builtin_memset (gsi))
return false;
break;
case BUILT_IN_MEMCMP:
if (!handle_builtin_memcmp (gsi))
return false;
break;
default:
break;
}

View File

@ -10601,6 +10601,13 @@ build_common_builtin_nodes (void)
BUILT_IN_STACK_RESTORE,
"__builtin_stack_restore", ECF_NOTHROW | ECF_LEAF);
ftype = build_function_type_list (integer_type_node, const_ptr_type_node,
const_ptr_type_node, size_type_node,
NULL_TREE);
local_define_builtin ("__builtin_memcmp_eq", ftype, BUILT_IN_MEMCMP_EQ,
"__builtin_memcmp_eq",
ECF_PURE | ECF_NOTHROW | ECF_LEAF);
/* If there's a possibility that we might use the ARM EABI, build the
alternate __cxa_end_cleanup node used to resume from C++ and Java. */
if (targetm.arm_eabi_unwinder)