re PR rtl-optimization/44374 (Hoist same instructions in different branches)

gcc/
	PR rtl-optimization/44374
	Reapply patch with fixes.
	* basic-block.h (enum bb_flags): Add BB_MODIFIED.
	* df-core.c (df_set_bb_dirty): Set it.
	* ifcvt.c (find_memory): Remove function.
	(dead_or_predicable): Use can_move_insns_across.
	* df.h (can_move_insns_across): Declare function.
	* cfgcleanup.c (block_was_dirty): New static variable.
	(flow_find_head_matching_sequence): Test for epilogue notes.
	(try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather
	than df_get_bb_dirty.
	(try_head_merge_bb): New static function.
	(try_optimize_cfg): Call it.  Call df_analyze if block_was_dirty
	is set.
	* df-problems.c: Include "target.h"
	(df_simulate_find_uses): New static function.
	(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
	(find_memory, find_memory_stores): New static functions.
	(can_move_insns_across): New function.
	* Makefile.in (df-problems.o): Update dependencies.

gcc/testsuite/
	PR rtl-optimization/44374
	Reapply patch with fixes.
	* gcc.target/arm/headmerge-1.c: New test.
	* gcc.target/arm/headmerge-2.c: New test.
	* gcc.target/i386/headmerge-1.c: New test.
	* gcc.target/i386/headmerge-2.c: New test.

From-SVN: r167779
This commit is contained in:
Bernd Schmidt 2010-12-14 00:23:40 +00:00 committed by Bernd Schmidt
parent 218cbe3757
commit 4ec5d4f5b9
13 changed files with 817 additions and 137 deletions

View File

@ -1,3 +1,26 @@
2010-12-14 Bernd Schmidt <bernds@codesourcery.com>
PR rtl-optimization/44374
Reapply patch with fixes.
* basic-block.h (enum bb_flags): Add BB_MODIFIED.
* df-core.c (df_set_bb_dirty): Set it.
* ifcvt.c (find_memory): Remove function.
(dead_or_predicable): Use can_move_insns_across.
* df.h (can_move_insns_across): Declare function.
* cfgcleanup.c (block_was_dirty): New static variable.
(flow_find_head_matching_sequence): Test for epilogue notes.
(try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather
than df_get_bb_dirty.
(try_head_merge_bb): New static function.
(try_optimize_cfg): Call it. Call df_analyze if block_was_dirty
is set.
* df-problems.c: Include "target.h"
(df_simulate_find_uses): New static function.
(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
(find_memory, find_memory_stores): New static functions.
(can_move_insns_across): New function.
* Makefile.in (df-problems.o): Update dependencies.
2010-12-13 Joseph Myers <joseph@codesourcery.com>
* config/xtensa/elf.h (SIZE_TYPE, PTRDIFF_TYPE): Define.

View File

@ -3194,7 +3194,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
$(TM_P_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
$(TM_P_H) $(TARGET_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \

View File

@ -245,7 +245,13 @@ enum bb_flags
/* Set on blocks that cannot be threaded through.
Only used in cfgcleanup.c. */
BB_NONTHREADABLE_BLOCK = 1 << 11
BB_NONTHREADABLE_BLOCK = 1 << 11,
/* Set on blocks that were modified in some way. This bit is set in
df_set_bb_dirty, but not cleared by df_analyze, so it can be used
to test whether a block has been modified prior to a df_analyze
call. */
BB_MODIFIED = 1 << 12
};
/* Dummy flag for convenience in the hot/cold partitioning code. */

View File

@ -65,6 +65,10 @@ static bool first_pass;
/* Set to true if crossjumps occurred in the latest run of try_optimize_cfg. */
static bool crossjumps_occured;
/* Set to true if we couldn't run an optimization due to stale liveness
information; we should run df_analyze to enable more opportunities. */
static bool block_was_dirty;
static bool try_crossjump_to_edge (int, edge, edge);
static bool try_crossjump_bb (int, basic_block);
static bool outgoing_edges_match (int, basic_block, basic_block);
@ -431,7 +435,7 @@ try_forward_edges (int mode, basic_block b)
int counter, goto_locus;
bool threaded = false;
int nthreaded_edges = 0;
bool may_thread = first_pass | df_get_bb_dirty (b);
bool may_thread = first_pass || (b->flags & BB_MODIFIED) != 0;
/* Skip complex edges because we don't know how to update them.
@ -466,7 +470,7 @@ try_forward_edges (int mode, basic_block b)
{
basic_block new_target = NULL;
bool new_target_threaded = false;
may_thread |= df_get_bb_dirty (target);
may_thread |= (target->flags & BB_MODIFIED) != 0;
if (FORWARDER_BLOCK_P (target)
&& !(single_succ_edge (target)->flags & EDGE_CROSSING)
@ -1184,12 +1188,20 @@ flow_find_head_matching_sequence (basic_block bb1, basic_block bb2, rtx *f1,
while (true)
{
/* Ignore notes. */
/* Ignore notes, except NOTE_INSN_EPILOGUE_BEG. */
while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
i1 = NEXT_INSN (i1);
{
if (NOTE_P (i1) && NOTE_KIND (i1) == NOTE_INSN_EPILOGUE_BEG)
break;
i1 = NEXT_INSN (i1);
}
while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
i2 = NEXT_INSN (i2);
{
if (NOTE_P (i2) && NOTE_KIND (i2) == NOTE_INSN_EPILOGUE_BEG)
break;
i2 = NEXT_INSN (i2);
}
if ((i1 == BB_END (bb1) && !NONDEBUG_INSN_P (i1))
|| (i2 == BB_END (bb2) && !NONDEBUG_INSN_P (i2)))
@ -1856,8 +1868,8 @@ try_crossjump_bb (int mode, basic_block bb)
/* If nothing changed since the last attempt, there is nothing
we can do. */
if (!first_pass
&& (!(df_get_bb_dirty (e->src))
&& !(df_get_bb_dirty (fallthru->src))))
&& !((e->src->flags & BB_MODIFIED)
|| (fallthru->src->flags & BB_MODIFIED)))
continue;
if (try_crossjump_to_edge (mode, e, fallthru))
@ -1906,8 +1918,8 @@ try_crossjump_bb (int mode, basic_block bb)
/* If nothing changed since the last attempt, there is nothing
we can do. */
if (!first_pass
&& (!(df_get_bb_dirty (e->src))
&& !(df_get_bb_dirty (e2->src))))
&& !((e->src->flags & BB_MODIFIED)
|| (e2->src->flags & BB_MODIFIED)))
continue;
if (try_crossjump_to_edge (mode, e, e2))
@ -1926,6 +1938,299 @@ try_crossjump_bb (int mode, basic_block bb)
return changed;
}
/* Search the successors of BB for common insn sequences.  When found,
   share code between them by moving it across the basic block
   boundary.  Return true if any changes were made.

   If stale liveness information prevents the optimization, set
   block_was_dirty so the caller can run df_analyze and retry.  */

static bool
try_head_merge_bb (basic_block bb)
{
  basic_block final_dest_bb = NULL;
  int max_match = INT_MAX;
  edge e0;
  rtx *headptr, *currptr, *nextptr;
  bool changed, moveall;
  unsigned ix;
  rtx e0_last_head, cond, move_before;
  unsigned nedges = EDGE_COUNT (bb->succs);
  rtx jump = BB_END (bb);
  regset live, live_union;

  /* Nothing to do if there is not at least two outgoing edges.  */
  if (nedges < 2)
    return false;

  /* Don't crossjump if this block ends in a computed jump,
     unless we are optimizing for size.
     NOTE(review): the condition below bails out precisely when we ARE
     optimizing for size, which reads as the opposite of this comment —
     confirm the intended polarity.  Also, BB has successors here, so the
     EXIT_BLOCK_PTR test looks vacuous.  */
  if (optimize_bb_for_size_p (bb)
      && bb != EXIT_BLOCK_PTR
      && computed_jump_p (BB_END (bb)))
    return false;

  /* By default, new insns are inserted just before the jump (or, for a
     conditional jump, before the earliest insn computing the condition).  */
  cond = get_condition (jump, &move_before, true, false);
  if (cond == NULL_RTX)
    move_before = jump;

  for (ix = 0; ix < nedges; ix++)
    if (EDGE_SUCC (bb, ix)->dest == EXIT_BLOCK_PTR)
      return false;

  for (ix = 0; ix < nedges; ix++)
    {
      edge e = EDGE_SUCC (bb, ix);
      basic_block other_bb = e->dest;

      /* Stale liveness info makes the transformation unsafe; ask the
	 caller to rerun df_analyze and try again.  */
      if (df_get_bb_dirty (other_bb))
	{
	  block_was_dirty = true;
	  return false;
	}

      if (e->flags & EDGE_ABNORMAL)
	return false;

      /* Normally, all destination blocks must only be reachable from this
	 block, i.e. they must have one incoming edge.

	 There is one special case we can handle, that of multiple consecutive
	 jumps where the first jumps to one of the targets of the second jump.
	 This happens frequently in switch statements for default labels.
	 The structure is as follows:
	 FINAL_DEST_BB
	 ....
	 if (cond) jump A;
	 fall through
	 BB
	 jump with targets A, B, C, D...
	 A
	 has two incoming edges, from FINAL_DEST_BB and BB

	 In this case, we can try to move the insns through BB and into
	 FINAL_DEST_BB.  */
      if (EDGE_COUNT (other_bb->preds) != 1)
	{
	  edge incoming_edge, incoming_bb_other_edge;
	  edge_iterator ei;

	  if (final_dest_bb != NULL
	      || EDGE_COUNT (other_bb->preds) != 2)
	    return false;

	  /* We must be able to move the insns across the whole block.  */
	  move_before = BB_HEAD (bb);
	  while (!NONDEBUG_INSN_P (move_before))
	    move_before = NEXT_INSN (move_before);

	  if (EDGE_COUNT (bb->preds) != 1)
	    return false;
	  incoming_edge = EDGE_PRED (bb, 0);
	  final_dest_bb = incoming_edge->src;
	  if (EDGE_COUNT (final_dest_bb->succs) != 2)
	    return false;
	  FOR_EACH_EDGE (incoming_bb_other_edge, ei, final_dest_bb->succs)
	    if (incoming_bb_other_edge != incoming_edge)
	      break;
	  if (incoming_bb_other_edge->dest != other_bb)
	    return false;
	}
    }

  e0 = EDGE_SUCC (bb, 0);
  e0_last_head = NULL_RTX;
  changed = false;

  /* Find the longest head sequence common to all successors; MAX_MATCH
     is the shortest match over all pairs (e0, e).  */
  for (ix = 1; ix < nedges; ix++)
    {
      edge e = EDGE_SUCC (bb, ix);
      rtx e0_last, e_last;
      int nmatch;

      nmatch = flow_find_head_matching_sequence (e0->dest, e->dest,
						 &e0_last, &e_last, 0);
      if (nmatch == 0)
	return false;

      if (nmatch < max_match)
	{
	  max_match = nmatch;
	  e0_last_head = e0_last;
	}
    }

  /* If we matched an entire block, we probably have to avoid moving the
     last insn.  */
  if (max_match > 0
      && e0_last_head == BB_END (e0->dest)
      && (find_reg_note (e0_last_head, REG_EH_REGION, 0)
	  || control_flow_insn_p (e0_last_head)))
    {
      max_match--;
      if (max_match == 0)
	return false;
      do
	e0_last_head = prev_real_insn (e0_last_head);
      while (DEBUG_INSN_P (e0_last_head));
    }

  if (max_match == 0)
    return false;

  /* We must find a union of the live registers at each of the end points.  */
  live = BITMAP_ALLOC (NULL);
  live_union = BITMAP_ALLOC (NULL);

  currptr = XNEWVEC (rtx, nedges);
  headptr = XNEWVEC (rtx, nedges);
  nextptr = XNEWVEC (rtx, nedges);

  for (ix = 0; ix < nedges; ix++)
    {
      int j;
      basic_block merge_bb = EDGE_SUCC (bb, ix)->dest;
      rtx head = BB_HEAD (merge_bb);

      while (!NONDEBUG_INSN_P (head))
	head = NEXT_INSN (head);
      headptr[ix] = head;
      currptr[ix] = head;

      /* Compute the end point and live information  */
      for (j = 1; j < max_match; j++)
	do
	  head = NEXT_INSN (head);
	while (!NONDEBUG_INSN_P (head));
      simulate_backwards_to_point (merge_bb, live, head);
      IOR_REG_SET (live_union, live);
    }

  /* If we're moving across two blocks, verify the validity of the
     first move, then adjust the target and let the loop below deal
     with the final move.  */
  if (final_dest_bb != NULL)
    {
      rtx move_upto;

      moveall = can_move_insns_across (currptr[0], e0_last_head, move_before,
				       jump, e0->dest, live_union,
				       NULL, &move_upto);
      if (!moveall)
	{
	  if (move_upto == NULL_RTX)
	    goto out;

	  /* Shorten the range to what can actually cross the first
	     boundary, updating the live set as we back up.  */
	  while (e0_last_head != move_upto)
	    {
	      df_simulate_one_insn_backwards (e0->dest, e0_last_head,
					      live_union);
	      e0_last_head = PREV_INSN (e0_last_head);
	    }
	}
      if (e0_last_head == NULL_RTX)
	goto out;

      jump = BB_END (final_dest_bb);
      cond = get_condition (jump, &move_before, true, false);
      if (cond == NULL_RTX)
	move_before = jump;
    }

  do
    {
      rtx move_upto;
      moveall = can_move_insns_across (currptr[0], e0_last_head,
				       move_before, jump, e0->dest, live_union,
				       NULL, &move_upto);
      if (!moveall && move_upto == NULL_RTX)
	{
	  if (jump == move_before)
	    break;

	  /* Try again, using a different insertion point.  */
	  move_before = jump;

#ifdef HAVE_cc0
	  /* Don't try moving before a cc0 user, as that may invalidate
	     the cc0.  */
	  if (reg_mentioned_p (cc0_rtx, jump))
	    break;
#endif

	  continue;
	}

      if (final_dest_bb && !moveall)
	/* We haven't checked whether a partial move would be OK for the first
	   move, so we have to fail this case.  */
	break;

      changed = true;
      /* Advance CURRPTR in every successor to the last insn of the
	 movable range (MOVE_UPTO in e0's copy of the sequence).  */
      for (;;)
	{
	  if (currptr[0] == move_upto)
	    break;
	  for (ix = 0; ix < nedges; ix++)
	    {
	      rtx curr = currptr[ix];
	      do
		curr = NEXT_INSN (curr);
	      while (!NONDEBUG_INSN_P (curr));
	      currptr[ix] = curr;
	    }
	}

      /* If we can't currently move all of the identical insns, remember
	 each insn after the range that we'll merge.  */
      if (!moveall)
	for (ix = 0; ix < nedges; ix++)
	  {
	    rtx curr = currptr[ix];
	    do
	      curr = NEXT_INSN (curr);
	    while (!NONDEBUG_INSN_P (curr));
	    nextptr[ix] = curr;
	  }

      /* Move e0's copy of the sequence before MOVE_BEFORE and delete the
	 now-redundant copies in all other successors.  */
      reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before));
      df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest);
      if (final_dest_bb != NULL)
	df_set_bb_dirty (final_dest_bb);
      df_set_bb_dirty (bb);
      for (ix = 1; ix < nedges; ix++)
	{
	  df_set_bb_dirty (EDGE_SUCC (bb, ix)->dest);
	  delete_insn_chain (headptr[ix], currptr[ix], false);
	}
      if (!moveall)
	{
	  if (jump == move_before)
	    break;

	  /* For the unmerged insns, try a different insertion point.  */
	  move_before = jump;

#ifdef HAVE_cc0
	  /* Don't try moving before a cc0 user, as that may invalidate
	     the cc0.  */
	  if (reg_mentioned_p (cc0_rtx, jump))
	    break;
#endif

	  for (ix = 0; ix < nedges; ix++)
	    currptr[ix] = headptr[ix] = nextptr[ix];
	}
    }
  while (!moveall);

 out:
  free (currptr);
  free (headptr);
  free (nextptr);

  /* Fix: LIVE and LIVE_UNION were allocated with BITMAP_ALLOC above but
     never released, leaking on every invocation that got that far.  All
     paths reaching OUT pass the allocation point, so freeing here is safe.  */
  BITMAP_FREE (live);
  BITMAP_FREE (live_union);

  crossjumps_occured |= changed;

  return changed;
}
/* Return true if BB contains just bb note, or bb note followed
by only DEBUG_INSNs. */
@ -1971,6 +2276,7 @@ try_optimize_cfg (int mode)
one predecessor, they may be combined. */
do
{
block_was_dirty = false;
changed = false;
iterations++;
@ -2170,6 +2476,13 @@ try_optimize_cfg (int mode)
&& try_crossjump_bb (mode, b))
changed_here = true;
if ((mode & CLEANUP_CROSSJUMP)
/* This can lengthen register lifetimes. Do it only after
reload. */
&& reload_completed
&& try_head_merge_bb (b))
changed_here = true;
/* Don't get confused by the index shift caused by
deleting blocks. */
if (!changed_here)
@ -2182,6 +2495,13 @@ try_optimize_cfg (int mode)
&& try_crossjump_bb (mode, EXIT_BLOCK_PTR))
changed = true;
if (block_was_dirty)
{
/* This should only be set by head-merging. */
gcc_assert (mode & CLEANUP_CROSSJUMP);
df_analyze ();
}
#ifdef ENABLE_CHECKING
if (changed)
verify_flow_info ();
@ -2366,8 +2686,7 @@ cleanup_cfg (int mode)
if ((mode & CLEANUP_EXPENSIVE) && !reload_completed
&& !delete_trivially_dead_insns (get_insns (), max_reg_num ()))
break;
else if ((mode & CLEANUP_CROSSJUMP)
&& crossjumps_occured)
if ((mode & CLEANUP_CROSSJUMP) && crossjumps_occured)
run_fast_dce ();
}
else

View File

@ -1413,6 +1413,7 @@ df_get_bb_dirty (basic_block bb)
void
df_set_bb_dirty (basic_block bb)
{
bb->flags |= BB_MODIFIED;
if (df)
{
int p;

View File

@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
#include "basic-block.h"
#include "sbitmap.h"
#include "bitmap.h"
#include "target.h"
#include "timevar.h"
#include "df.h"
#include "except.h"
@ -3541,6 +3542,27 @@ df_simulate_find_defs (rtx insn, bitmap defs)
}
}
/* Record in USES the registers used by INSN.  A partial or conditional
   def reads the register's previous contents, so such defs are counted
   as uses too.  */

static void
df_simulate_find_uses (rtx insn, bitmap uses)
{
  df_ref *ref_rec;
  unsigned int uid = INSN_UID (insn);

  /* Partial/conditional defs preserve part of the old value: treat them
     as uses of the register.  */
  for (ref_rec = DF_INSN_UID_DEFS (uid); *ref_rec; ref_rec++)
    {
      df_ref ref = *ref_rec;
      if (DF_REF_FLAGS (ref) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
	bitmap_set_bit (uses, DF_REF_REGNO (ref));
    }

  /* Plain uses.  */
  for (ref_rec = DF_INSN_UID_USES (uid); *ref_rec; ref_rec++)
    bitmap_set_bit (uses, DF_REF_REGNO (*ref_rec));
}
/* Find the set of real DEFs, which are not clobbers, for INSN. */
void
@ -3768,7 +3790,303 @@ df_simulate_one_insn_forwards (basic_block bb, rtx insn, bitmap live)
}
df_simulate_fixup_sets (bb, live);
}
/* Used by the next two functions to encode information about the
memory references we found. */
#define MEMREF_NORMAL 1
#define MEMREF_VOLATILE 2
/* A subroutine of can_move_insns_across called through for_each_rtx.
   Classify *PX: return MEMREF_VOLATILE for a volatile memory reference
   or volatile ASM_OPERANDS, MEMREF_NORMAL for an ordinary (non-readonly)
   memory reference, and 0 otherwise.  DATA is unused.  */

static int
find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx ref = *px;

  /* A volatile asm is as ordering-sensitive as a volatile MEM.  */
  if (GET_CODE (ref) == ASM_OPERANDS && MEM_VOLATILE_P (ref))
    return MEMREF_VOLATILE;

  if (MEM_P (ref))
    {
      if (MEM_VOLATILE_P (ref))
	return MEMREF_VOLATILE;
      /* Read-only memory cannot conflict with any store.  */
      return MEM_READONLY_P (ref) ? 0 : MEMREF_NORMAL;
    }

  return 0;
}
/* A subroutine of can_move_insns_across called through note_stores.
   DATA points to an integer in which we set either the bit for
   MEMREF_NORMAL or the bit for MEMREF_VOLATILE if we find a MEM
   of either kind.  A store to the stack pointer is recorded as
   MEMREF_VOLATILE, since it can invalidate stack-frame references.  */

static void
find_memory_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
		    void *data)
{
  /* Fix: DATA was marked ATTRIBUTE_UNUSED even though it is read right
     here; the attribute was misleading and has been dropped.  */
  int *pflags = (int *)data;

  if (GET_CODE (x) == SUBREG)
    x = XEXP (x, 0);

  /* Treat stores to SP as stores to memory, this will prevent problems
     when there are references to the stack frame.  */
  if (x == stack_pointer_rtx)
    *pflags |= MEMREF_VOLATILE;

  if (!MEM_P (x))
    return;

  *pflags |= MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : MEMREF_NORMAL;
}
/* Walk BB backwards with the df_simulate machinery, stopping when insn
   POINT is reached (POINT itself is not simulated).  On return, LIVE
   holds the register liveness at that point.  */

void
simulate_backwards_to_point (basic_block bb, regset live, rtx point)
{
  rtx cursor = BB_END (bb);

  /* Seed with the liveness at the bottom of the block.  */
  bitmap_copy (live, df_get_live_out (bb));
  df_simulate_initialize_backwards (bb, live);

  /* Step backwards, updating liveness, until we hit POINT.  */
  while (cursor != point)
    {
      df_simulate_one_insn_backwards (bb, cursor, live);
      cursor = PREV_INSN (cursor);
    }
}
/* Return true if it is safe to move a group of insns, described by
   the range FROM to TO, backwards across another group of insns,
   described by ACROSS_FROM to ACROSS_TO.  It is assumed that there
   are no insns between ACROSS_TO and FROM, but they may be in
   different basic blocks; MERGE_BB is the block from which the
   insns will be moved.  The caller must pass in a regset MERGE_LIVE
   which specifies the registers live after TO.

   This function may be called in one of two cases: either we try to
   move identical instructions from all successor blocks into their
   predecessor, or we try to move from only one successor block.  If
   OTHER_BRANCH_LIVE is nonnull, it indicates that we're dealing with
   the second case.  It should contain a set of registers live at the
   end of ACROSS_TO which must not be clobbered by moving the insns.
   In that case, we're also more careful about moving memory references
   and trapping insns.

   We return false if it is not safe to move the entire group, but it
   may still be possible to move a subgroup.  PMOVE_UPTO, if nonnull,
   is set to point at the last moveable insn in such a case.  */

bool
can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to,
		       basic_block merge_bb, regset merge_live,
		       regset other_branch_live, rtx *pmove_upto)
{
  rtx insn, next, max_to;
  bitmap merge_set, merge_use, local_merge_live;
  bitmap test_set, test_use;
  unsigned i, fail = 0;
  bitmap_iterator bi;
  int memrefs_in_across = 0;
  int mem_sets_in_across = 0;
  bool trapping_insns_in_across = false;

  if (pmove_upto != NULL)
    *pmove_upto = NULL_RTX;

  /* Find real bounds, ignoring debug insns.  */
  while (!NONDEBUG_INSN_P (from) && from != to)
    from = NEXT_INSN (from);
  while (!NONDEBUG_INSN_P (to) && from != to)
    to = PREV_INSN (to);

  /* First pass over the ACROSS range: record what kinds of memory
     references and stores it contains, and whether any insn may trap.
     These summaries gate the per-insn checks in the MERGE scan below.  */
  for (insn = across_to; ; insn = next)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  memrefs_in_across |= for_each_rtx (&PATTERN (insn), find_memory,
					     NULL);
	  note_stores (PATTERN (insn), find_memory_stores,
		       &mem_sets_in_across);
	  /* This is used just to find sets of the stack pointer.  */
	  memrefs_in_across |= mem_sets_in_across;
	  trapping_insns_in_across |= may_trap_p (PATTERN (insn));
	}
      next = PREV_INSN (insn);
      if (insn == across_from)
	break;
    }

  /* Collect:
     MERGE_SET = set of registers set in MERGE_BB
     MERGE_USE = set of registers used in MERGE_BB and live at its top
     MERGE_LIVE = set of registers live at the point inside the MERGE
     range that we've reached during scanning
     TEST_SET = set of registers set between ACROSS_FROM and ACROSS_END.
     TEST_USE = set of registers used between ACROSS_FROM and ACROSS_END,
     and live before ACROSS_FROM.  */

  merge_set = BITMAP_ALLOC (&reg_obstack);
  merge_use = BITMAP_ALLOC (&reg_obstack);
  local_merge_live = BITMAP_ALLOC (&reg_obstack);
  test_set = BITMAP_ALLOC (&reg_obstack);
  test_use = BITMAP_ALLOC (&reg_obstack);

  /* Compute the set of registers set and used in the ACROSS range.  */
  if (other_branch_live != NULL)
    bitmap_copy (test_use, other_branch_live);
  /* NOTE(review): the initialization uses MERGE_BB while the loop scans
     the ACROSS range — presumably only the artificial defs matter here;
     confirm against df_simulate_initialize_backwards.  */
  df_simulate_initialize_backwards (merge_bb, test_use);
  for (insn = across_to; ; insn = next)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  df_simulate_find_defs (insn, test_set);
	  df_simulate_defs (insn, test_use);
	  df_simulate_uses (insn, test_use);
	}
      next = PREV_INSN (insn);
      if (insn == across_from)
	break;
    }

  /* Compute an upper bound for the amount of insns moved, by finding
     the first insn in MERGE that sets a register in TEST_USE, or uses
     a register in TEST_SET.  We also check for calls, trapping operations,
     and memory references.  */
  max_to = NULL_RTX;
  for (insn = from; ; insn = next)
    {
      if (CALL_P (insn))
	break;
      /* Never move insns across an epilogue-begin note.  */
      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	break;
      if (NONDEBUG_INSN_P (insn))
	{
	  if (may_trap_p (PATTERN (insn))
	      && (trapping_insns_in_across || other_branch_live != NULL))
	    break;

	  /* We cannot move memory stores past each other, or move memory
	     reads past stores, at least not without tracking them and
	     calling true_dependence on every pair.

	     If there is no other branch and no memory references or
	     sets in the ACROSS range, we can move memory references
	     freely, even volatile ones.

	     Otherwise, the rules are as follows: volatile memory
	     references and stores can't be moved at all, and any type
	     of memory reference can't be moved if there are volatile
	     accesses or stores in the ACROSS range.  That leaves
	     normal reads, which can be moved, as the trapping case is
	     dealt with elsewhere.  */
	  if (other_branch_live != NULL || memrefs_in_across != 0)
	    {
	      int mem_ref_flags = 0;
	      int mem_set_flags = 0;
	      note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags);
	      mem_ref_flags = for_each_rtx (&PATTERN (insn), find_memory,
					    NULL);
	      /* Catch sets of the stack pointer.  */
	      mem_ref_flags |= mem_set_flags;

	      if ((mem_ref_flags | mem_set_flags) & MEMREF_VOLATILE)
		break;
	      if ((memrefs_in_across & MEMREF_VOLATILE) && mem_ref_flags != 0)
		break;
	      if (mem_set_flags != 0
		  || (mem_sets_in_across != 0 && mem_ref_flags != 0))
		break;
	    }
	  df_simulate_find_uses (insn, merge_use);
	  /* We're only interested in uses which use a value live at
	     the top, not one previously set in this block.  */
	  bitmap_and_compl_into (merge_use, merge_set);
	  df_simulate_find_defs (insn, merge_set);
	  if (bitmap_intersect_p (merge_set, test_use)
	      || bitmap_intersect_p (merge_use, test_set))
	    break;
	  max_to = insn;
	}
      next = NEXT_INSN (insn);
      if (insn == to)
	break;
    }
  if (max_to != to)
    fail = 1;

  if (max_to == NULL_RTX || (fail && pmove_upto == NULL))
    goto out;

  /* Now, lower this upper bound by also taking into account that
     a range of insns moved across ACROSS must not leave a register
     live at the end that will be clobbered in ACROSS.  We need to
     find a point where TEST_SET & LIVE == 0.

     Insns in the MERGE range that set registers which are also set
     in the ACROSS range may still be moved as long as we also move
     later insns which use the results of the set, and make the
     register dead again.  This is verified by the condition stated
     above.  We only need to test it for registers that are set in
     the moved region.

     MERGE_LIVE is provided by the caller and holds live registers after
     TO.  */
  bitmap_copy (local_merge_live, merge_live);
  for (insn = to; insn != max_to; insn = PREV_INSN (insn))
    df_simulate_one_insn_backwards (merge_bb, insn, local_merge_live);

  /* We're not interested in registers that aren't set in the moved
     region at all.  */
  bitmap_and_into (local_merge_live, merge_set);
  for (;;)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  if (!bitmap_intersect_p (test_set, local_merge_live))
	    {
	      /* Found a valid endpoint: nothing set in ACROSS is still
		 live here.  */
	      max_to = insn;
	      break;
	    }

	  df_simulate_one_insn_backwards (merge_bb, insn,
					  local_merge_live);
	}

      if (insn == from)
	{
	  fail = 1;
	  goto out;
	}
      insn = PREV_INSN (insn);
    }

  if (max_to != to)
    fail = 1;

  if (pmove_upto)
    *pmove_upto = max_to;

  /* For small register class machines, don't lengthen lifetimes of
     hard registers before reload.  */
  if (! reload_completed
      && targetm.small_register_classes_for_mode_p (VOIDmode))
    {
      EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
	{
	  if (i < FIRST_PSEUDO_REGISTER
	      && ! fixed_regs[i]
	      && ! global_regs[i])
	    fail = 1;
	}
    }

 out:
  BITMAP_FREE (merge_set);
  BITMAP_FREE (merge_use);
  BITMAP_FREE (local_merge_live);
  BITMAP_FREE (test_set);
  BITMAP_FREE (test_use);

  return !fail;
}
/*----------------------------------------------------------------------------

View File

@ -970,7 +970,9 @@ extern void df_simulate_one_insn_backwards (basic_block, rtx, bitmap);
extern void df_simulate_finalize_backwards (basic_block, bitmap);
extern void df_simulate_initialize_forwards (basic_block, bitmap);
extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap);
extern void simulate_backwards_to_point (basic_block, regset, rtx);
extern bool can_move_insns_across (rtx, rtx, rtx, rtx, basic_block, regset,
regset, rtx *);
/* Functions defined in df-scan.c. */
extern void df_scan_alloc (bitmap);

View File

@ -102,7 +102,6 @@ static int noce_find_if_block (basic_block, edge, edge, int);
static int cond_exec_find_if_block (ce_if_block_t *);
static int find_if_case_1 (basic_block, edge, edge);
static int find_if_case_2 (basic_block, edge, edge);
static int find_memory (rtx *, void *);
static int dead_or_predicable (basic_block, basic_block, basic_block,
basic_block, int);
static void noce_emit_move_insn (rtx, rtx);
@ -3975,15 +3974,6 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge)
return TRUE;
}
/* A subroutine of dead_or_predicable called through for_each_rtx.
Return 1 if a memory is found. */
static int
find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
{
return MEM_P (*px);
}
/* Used by the code above to perform the actual rtl transformations.
Return TRUE if successful.
@ -3997,7 +3987,7 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
basic_block other_bb, basic_block new_dest, int reversep)
{
rtx head, end, jump, earliest = NULL_RTX, old_dest, new_label = NULL_RTX;
bitmap merge_set = NULL, merge_set_noclobber = NULL;
bitmap merge_set = NULL;
/* Number of pending changes. */
int n_validated_changes = 0;
@ -4087,129 +4077,46 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
}
#endif
/* If we allocated new pseudos (e.g. in the conditional move
expander called from noce_emit_cmove), we must resize the
array first. */
if (max_regno < max_reg_num ())
max_regno = max_reg_num ();
/* Try the NCE path if the CE path did not result in any changes. */
if (n_validated_changes == 0)
{
rtx cond, insn;
regset live;
bool success;
/* In the non-conditional execution case, we have to verify that there
are no trapping operations, no calls, no references to memory, and
that any registers modified are dead at the branch site. */
rtx insn, cond, prev;
bitmap test_live, test_set;
bool intersect = false;
/* Check for no calls or trapping operations. */
for (insn = head; ; insn = NEXT_INSN (insn))
{
if (CALL_P (insn))
return FALSE;
if (NONDEBUG_INSN_P (insn))
{
if (may_trap_p (PATTERN (insn)))
return FALSE;
/* ??? Even non-trapping memories such as stack frame
references must be avoided. For stores, we collect
no lifetime info; for reads, we'd have to assert
true_dependence false against every store in the
TEST range. */
if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
return FALSE;
}
if (insn == end)
break;
}
if (! any_condjump_p (jump))
if (!any_condjump_p (jump))
return FALSE;
/* Find the extent of the conditional. */
cond = noce_get_condition (jump, &earliest, false);
if (! cond)
if (!cond)
return FALSE;
/* Collect:
MERGE_SET = set of registers set in MERGE_BB
MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers
that are really set, not just clobbered.
TEST_LIVE = set of registers live at EARLIEST
TEST_SET = set of registers set between EARLIEST and the
end of the block. */
live = BITMAP_ALLOC (&reg_obstack);
simulate_backwards_to_point (merge_bb, live, end);
success = can_move_insns_across (head, end, earliest, jump,
merge_bb, live,
df_get_live_in (other_bb), NULL);
BITMAP_FREE (live);
if (!success)
return FALSE;
/* Collect the set of registers set in MERGE_BB. */
merge_set = BITMAP_ALLOC (&reg_obstack);
merge_set_noclobber = BITMAP_ALLOC (&reg_obstack);
/* If we allocated new pseudos (e.g. in the conditional move
expander called from noce_emit_cmove), we must resize the
array first. */
if (max_regno < max_reg_num ())
max_regno = max_reg_num ();
FOR_BB_INSNS (merge_bb, insn)
{
if (NONDEBUG_INSN_P (insn))
{
df_simulate_find_defs (insn, merge_set);
df_simulate_find_noclobber_defs (insn, merge_set_noclobber);
}
}
/* For small register class machines, don't lengthen lifetimes of
hard registers before reload. */
if (! reload_completed
&& targetm.small_register_classes_for_mode_p (VOIDmode))
{
unsigned i;
bitmap_iterator bi;
EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi)
{
if (i < FIRST_PSEUDO_REGISTER
&& ! fixed_regs[i]
&& ! global_regs[i])
goto fail;
}
}
/* For TEST, we're interested in a range of insns, not a whole block.
Moreover, we're interested in the insns live from OTHER_BB. */
test_live = BITMAP_ALLOC (&reg_obstack);
test_set = BITMAP_ALLOC (&reg_obstack);
/* The loop below takes the set of live registers
after JUMP, and calculates the live set before EARLIEST. */
bitmap_copy (test_live, df_get_live_in (other_bb));
df_simulate_initialize_backwards (test_bb, test_live);
for (insn = jump; ; insn = prev)
{
if (INSN_P (insn))
{
df_simulate_find_defs (insn, test_set);
df_simulate_one_insn_backwards (test_bb, insn, test_live);
}
prev = PREV_INSN (insn);
if (insn == earliest)
break;
}
/* We can perform the transformation if
MERGE_SET_NOCLOBBER & TEST_SET
and
MERGE_SET & TEST_LIVE
and
TEST_SET & DF_LIVE_IN (merge_bb)
are empty. */
if (bitmap_intersect_p (merge_set_noclobber, test_set)
|| bitmap_intersect_p (merge_set, test_live)
|| bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
intersect = true;
BITMAP_FREE (test_live);
BITMAP_FREE (test_set);
if (intersect)
goto fail;
if (NONDEBUG_INSN_P (insn))
df_simulate_find_defs (insn, merge_set);
}
no_body:
@ -4288,7 +4195,6 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
remove_reg_equal_equiv_notes_for_regno (i);
BITMAP_FREE (merge_set);
BITMAP_FREE (merge_set_noclobber);
}
reorder_insns (head, end, PREV_INSN (earliest));
@ -4307,12 +4213,10 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
cancel:
cancel_changes (0);
fail:
if (merge_set)
{
BITMAP_FREE (merge_set);
BITMAP_FREE (merge_set_noclobber);
}
BITMAP_FREE (merge_set);
return FALSE;
}

View File

@ -1,3 +1,12 @@
2010-12-14 Bernd Schmidt <bernds@codesourcery.com>
PR rtl-optimization/44374
Reapply patch with fixes.
* gcc.target/arm/headmerge-1.c: New test.
* gcc.target/arm/headmerge-2.c: New test.
* gcc.target/i386/headmerge-1.c: New test.
* gcc.target/i386/headmerge-2.c: New test.
2010-12-13 Jason Merrill <jason@redhat.com>
PR c++/46873

View File

@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "#120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
void t (int x, int y)
{
  /* Both arms pass the same constant 120; head merging is expected to
     hoist the argument setup above the branch so the immediate appears
     only once — pinned by the scan-assembler-times directive above.  */
  if (y < 5)
    foo1 (120);
  else
    foo2 (120);
}

View File

@ -0,0 +1,35 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
extern void foo3 (int);
extern void foo4 (int);
extern void foo5 (int);
extern void foo6 (int);
void t (int x, int y)
{
  /* Every case passes the same constant 120; head merging is expected
     to hoist the argument setup above the multiway branch so the
     constant is emitted once — pinned by scan-assembler-times above.  */
  switch (y)
    {
    case 1:
      foo1 (120);
      break;
    case 5:
      foo2 (120);
      break;
    case 7:
      foo3 (120);
      break;
    case 10:
      foo4 (120);
      break;
    case 13:
      foo5 (120);
      break;
    default:
      foo6 (120);
      break;
    }
}

View File

@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
void t (int x, int y)
{
  /* Both arms pass the same constant 120; head merging is expected to
     hoist the argument setup above the branch so the constant appears
     only once — pinned by the scan-assembler-times directive above.  */
  if (y < 5)
    foo1 (120);
  else
    foo2 (120);
}

View File

@ -0,0 +1,35 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times "120" 1 } } */
extern void foo1 (int);
extern void foo2 (int);
extern void foo3 (int);
extern void foo4 (int);
extern void foo5 (int);
extern void foo6 (int);
void t (int x, int y)
{
  /* Every case passes the same constant 120; head merging is expected
     to hoist the argument setup above the multiway branch so the
     constant is emitted once — pinned by scan-assembler-times above.  */
  switch (y)
    {
    case 1:
      foo1 (120);
      break;
    case 5:
      foo2 (120);
      break;
    case 7:
      foo3 (120);
      break;
    case 10:
      foo4 (120);
      break;
    case 13:
      foo5 (120);
      break;
    default:
      foo6 (120);
      break;
    }
}