invoke.texi (-fshrink-wrap): Document.

* doc/invoke.texi (-fshrink-wrap): Document.
	* opts.c (default_options_table): Add it.
	* common.opt (fshrink-wrap): Add.
	* function.c (emit_return_into_block): Remove useless declaration.
	(record_hard_reg_uses_1, record_hard_reg_uses, frame_required_for_rtx,
	requires_stack_frame_p, gen_return_pattern): New static functions.
	(emit_return_into_block): New arg simple_p.  All callers changed.
	Use gen_return_pattern.
	(thread_prologue_and_epilogue_insns): Implement shrink-wrapping.
	* config/i386/i386.md (return): Expand into a simple_return.
	(simple_return): New expander.
	(simple_return_internal, simple_return_internal_long,
	simple_return_pop_internal_long, simple_return_indirect_internal):
	Renamed from return_internal, return_internal_long,
	return_pop_internal_long and return_indirect_internal; changed to use
	simple_return.
	* config/i386/i386.c (ix86_expand_epilogue): Adjust to expand
	simple returns.
	(ix86_pad_returns): Likewise.
	* function.h (struct rtl_data): Add member shrink_wrapped.
	* cfgcleanup.c (outgoing_edges_match): If shrink-wrapped, edges that
	are not jumps or sibcalls can't be compared.

	* gcc.target/i386/sw-1.c: New test.

From-SVN: r179553
Bernd Schmidt 2011-10-05 12:59:23 +00:00 committed by Bernd Schmidt
parent 18474649d9
commit 484db665a3
11 changed files with 707 additions and 149 deletions

gcc/ChangeLog

@@ -1,3 +1,28 @@
2011-10-05 Bernd Schmidt <bernds@codesourcery.com>
* doc/invoke.texi (-fshrink-wrap): Document.
* opts.c (default_options_table): Add it.
* common.opt (fshrink-wrap): Add.
* function.c (emit_return_into_block): Remove useless declaration.
(record_hard_reg_uses_1, record_hard_reg_uses, frame_required_for_rtx,
requires_stack_frame_p, gen_return_pattern): New static functions.
(emit_return_into_block): New arg simple_p. All callers changed.
Use gen_return_pattern.
(thread_prologue_and_epilogue_insns): Implement shrink-wrapping.
* config/i386/i386.md (return): Expand into a simple_return.
(simple_return): New expander.
(simple_return_internal, simple_return_internal_long,
simple_return_pop_internal_long, simple_return_indirect_internal):
Renamed from return_internal, return_internal_long,
return_pop_internal_long and return_indirect_internal; changed to use
simple_return.
* config/i386/i386.c (ix86_expand_epilogue): Adjust to expand
simple returns.
(ix86_pad_returns): Likewise.
* function.h (struct rtl_data): Add member shrink_wrapped.
* cfgcleanup.c (outgoing_edges_match): If shrink-wrapped, edges that
are not jumps or sibcalls can't be compared.
2011-10-05 Richard Guenther <rguenther@suse.de>
* tree-ssa-sccvn.c (vn_get_expr_for): Handle CONSTRUCTOR of

gcc/cfgcleanup.c

@@ -1488,6 +1488,16 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2)
edge e1, e2;
edge_iterator ei;
/* If we performed shrink-wrapping, edges to the EXIT_BLOCK_PTR can
only be distinguished for JUMP_INSNs. The two paths may differ in
whether they went through the prologue. Sibcalls are fine, we know
that we either didn't need or inserted an epilogue before them. */
if (crtl->shrink_wrapped
&& single_succ_p (bb1) && single_succ (bb1) == EXIT_BLOCK_PTR
&& !JUMP_P (BB_END (bb1))
&& !(CALL_P (BB_END (bb1)) && SIBLING_CALL_P (BB_END (bb1))))
return false;
/* If BB1 has only one successor, we may be looking at either an
unconditional jump, or a fake edge to exit. */
if (single_succ_p (bb1)

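To make the new check concrete, here is a hedged illustration (the function is hypothetical and not part of this commit) of the situation it guards against: after shrink-wrapping, two predecessors of the exit block can contain identical-looking instructions even though only one of them ran the prologue, so they must not be cross-jumped unless a JUMP_INSN or a sibling call distinguishes them.

/* Hypothetical example.  After shrink-wrapping f(), the early-return path
   reaches the exit block without ever executing the prologue, while the
   path through work() executes both prologue and epilogue.  The final
   blocks of the two paths can look identical (set the return value, then
   leave), which is exactly the case outgoing_edges_match now refuses to
   match. */
extern void work (int *buf);

int
f (int a)
{
  int buf[64];

  if (a == 0)
    return 0;   /* shrink-wrapped path: no prologue was run here   */
  work (buf);
  return 0;     /* normal path: prologue ran, epilogue is needed   */
}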
gcc/common.opt

@@ -1772,6 +1772,11 @@ fshow-column
Common Report Var(flag_show_column) Init(1)
Show column numbers in diagnostics, when available. Default on
fshrink-wrap
Common Report Var(flag_shrink_wrap) Optimization
Emit function prologues only before parts of the function that need it,
rather than at the top of the function.
fsignaling-nans
Common Report Var(flag_signaling_nans) Optimization SetByCombined
Disable optimizations observable by IEEE signaling NaNs

gcc/config/i386/i386.c

@@ -10779,13 +10779,13 @@ ix86_expand_epilogue (int style)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
popc, -1, true);
emit_jump_insn (gen_return_indirect_internal (ecx));
emit_jump_insn (gen_simple_return_indirect_internal (ecx));
}
else
emit_jump_insn (gen_return_pop_internal (popc));
emit_jump_insn (gen_simple_return_pop_internal (popc));
}
else
emit_jump_insn (gen_return_internal ());
emit_jump_insn (gen_simple_return_internal ());
/* Restore the state back to the state from the prologue,
so that it's correct for the next epilogue. */
@@ -31312,7 +31312,7 @@ ix86_pad_returns (void)
}
if (replace)
{
emit_jump_insn_before (gen_return_internal_long (), ret);
emit_jump_insn_before (gen_simple_return_internal_long (), ret);
delete_insn (ret);
}
}

gcc/config/i386/i386.md

@@ -11695,19 +11695,31 @@
;; See comments for ix86_can_use_return_insn_p in i386.c.
(define_expand "return"
[(return)]
[(simple_return)]
"ix86_can_use_return_insn_p ()"
{
if (crtl->args.pops_args)
{
rtx popc = GEN_INT (crtl->args.pops_args);
emit_jump_insn (gen_return_pop_internal (popc));
emit_jump_insn (gen_simple_return_pop_internal (popc));
DONE;
}
})
(define_insn "return_internal"
[(return)]
(define_expand "simple_return"
[(simple_return)]
""
{
if (crtl->args.pops_args)
{
rtx popc = GEN_INT (crtl->args.pops_args);
emit_jump_insn (gen_simple_return_pop_internal (popc));
DONE;
}
})
(define_insn "simple_return_internal"
[(simple_return)]
"reload_completed"
"ret"
[(set_attr "length" "1")
@@ -11718,8 +11730,8 @@
;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
;; instruction Athlon and K8 have.
(define_insn "return_internal_long"
[(return)
(define_insn "simple_return_internal_long"
[(simple_return)
(unspec [(const_int 0)] UNSPEC_REP)]
"reload_completed"
"rep\;ret"
@@ -11729,8 +11741,8 @@
(set_attr "prefix_rep" "1")
(set_attr "modrm" "0")])
(define_insn "return_pop_internal"
[(return)
(define_insn "simple_return_pop_internal"
[(simple_return)
(use (match_operand:SI 0 "const_int_operand" ""))]
"reload_completed"
"ret\t%0"
@@ -11739,8 +11751,8 @@
(set_attr "length_immediate" "2")
(set_attr "modrm" "0")])
(define_insn "return_indirect_internal"
[(return)
(define_insn "simple_return_indirect_internal"
[(simple_return)
(use (match_operand:SI 0 "register_operand" "r"))]
"reload_completed"
"jmp\t%A0"

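One note on the renamed *_pop_internal pattern above: on 32-bit x86 it is used when the callee pops its own stack arguments, i.e. when crtl->args.pops_args is nonzero. A hedged sketch (the function is hypothetical, not from this commit):

/* With the 32-bit stdcall convention the callee removes its stack
   arguments, so crtl->args.pops_args is 8 here and the epilogue ends in
   "ret $8" via simple_return_pop_internal rather than the plain "ret"
   emitted by simple_return_internal. */
int __attribute__ ((stdcall))
add2 (int a, int b)
{
  return a + b;
}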
gcc/doc/invoke.texi

@@ -396,10 +396,10 @@ Objective-C and Objective-C++ Dialects}.
-fschedule-insns -fschedule-insns2 -fsection-anchors @gol
-fselective-scheduling -fselective-scheduling2 @gol
-fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
-fsignaling-nans -fsingle-precision-constant -fsplit-ivs-in-unroller @gol
-fsplit-wide-types -fstack-protector -fstack-protector-all @gol
-fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer @gol
-ftree-bit-ccp @gol
-fshrink-wrap -fsignaling-nans -fsingle-precision-constant @gol
-fsplit-ivs-in-unroller -fsplit-wide-types -fstack-protector @gol
-fstack-protector-all -fstrict-aliasing -fstrict-overflow @gol
-fthread-jumps -ftracer -ftree-bit-ccp @gol
-ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop @gol
-ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol
-ftree-forwprop -ftree-fre -ftree-loop-if-convert @gol
@@ -6880,6 +6880,12 @@ This option has no effect until one of @option{-fselective-scheduling} or
When pipelining loops during selective scheduling, also pipeline outer loops.
This option has no effect until @option{-fsel-sched-pipelining} is turned on.
@item -fshrink-wrap
@opindex fshrink-wrap
Emit function prologues only before parts of the function that need it,
rather than at the top of the function. This flag is enabled by default at
@option{-O} and higher.
@item -fcaller-saves
@opindex fcaller-saves
Enable values to be allocated in registers that will be clobbered by

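For illustration, the kind of function the new paragraph is about (a hedged variant of the gcc.target/i386/sw-1.c test added at the end of this commit; __builtin_memcpy is used only to keep the sketch self-contained):

/* With -O2 -fshrink-wrap, the prologue that sets up the frame for t[]
   can be emitted only on the path that reaches the copy; the early
   return executes without it. */
extern int c;
extern int x[2000];

void
foo (int a, int b)
{
  int t[200];

  if (a == 0 || c == 0)
    return;
  __builtin_memcpy (t, x + b, sizeof t);
  c = t[a];
}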
gcc/function.c

@@ -147,9 +147,6 @@ extern tree debug_find_var_in_block_tree (tree, tree);
can always export `prologue_epilogue_contains'. */
static void record_insns (rtx, rtx, htab_t *) ATTRIBUTE_UNUSED;
static bool contains (const_rtx, htab_t);
#ifdef HAVE_return
static void emit_return_into_block (basic_block);
#endif
static void prepare_function_start (void);
static void do_clobber_return_reg (rtx, void *);
static void do_use_return_reg (rtx, void *);
@@ -5285,6 +5282,78 @@ prologue_epilogue_contains (const_rtx insn)
return 0;
}
#ifdef HAVE_simple_return
/* A for_each_rtx subroutine of record_hard_reg_sets. */
static int
record_hard_reg_uses_1 (rtx *px, void *data)
{
rtx x = *px;
HARD_REG_SET *pused = (HARD_REG_SET *)data;
if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER)
add_to_hard_reg_set (pused, GET_MODE (x), REGNO (x));
return 0;
}
/* Like record_hard_reg_sets, but called through note_uses. */
static void
record_hard_reg_uses (rtx *px, void *data)
{
for_each_rtx (px, record_hard_reg_uses_1, data);
}
/* A subroutine of requires_stack_frame_p, called via for_each_rtx.
Return 1 if we found an rtx that forces a prologue, zero otherwise. */
static int
frame_required_for_rtx (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
rtx x = *loc;
if (x == stack_pointer_rtx || x == hard_frame_pointer_rtx
|| x == arg_pointer_rtx || x == pic_offset_table_rtx)
return 1;
return 0;
}
/* Return true if INSN requires the stack frame to be set up.
PROLOGUE_USED contains the hard registers used in the function
prologue. */
static bool
requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used)
{
df_ref *def_rec;
HARD_REG_SET hardregs;
unsigned regno;
if (!INSN_P (insn) || DEBUG_INSN_P (insn))
return false;
if (CALL_P (insn))
return !SIBLING_CALL_P (insn);
if (for_each_rtx (&PATTERN (insn), frame_required_for_rtx, NULL))
return true;
CLEAR_HARD_REG_SET (hardregs);
for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
{
rtx dreg = DF_REF_REG (*def_rec);
if (!REG_P (dreg))
continue;
add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
REGNO (dreg));
}
if (hard_reg_set_intersect_p (hardregs, prologue_used))
return true;
AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (TEST_HARD_REG_BIT (hardregs, regno)
&& df_regs_ever_live_p (regno))
return true;
return false;
}
#endif
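A hedged source-level illustration of what requires_stack_frame_p checks for (both functions are hypothetical and assume a target that passes these arguments in registers): references to the stack, frame, argument or PIC pointer, non-sibling calls, and writes to call-saved registers that are live elsewhere all force the prologue.

extern void consume (int *p);

/* Only argument and return registers are touched and no insn mentions the
   stack or frame pointer, so no insn in this function requires the frame. */
int
leaf_no_frame (int a, int b)
{
  return a + b;
}

/* Taking the address of a local forces a stack slot, so the insns that
   compute &local reference the stack or frame pointer and
   frame_required_for_rtx returns 1 for them. */
void
needs_frame (int a)
{
  int local = a;

  consume (&local);
}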
#ifdef HAVE_return
/* Insert use of return register before the end of BB. */
@@ -5299,45 +5368,145 @@ emit_use_return_register_into_block (basic_block bb)
emit_insn_before (seq, BB_END (bb));
}
/* Insert gen_return at the end of block BB. This also means updating
block_for_insn appropriately. */
/* Create a return pattern, either simple_return or return, depending on
simple_p. */
static rtx
gen_return_pattern (bool simple_p)
{
#ifdef HAVE_simple_return
return simple_p ? gen_simple_return () : gen_return ();
#else
gcc_assert (!simple_p);
return gen_return ();
#endif
}
/* Insert an appropriate return pattern at the end of block BB. This
also means updating block_for_insn appropriately. SIMPLE_P is
the same as in gen_return_pattern and passed to it. */
static void
emit_return_into_block (basic_block bb)
emit_return_into_block (bool simple_p, basic_block bb)
{
rtx jump = emit_jump_insn_after (gen_return (), BB_END (bb));
rtx pat = PATTERN (jump);
rtx jump, pat;
jump = emit_jump_insn_after (gen_return_pattern (simple_p), BB_END (bb));
pat = PATTERN (jump);
if (GET_CODE (pat) == PARALLEL)
pat = XVECEXP (pat, 0, 0);
gcc_assert (ANY_RETURN_P (pat));
JUMP_LABEL (jump) = pat;
}
#endif /* HAVE_return */
#endif
/* Generate the prologue and epilogue RTL if the machine supports it. Thread
this into place with notes indicating where the prologue ends and where
the epilogue begins. Update the basic block information when possible. */
the epilogue begins. Update the basic block information when possible.
Notes on epilogue placement:
There are several kinds of edges to the exit block:
* a single fallthru edge from LAST_BB
* possibly, edges from blocks containing sibcalls
* possibly, fake edges from infinite loops
The epilogue is always emitted on the fallthru edge from the last basic
block in the function, LAST_BB, into the exit block.
If LAST_BB is empty except for a label, it is the target of every
other basic block in the function that ends in a return. If a
target has a return or simple_return pattern (possibly with
conditional variants), these basic blocks can be changed so that a
return insn is emitted into them, and their target is adjusted to
the real exit block.
Notes on shrink wrapping: We implement a fairly conservative
version of shrink-wrapping rather than the textbook one. We only
generate a single prologue and a single epilogue. This is
sufficient to catch a number of interesting cases involving early
exits.
First, we identify the blocks that require the prologue to occur before
them. These are the ones that modify a call-saved register, or reference
any of the stack or frame pointer registers. To simplify things, we then
mark everything reachable from these blocks as also requiring a prologue.
This takes care of loops automatically, and avoids the need to examine
whether MEMs reference the frame, since it is sufficient to check for
occurrences of the stack or frame pointer.
We then compute the set of blocks for which the need for a prologue
is anticipatable (borrowing terminology from the shrink-wrapping
description in Muchnick's book). These are the blocks which either
require a prologue themselves, or those that have only successors
where the prologue is anticipatable. The prologue needs to be
inserted on all edges from BB1->BB2 where BB2 is in ANTIC and BB1
is not. For the moment, we ensure that only one such edge exists.
The epilogue is placed as described above, but we make a
distinction between inserting return and simple_return patterns
when modifying other blocks that end in a return. Blocks that end
in a sibcall omit the sibcall_epilogue if the block is not in
ANTIC. */
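The anticipatability computation described above can be summarized with a minimal, self-contained sketch (this is not GCC code; the toy CFG type and all names in it are made up for illustration): a block enters ANTIC if it needs the frame itself, or once all of its successors are in ANTIC.

#include <stdbool.h>

/* Toy CFG block: successor indices plus the "requires a frame" flag that
   requires_stack_frame_p would have computed.  Purely illustrative. */
struct toy_bb
{
  int n_succs;
  int succ[4];
  bool needs_frame;
};

/* ANTIC[i] becomes true when block I needs the prologue itself or when
   every successor of I is already in ANTIC.  Blocks with no recorded
   successors (those falling through to the exit block in this toy model)
   stay out of ANTIC unless they need the frame themselves. */
static void
compute_antic (const struct toy_bb *bbs, int n, bool *antic)
{
  bool changed = true;

  for (int i = 0; i < n; i++)
    antic[i] = bbs[i].needs_frame;

  while (changed)
    {
      changed = false;
      for (int i = 0; i < n; i++)
        {
          bool all_set = true;

          if (antic[i] || bbs[i].n_succs == 0)
            continue;
          for (int j = 0; j < bbs[i].n_succs; j++)
            if (!antic[bbs[i].succ[j]])
              {
                all_set = false;
                break;
              }
          if (all_set)
            {
              antic[i] = true;
              changed = true;
            }
        }
    }
}

The prologue is then placed on edges that go from a block outside ANTIC into a block inside it; the implementation below additionally insists that exactly one such edge exists, and otherwise falls back to the original entry edge.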
static void
thread_prologue_and_epilogue_insns (void)
{
bool inserted;
basic_block last_bb;
bool last_bb_active;
#ifdef HAVE_simple_return
bool unconverted_simple_returns = false;
basic_block simple_return_block_hot = NULL;
basic_block simple_return_block_cold = NULL;
bool nonempty_prologue;
#endif
rtx returnjump ATTRIBUTE_UNUSED;
rtx seq ATTRIBUTE_UNUSED, epilogue_end ATTRIBUTE_UNUSED;
edge entry_edge, e;
rtx prologue_seq ATTRIBUTE_UNUSED, split_prologue_seq ATTRIBUTE_UNUSED;
edge e, entry_edge, orig_entry_edge, exit_fallthru_edge;
edge_iterator ei;
bitmap_head bb_flags;
df_analyze ();
rtl_profile_for_bb (ENTRY_BLOCK_PTR);
inserted = false;
seq = NULL_RTX;
epilogue_end = NULL_RTX;
returnjump = NULL_RTX;
/* Can't deal with multiple successors of the entry block at the
moment. Function should always have at least one entry
point. */
gcc_assert (single_succ_p (ENTRY_BLOCK_PTR));
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR);
orig_entry_edge = entry_edge;
exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR->preds);
if (exit_fallthru_edge != NULL)
{
rtx label;
last_bb = exit_fallthru_edge->src;
/* Test whether there are active instructions in the last block. */
label = BB_END (last_bb);
while (label && !LABEL_P (label))
{
if (active_insn_p (label))
break;
label = PREV_INSN (label);
}
last_bb_active = BB_HEAD (last_bb) != label || !LABEL_P (label);
}
else
{
last_bb = NULL;
last_bb_active = false;
}
split_prologue_seq = NULL_RTX;
if (flag_split_stack
&& (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
== NULL))
@@ -5349,17 +5518,15 @@ thread_prologue_and_epilogue_insns (void)
start_sequence ();
emit_insn (gen_split_stack_prologue ());
seq = get_insns ();
split_prologue_seq = get_insns ();
end_sequence ();
record_insns (seq, NULL, &prologue_insn_hash);
set_insn_locators (seq, prologue_locator);
insert_insn_on_edge (seq, entry_edge);
inserted = true;
record_insns (split_prologue_seq, NULL, &prologue_insn_hash);
set_insn_locators (split_prologue_seq, prologue_locator);
#endif
}
prologue_seq = NULL_RTX;
#ifdef HAVE_prologue
if (HAVE_prologue)
{
@@ -5382,15 +5549,222 @@ thread_prologue_and_epilogue_insns (void)
if (!targetm.profile_before_prologue () && crtl->profile)
emit_insn (gen_blockage ());
seq = get_insns ();
prologue_seq = get_insns ();
end_sequence ();
set_insn_locators (seq, prologue_locator);
insert_insn_on_edge (seq, entry_edge);
inserted = true;
set_insn_locators (prologue_seq, prologue_locator);
}
#endif
bitmap_initialize (&bb_flags, &bitmap_default_obstack);
#ifdef HAVE_simple_return
/* Try to perform a kind of shrink-wrapping, making sure the
prologue/epilogue is emitted only around those parts of the
function that require it. */
nonempty_prologue = false;
for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
{
nonempty_prologue = true;
break;
}
if (flag_shrink_wrap && HAVE_simple_return
&& nonempty_prologue && !crtl->calls_eh_return)
{
HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
rtx p_insn;
VEC(basic_block, heap) *vec;
basic_block bb;
bitmap_head bb_antic_flags;
bitmap_head bb_on_list;
if (dump_file)
fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
/* Compute the registers set and used in the prologue. */
CLEAR_HARD_REG_SET (prologue_clobbered);
CLEAR_HARD_REG_SET (prologue_used);
for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
{
HARD_REG_SET this_used;
if (!NONDEBUG_INSN_P (p_insn))
continue;
CLEAR_HARD_REG_SET (this_used);
note_uses (&PATTERN (p_insn), record_hard_reg_uses,
&this_used);
AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
IOR_HARD_REG_SET (prologue_used, this_used);
note_stores (PATTERN (p_insn), record_hard_reg_sets,
&prologue_clobbered);
}
for (p_insn = split_prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
if (NONDEBUG_INSN_P (p_insn))
note_stores (PATTERN (p_insn), record_hard_reg_sets,
&prologue_clobbered);
bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
/* Find the set of basic blocks that require a stack frame. */
vec = VEC_alloc (basic_block, heap, n_basic_blocks);
FOR_EACH_BB (bb)
{
rtx insn;
/* As a special case, check for jumps to the last bb that
cannot successfully be converted to simple_returns later
on, and mark them as requiring a frame. These are
conditional jumps that jump to their fallthru block, so
it's not a case that is expected to occur often. */
if (JUMP_P (BB_END (bb)) && any_condjump_p (BB_END (bb))
&& single_succ_p (bb)
&& !last_bb_active
&& single_succ (bb) == last_bb)
{
bitmap_set_bit (&bb_flags, bb->index);
VEC_quick_push (basic_block, vec, bb);
}
else
FOR_BB_INSNS (bb, insn)
if (requires_stack_frame_p (insn, prologue_used))
{
bitmap_set_bit (&bb_flags, bb->index);
VEC_quick_push (basic_block, vec, bb);
break;
}
}
/* For every basic block that needs a prologue, mark all blocks
reachable from it, so as to ensure they are also seen as
requiring a prologue. */
while (!VEC_empty (basic_block, vec))
{
basic_block tmp_bb = VEC_pop (basic_block, vec);
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, tmp_bb->succs)
if (e->dest != EXIT_BLOCK_PTR
&& bitmap_set_bit (&bb_flags, e->dest->index))
VEC_quick_push (basic_block, vec, e->dest);
}
/* If the last basic block contains only a label, we'll be able
to convert jumps to it to (potentially conditional) return
insns later. This means we don't necessarily need a prologue
for paths reaching it. */
if (last_bb)
{
if (!last_bb_active)
bitmap_clear_bit (&bb_flags, last_bb->index);
else if (!bitmap_bit_p (&bb_flags, last_bb->index))
goto fail_shrinkwrap;
}
/* Now walk backwards from every block that is marked as needing
a prologue to compute the bb_antic_flags bitmap. */
bitmap_copy (&bb_antic_flags, &bb_flags);
FOR_EACH_BB (bb)
{
edge e;
edge_iterator ei;
if (!bitmap_bit_p (&bb_flags, bb->index))
continue;
FOR_EACH_EDGE (e, ei, bb->preds)
if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
&& bitmap_set_bit (&bb_on_list, e->src->index))
VEC_quick_push (basic_block, vec, e->src);
}
while (!VEC_empty (basic_block, vec))
{
basic_block tmp_bb = VEC_pop (basic_block, vec);
edge e;
edge_iterator ei;
bool all_set = true;
bitmap_clear_bit (&bb_on_list, tmp_bb->index);
FOR_EACH_EDGE (e, ei, tmp_bb->succs)
if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
{
all_set = false;
break;
}
if (all_set)
{
bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
FOR_EACH_EDGE (e, ei, tmp_bb->preds)
if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
&& bitmap_set_bit (&bb_on_list, e->src->index))
VEC_quick_push (basic_block, vec, e->src);
}
}
/* Find exactly one edge that leads to a block in ANTIC from
a block that isn't. */
if (!bitmap_bit_p (&bb_antic_flags, entry_edge->dest->index))
FOR_EACH_BB (bb)
{
if (!bitmap_bit_p (&bb_antic_flags, bb->index))
continue;
FOR_EACH_EDGE (e, ei, bb->preds)
if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
{
if (entry_edge != orig_entry_edge)
{
entry_edge = orig_entry_edge;
if (dump_file)
fprintf (dump_file, "More than one candidate edge.\n");
goto fail_shrinkwrap;
}
if (dump_file)
fprintf (dump_file, "Found candidate edge for "
"shrink-wrapping, %d->%d.\n", e->src->index,
e->dest->index);
entry_edge = e;
}
}
/* Test whether the prologue is known to clobber any register
(other than FP or SP) which are live on the edge. */
CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
if (frame_pointer_needed)
CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
CLEAR_HARD_REG_SET (live_on_edge);
reg_set_to_hard_reg_set (&live_on_edge,
df_get_live_in (entry_edge->dest));
if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
{
entry_edge = orig_entry_edge;
if (dump_file)
fprintf (dump_file, "Shrink-wrapping aborted due to clobber.\n");
}
else if (dump_file && entry_edge != orig_entry_edge)
{
crtl->shrink_wrapped = true;
fprintf (dump_file, "Performing shrink-wrapping.\n");
}
fail_shrinkwrap:
bitmap_clear (&bb_antic_flags);
bitmap_clear (&bb_on_list);
VEC_free (basic_block, heap, vec);
}
#endif
if (split_prologue_seq != NULL_RTX)
{
insert_insn_on_edge (split_prologue_seq, entry_edge);
inserted = true;
}
if (prologue_seq != NULL_RTX)
{
insert_insn_on_edge (prologue_seq, entry_edge);
inserted = true;
}
/* If the exit block has no non-fake predecessors, we don't need
an epilogue. */
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
@@ -5400,110 +5774,145 @@ thread_prologue_and_epilogue_insns (void)
goto epilogue_done;
rtl_profile_for_bb (EXIT_BLOCK_PTR);
#ifdef HAVE_return
if (optimize && HAVE_return)
/* If we're allowed to generate a simple return instruction, then by
definition we don't need a full epilogue. If the last basic
block before the exit block does not contain active instructions,
examine its predecessors and try to emit (conditional) return
instructions. */
if (optimize && !last_bb_active
&& (HAVE_return || entry_edge != orig_entry_edge))
{
/* If we're allowed to generate a simple return instruction,
then by definition we don't need a full epilogue. Examine
the block that falls through to EXIT. If it does not
contain any code, examine its predecessors and try to
emit (conditional) return instructions. */
basic_block last;
edge_iterator ei2;
int i;
basic_block bb;
rtx label;
VEC(basic_block,heap) *src_bbs;
e = find_fallthru_edge (EXIT_BLOCK_PTR->preds);
if (e == NULL)
if (exit_fallthru_edge == NULL)
goto epilogue_done;
last = e->src;
label = BB_HEAD (last_bb);
/* Verify that there are no active instructions in the last block. */
label = BB_END (last);
while (label && !LABEL_P (label))
src_bbs = VEC_alloc (basic_block, heap, EDGE_COUNT (last_bb->preds));
FOR_EACH_EDGE (e, ei2, last_bb->preds)
if (e->src != ENTRY_BLOCK_PTR)
VEC_quick_push (basic_block, src_bbs, e->src);
FOR_EACH_VEC_ELT (basic_block, src_bbs, i, bb)
{
if (active_insn_p (label))
break;
label = PREV_INSN (label);
}
bool simple_p;
rtx jump;
e = find_edge (bb, last_bb);
if (BB_HEAD (last) == label && LABEL_P (label))
{
edge_iterator ei2;
jump = BB_END (bb);
for (ei2 = ei_start (last->preds); (e = ei_safe_edge (ei2)); )
#ifdef HAVE_simple_return
simple_p = (entry_edge != orig_entry_edge
&& !bitmap_bit_p (&bb_flags, bb->index));
#else
simple_p = false;
#endif
if (!simple_p
&& (!HAVE_return || !JUMP_P (jump)
|| JUMP_LABEL (jump) != label))
continue;
/* If we have an unconditional jump, we can replace that
with a simple return instruction. */
if (!JUMP_P (jump))
{
basic_block bb = e->src;
rtx jump;
emit_barrier_after (BB_END (bb));
emit_return_into_block (simple_p, bb);
}
else if (simplejump_p (jump))
{
/* The use of the return register might be present in the exit
fallthru block. Either:
- removing the use is safe, and we should remove the use in
the exit fallthru block, or
- removing the use is not safe, and we should add it here.
For now, we conservatively choose the latter. Either of the
2 helps in crossjumping. */
emit_use_return_register_into_block (bb);
if (bb == ENTRY_BLOCK_PTR)
{
ei_next (&ei2);
continue;
}
emit_return_into_block (simple_p, bb);
delete_insn (jump);
}
else if (condjump_p (jump) && JUMP_LABEL (jump) != label)
{
basic_block new_bb;
edge new_e;
jump = BB_END (bb);
if (!JUMP_P (jump) || JUMP_LABEL (jump) != label)
{
ei_next (&ei2);
continue;
}
/* If we have an unconditional jump, we can replace that
with a simple return instruction. */
if (simplejump_p (jump))
{
/* The use of the return register might be present in the exit
fallthru block. Either:
- removing the use is safe, and we should remove the use in
the exit fallthru block, or
- removing the use is not safe, and we should add it here.
For now, we conservatively choose the latter. Either of the
2 helps in crossjumping. */
emit_use_return_register_into_block (bb);
emit_return_into_block (bb);
delete_insn (jump);
}
/* If we have a conditional jump, we can try to replace
that with a conditional return instruction. */
else if (condjump_p (jump))
{
if (! redirect_jump (jump, ret_rtx, 0))
{
ei_next (&ei2);
continue;
}
/* See comment in simple_jump_p case above. */
emit_use_return_register_into_block (bb);
/* If this block has only one successor, it both jumps
and falls through to the fallthru block, so we can't
delete the edge. */
if (single_succ_p (bb))
{
ei_next (&ei2);
continue;
}
}
gcc_assert (simple_p);
new_bb = split_edge (e);
emit_barrier_after (BB_END (new_bb));
emit_return_into_block (simple_p, new_bb);
#ifdef HAVE_simple_return
if (BB_PARTITION (new_bb) == BB_HOT_PARTITION)
simple_return_block_hot = new_bb;
else
simple_return_block_cold = new_bb;
#endif
new_e = single_succ_edge (new_bb);
redirect_edge_succ (new_e, EXIT_BLOCK_PTR);
continue;
}
/* If we have a conditional jump branching to the last
block, we can try to replace that with a conditional
return instruction. */
else if (condjump_p (jump))
{
rtx dest;
if (simple_p)
dest = simple_return_rtx;
else
dest = ret_rtx;
if (! redirect_jump (jump, dest, 0))
{
ei_next (&ei2);
#ifdef HAVE_simple_return
if (simple_p)
unconverted_simple_returns = true;
#endif
continue;
}
/* Fix up the CFG for the successful change we just made. */
redirect_edge_succ (e, EXIT_BLOCK_PTR);
/* See comment in simple_jump_p case above. */
emit_use_return_register_into_block (bb);
/* If this block has only one successor, it both jumps
and falls through to the fallthru block, so we can't
delete the edge. */
if (single_succ_p (bb))
continue;
}
else
{
#ifdef HAVE_simple_return
if (simple_p)
unconverted_simple_returns = true;
#endif
continue;
}
/* Fix up the CFG for the successful change we just made. */
redirect_edge_succ (e, EXIT_BLOCK_PTR);
}
VEC_free (basic_block, heap, src_bbs);
if (HAVE_return)
{
/* Emit a return insn for the exit fallthru block. Whether
this is still reachable will be determined later. */
emit_barrier_after (BB_END (last));
emit_return_into_block (last);
epilogue_end = BB_END (last);
single_succ_edge (last)->flags &= ~EDGE_FALLTHRU;
emit_barrier_after (BB_END (last_bb));
emit_return_into_block (false, last_bb);
epilogue_end = BB_END (last_bb);
if (JUMP_P (epilogue_end))
JUMP_LABEL (epilogue_end) = ret_rtx;
single_succ_edge (last_bb)->flags &= ~EDGE_FALLTHRU;
goto epilogue_done;
}
}
@@ -5540,20 +5949,15 @@ thread_prologue_and_epilogue_insns (void)
}
#endif
/* Find the edge that falls through to EXIT. Other edges may exist
due to RETURN instructions, but those don't need epilogues.
There really shouldn't be a mixture -- either all should have
been converted or none, however... */
/* If nothing falls through into the exit block, we don't need an
epilogue. */
e = find_fallthru_edge (EXIT_BLOCK_PTR->preds);
if (e == NULL)
if (exit_fallthru_edge == NULL)
goto epilogue_done;
#ifdef HAVE_epilogue
if (HAVE_epilogue)
{
rtx returnjump;
start_sequence ();
epilogue_end = emit_note (NOTE_INSN_EPILOGUE_BEG);
seq = gen_epilogue ();
@@ -5564,11 +5968,11 @@ thread_prologue_and_epilogue_insns (void)
record_insns (seq, NULL, &epilogue_insn_hash);
set_insn_locators (seq, epilogue_locator);
returnjump = get_last_insn ();
seq = get_insns ();
returnjump = get_last_insn ();
end_sequence ();
insert_insn_on_edge (seq, e);
insert_insn_on_edge (seq, exit_fallthru_edge);
inserted = true;
if (JUMP_P (returnjump))
@@ -5581,23 +5985,21 @@ thread_prologue_and_epilogue_insns (void)
else
JUMP_LABEL (returnjump) = ret_rtx;
}
else
returnjump = NULL_RTX;
}
else
#endif
{
basic_block cur_bb;
if (! next_active_insn (BB_END (e->src)))
if (! next_active_insn (BB_END (exit_fallthru_edge->src)))
goto epilogue_done;
/* We have a fall-through edge to the exit block, the source is not
at the end of the function, and there will be an assembler epilogue
at the end of the function.
We can't use force_nonfallthru here, because that would try to
use return. Inserting a jump 'by hand' is extremely messy, so
use return. Inserting a jump 'by hand' is extremely messy, so
we take advantage of cfg_layout_finalize using
fixup_fallthru_exit_predecessor. */
fixup_fallthru_exit_predecessor. */
cfg_layout_initialize (0);
FOR_EACH_BB (cur_bb)
if (cur_bb->index >= NUM_FIXED_BLOCKS
@@ -5607,6 +6009,7 @@ thread_prologue_and_epilogue_insns (void)
}
epilogue_done:
default_rtl_profile ();
if (inserted)
@@ -5632,33 +6035,102 @@ epilogue_done:
}
}
#ifdef HAVE_simple_return
/* If there were branches to an empty LAST_BB which we tried to
convert to conditional simple_returns, but couldn't for some
reason, create a block to hold a simple_return insn and redirect
those remaining edges. */
if (unconverted_simple_returns)
{
edge_iterator ei2;
basic_block exit_pred = EXIT_BLOCK_PTR->prev_bb;
gcc_assert (entry_edge != orig_entry_edge);
/* See if we can reuse the last insn that was emitted for the
epilogue. */
if (returnjump != NULL_RTX
&& JUMP_LABEL (returnjump) == simple_return_rtx)
{
edge e = split_block (exit_fallthru_edge->src,
PREV_INSN (returnjump));
if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
simple_return_block_hot = e->dest;
else
simple_return_block_cold = e->dest;
}
restart_scan:
for (ei2 = ei_start (last_bb->preds); (e = ei_safe_edge (ei2)); )
{
basic_block bb = e->src;
basic_block *pdest_bb;
if (bb == ENTRY_BLOCK_PTR
|| bitmap_bit_p (&bb_flags, bb->index))
{
ei_next (&ei2);
continue;
}
if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
pdest_bb = &simple_return_block_hot;
else
pdest_bb = &simple_return_block_cold;
if (*pdest_bb == NULL)
{
basic_block bb;
rtx start;
bb = create_basic_block (NULL, NULL, exit_pred);
BB_COPY_PARTITION (bb, e->src);
start = emit_jump_insn_after (gen_simple_return (),
BB_END (bb));
JUMP_LABEL (start) = simple_return_rtx;
emit_barrier_after (start);
*pdest_bb = bb;
make_edge (bb, EXIT_BLOCK_PTR, 0);
}
redirect_edge_and_branch_force (e, *pdest_bb);
goto restart_scan;
}
}
#endif
#ifdef HAVE_sibcall_epilogue
/* Emit sibling epilogues before any sibling call sites. */
for (ei = ei_start (EXIT_BLOCK_PTR->preds); (e = ei_safe_edge (ei)); )
{
basic_block bb = e->src;
rtx insn = BB_END (bb);
rtx ep_seq;
if (!CALL_P (insn)
|| ! SIBLING_CALL_P (insn))
|| ! SIBLING_CALL_P (insn)
|| (entry_edge != orig_entry_edge
&& !bitmap_bit_p (&bb_flags, bb->index)))
{
ei_next (&ei);
continue;
}
start_sequence ();
emit_note (NOTE_INSN_EPILOGUE_BEG);
emit_insn (gen_sibcall_epilogue ());
seq = get_insns ();
end_sequence ();
ep_seq = gen_sibcall_epilogue ();
if (ep_seq)
{
start_sequence ();
emit_note (NOTE_INSN_EPILOGUE_BEG);
emit_insn (ep_seq);
seq = get_insns ();
end_sequence ();
/* Retain a map of the epilogue insns. Used in life analysis to
avoid getting rid of sibcall epilogue insns. Do this before we
actually emit the sequence. */
record_insns (seq, NULL, &epilogue_insn_hash);
set_insn_locators (seq, epilogue_locator);
/* Retain a map of the epilogue insns. Used in life analysis to
avoid getting rid of sibcall epilogue insns. Do this before we
actually emit the sequence. */
record_insns (seq, NULL, &epilogue_insn_hash);
set_insn_locators (seq, epilogue_locator);
emit_insn_before (seq, insn);
emit_insn_before (seq, insn);
}
ei_next (&ei);
}
#endif
@@ -5683,6 +6155,8 @@ epilogue_done:
}
#endif
bitmap_clear (&bb_flags);
/* Threading the prologue and epilogue changes the artificial refs
in the entry and exit blocks. */
epilogue_completed = 1;

gcc/function.h

@@ -435,6 +435,9 @@ struct GTY(()) rtl_data {
function where currently compiled version of it is nothrow. */
bool nothrow;
/* True if we performed shrink-wrapping for the current function. */
bool shrink_wrapped;
/* Like regs_ever_live, but 1 if a reg is set or clobbered from an
asm. Unlike regs_ever_live, elements of this array corresponding
to eliminable regs (like the frame pointer) are set if an asm

gcc/opts.c

@@ -434,6 +434,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_1_PLUS, OPT_fipa_reference, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_fipa_profile, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_fmerge_constants, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_fshrink_wrap, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_ccp, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_bit_ccp, NULL, 1 },

gcc/testsuite/ChangeLog

@@ -1,3 +1,7 @@
2011-10-05 Bernd Schmidt <bernds@codesourcery.com>
* gcc.target/i386/sw-1.c: New test.
2011-10-05 Uros Bizjak <ubizjak@gmail.com>
* gcc.target/i386/avx256-unaligned-load-3.c (dg-options): Add

gcc/testsuite/gcc.target/i386/sw-1.c

@@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fshrink-wrap -fdump-rtl-pro_and_epilogue" } */
#include <string.h>
int c;
int x[2000];
__attribute__((regparm(1))) void foo (int a, int b)
{
int t[200];
if (a == 0 || c == 0)
return;
memcpy (t, x + b, sizeof t);
c = t[a];
}
/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue" } } */
/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */
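The dg-final scan above matches the "Performing shrink-wrapping." message that thread_prologue_and_epilogue_insns prints to the pro_and_epilogue dump once a candidate edge survives the clobber check; the regparm(1) attribute is presumably there so that, on 32-bit x86, the first argument arrives in a register and the early-return block never has to touch the incoming argument area.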