bb-reorder: Improve compgotos pass (PR71785)

For code like the testcase in PR71785, GCC factors all the indirect branches
into a single dispatcher that everything then jumps to.  This is because
having many indirect branches, each with many jump targets, does not scale
in large parts of the compiler.  Very late in the pass pipeline (right
before peephole2) the indirect branches are then unfactored again, by
the duplicate_computed_gotos pass.
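
The kind of code in question looks roughly like the following (a minimal
sketch in the spirit of the PR71785 testcase, not the testcase itself; the
opcode set is made up).  Every handler ends in a computed goto, and those
indirect branches are what gets factored into a single dispatcher block:

    /* Build with GCC: computed gotos (labels as values) are a GNU C
       extension.  */
    #include <stdio.h>

    static int
    run (const unsigned char *pc)
    {
      /* Dispatch table of label addresses, one entry per opcode.  */
      static void *dispatch[] = { &&op_inc, &&op_dec, &&op_halt };
      int acc = 0;

      goto *dispatch[*pc];

    op_inc:
      acc++;
      goto *dispatch[*++pc];    /* indirect branch, factored by GCC */
    op_dec:
      acc--;
      goto *dispatch[*++pc];    /* another one, factored as well */
    op_halt:
      return acc;
    }

    int
    main (void)
    {
      static const unsigned char prog[] = { 0, 0, 1, 2 };  /* inc inc dec halt */
      printf ("%d\n", run (prog));    /* prints 1 */
      return 0;
    }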

This pass works by replacing each branch to such a common dispatcher with a
copy of the dispatcher.  For code like this testcase that does not work so
well: most cases do a single addition instruction right before jumping to
the dispatcher, but not all of them do, so we end up with only two indirect
jumps: the one without the addition, and the one with the addition in its
own basic block, and now everything else jumps _there_.
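
Schematically (a hand-drawn sketch of the block shapes described above, not
actual pass output; H1..H4 are opcode handlers, D is the factored dispatcher,
and A is the block that ends up holding the shared addition):

    before unfactoring:                after the old algorithm:

      H1 --\                             H1 --\
      H2 ----> A: p += 1 --> D:          H2 ----> A: p += 1
      H3 --/       goto *tab[p]          H3 --/      goto *tab[p]
      H4 (no add) ---------> D           H4: goto *tab[p]

H1..H3 still all funnel through the single indirect jump in A.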

This patch rewrites the algorithm to deal with this.  It also makes the
pass simpler: it no longer needs the "candidates" bitmap, it does not need
RTL layout mode, it does not need cleanup_cfg, and it does not need to keep
track of which blocks it has already visited.
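
With the patch applied, the unfactoring can be observed in the RTL dumps (a
usage sketch; interp.c stands for any computed-goto-heavy file such as the
example above, and the exact dump file name depends on the pass number):

    gcc -O2 -c -fdump-rtl-compgotos interp.c
    grep 'Duplicating computed goto' interp.c.*r.compgotos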


	PR rtl-optimization/71785
	* bb-reorder.c (maybe_duplicate_computed_goto): New function.
	(duplicate_computed_gotos): New function.
	(pass_duplicate_computed_gotos::execute): Rewrite.

From-SVN: r242584

commit cd36a4518d
parent 9bd0f0bc6b
Author: Segher Boessenkool
Date:   2016-11-18 10:14:52 +01:00

2 changed files with 101 additions and 121 deletions

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog

@@ -1,3 +1,10 @@
+2016-11-18  Segher Boessenkool  <segher@kernel.crashing.org>
+
+	PR rtl-optimization/71785
+	* bb-reorder.c (maybe_duplicate_computed_goto): New function.
+	(duplicate_computed_gotos): New function.
+	(pass_duplicate_computed_gotos::execute): Rewrite.
+
 2016-11-17  Jeff Law  <law@redhat.com>
 
 	PR target/47192
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c

@@ -2586,11 +2586,101 @@ make_pass_reorder_blocks (gcc::context *ctxt)
   return new pass_reorder_blocks (ctxt);
 }
 
+/* Duplicate a block (that we already know ends in a computed jump) into its
+   predecessors, where possible.  Return whether anything is changed.  */
+
+static bool
+maybe_duplicate_computed_goto (basic_block bb, int max_size)
+{
+  if (single_pred_p (bb))
+    return false;
+
+  /* Make sure that the block is small enough.  */
+  rtx_insn *insn;
+  FOR_BB_INSNS (bb, insn)
+    if (INSN_P (insn))
+      {
+	max_size -= get_attr_min_length (insn);
+	if (max_size < 0)
+	  return false;
+      }
+
+  bool changed = false;
+  edge e;
+  edge_iterator ei;
+  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
+    {
+      basic_block pred = e->src;
+
+      /* Do not duplicate BB into PRED if that is the last predecessor, or if
+	 we cannot merge a copy of BB with PRED.  */
+      if (single_pred_p (bb)
+	  || !single_succ_p (pred)
+	  || e->flags & EDGE_COMPLEX
+	  || pred->index < NUM_FIXED_BLOCKS
+	  || (JUMP_P (BB_END (pred)) && !simplejump_p (BB_END (pred)))
+	  || (JUMP_P (BB_END (pred)) && CROSSING_JUMP_P (BB_END (pred))))
+	{
+	  ei_next (&ei);
+	  continue;
+	}
+
+      if (dump_file)
+	fprintf (dump_file, "Duplicating computed goto bb %d into bb %d\n",
+		 bb->index, e->src->index);
+
+      /* Remember if PRED can be duplicated; if so, the copy of BB merged
+	 with PRED can be duplicated as well.  */
+      bool can_dup_more = can_duplicate_block_p (pred);
+
+      /* Make a copy of BB, merge it into PRED.  */
+      basic_block copy = duplicate_block (bb, e, NULL);
+      emit_barrier_after_bb (copy);
+      reorder_insns_nobb (BB_HEAD (copy), BB_END (copy), BB_END (pred));
+      merge_blocks (pred, copy);
+
+      changed = true;
+
+      /* Try to merge the resulting merged PRED into further predecessors.  */
+      if (can_dup_more)
+	maybe_duplicate_computed_goto (pred, max_size);
+    }
+
+  return changed;
+}
+
 /* Duplicate the blocks containing computed gotos.  This basically unfactors
    computed gotos that were factored early on in the compilation process to
-   speed up edge based data flow.  We used to not unfactoring them again,
-   which can seriously pessimize code with many computed jumps in the source
-   code, such as interpreters.  See e.g. PR15242.  */
+   speed up edge based data flow.  We used to not unfactor them again, which
+   can seriously pessimize code with many computed jumps in the source code,
+   such as interpreters.  See e.g. PR15242.  */
+
+static void
+duplicate_computed_gotos (function *fun)
+{
+  /* We are estimating the length of uncond jump insn only once
+     since the code for getting the insn length always returns
+     the minimal length now.  */
+  if (uncond_jump_length == 0)
+    uncond_jump_length = get_uncond_jump_length ();
+
+  /* Never copy a block larger than this.  */
+  int max_size
+    = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
+
+  bool changed = false;
+
+  /* Try to duplicate all blocks that end in a computed jump and that
+     can be duplicated at all.  */
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fun)
+    if (computed_jump_p (BB_END (bb)) && can_duplicate_block_p (bb))
+      changed |= maybe_duplicate_computed_goto (bb, max_size);
+
+  /* Duplicating blocks will redirect edges and may cause hot blocks
+     previously reached by both hot and cold blocks to become dominated
+     only by cold blocks.  */
+  if (changed)
+    fixup_partitions ();
+}
+
 namespace {
@@ -2633,125 +2723,8 @@ pass_duplicate_computed_gotos::gate (function *fun)
 unsigned int
 pass_duplicate_computed_gotos::execute (function *fun)
 {
-  basic_block bb, new_bb;
-  bitmap candidates;
-  int max_size;
-  bool changed = false;
+  duplicate_computed_gotos (fun);
 
-  if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
-    return 0;
-
-  clear_bb_flags ();
-  cfg_layout_initialize (0);
-
-  /* We are estimating the length of uncond jump insn only once
-     since the code for getting the insn length always returns
-     the minimal length now.  */
-  if (uncond_jump_length == 0)
-    uncond_jump_length = get_uncond_jump_length ();
-
-  max_size
-    = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
-
-  candidates = BITMAP_ALLOC (NULL);
-
-  /* Look for blocks that end in a computed jump, and see if such blocks
-     are suitable for unfactoring.  If a block is a candidate for unfactoring,
-     mark it in the candidates.  */
-  FOR_EACH_BB_FN (bb, fun)
-    {
-      rtx_insn *insn;
-      edge e;
-      edge_iterator ei;
-      int size, all_flags;
-
-      /* Build the reorder chain for the original order of blocks.  */
-      if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
-	bb->aux = bb->next_bb;
-
-      /* Obviously the block has to end in a computed jump.  */
-      if (!computed_jump_p (BB_END (bb)))
-	continue;
-
-      /* Only consider blocks that can be duplicated.  */
-      if (CROSSING_JUMP_P (BB_END (bb))
-	  || !can_duplicate_block_p (bb))
-	continue;
-
-      /* Make sure that the block is small enough.  */
-      size = 0;
-      FOR_BB_INSNS (bb, insn)
-	if (INSN_P (insn))
-	  {
-	    size += get_attr_min_length (insn);
-	    if (size > max_size)
-	      break;
-	  }
-      if (size > max_size)
-	continue;
-
-      /* Final check: there must not be any incoming abnormal edges.  */
-      all_flags = 0;
-      FOR_EACH_EDGE (e, ei, bb->preds)
-	all_flags |= e->flags;
-      if (all_flags & EDGE_COMPLEX)
-	continue;
-
-      bitmap_set_bit (candidates, bb->index);
-    }
-
-  /* Nothing to do if there is no computed jump here.  */
-  if (bitmap_empty_p (candidates))
-    goto done;
-
-  /* Duplicate computed gotos.  */
-  FOR_EACH_BB_FN (bb, fun)
-    {
-      if (bb->flags & BB_VISITED)
-	continue;
-
-      bb->flags |= BB_VISITED;
-
-      /* BB must have one outgoing edge.  That edge must not lead to
-	 the exit block or the next block.
-	 The destination must have more than one predecessor.  */
-      if (!single_succ_p (bb)
-	  || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
-	  || single_succ (bb) == bb->next_bb
-	  || single_pred_p (single_succ (bb)))
-	continue;
-
-      /* The successor block has to be a duplication candidate.  */
-      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
-	continue;
-
-      /* Don't duplicate a partition crossing edge, which requires difficult
-	 fixup.  */
-      if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
-	continue;
-
-      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
-      new_bb->aux = bb->aux;
-      bb->aux = new_bb;
-      new_bb->flags |= BB_VISITED;
-
-      changed = true;
-    }
-
-done:
-  if (changed)
-    {
-      /* Duplicating blocks above will redirect edges and may cause hot
-	 blocks previously reached by both hot and cold blocks to become
-	 dominated only by cold blocks.  */
-      fixup_partitions ();
-
-      /* Merge the duplicated blocks into predecessors, when possible.  */
-      cfg_layout_finalize ();
-      cleanup_cfg (0);
-    }
-  else
-    cfg_layout_finalize ();
-
-  BITMAP_FREE (candidates);
 
   return 0;
 }