re PR rtl-optimization/15242 (pessimization of "goto *")

PR optimization/15242
	* params.def (PARAM_MAX_GOTO_DUPLICATION_INSNS): New param.
	* basic-block.h (duplicate_computed_gotos): Add prototype.
	* bb-reorder.c (duplicate_computed_gotos): New function to
	duplicate sufficiently small blocks ending in a computed jump.
	* passes.c (rest_of_compilation): Call duplicate_computed_gotos
	if not optimizing for size.
	* cfgcleanup.c (try_crossjump_bb): If not optimizing for size,
	never do tail merging for blocks ending in a computed jump.
	* doc/invoke.texi: Document the max-goto-duplication-insns param.

From-SVN: r94531
This commit is contained in:
Steven Bosscher 2005-02-01 10:03:15 +00:00 committed by Steven Bosscher
parent e88334a68f
commit bbcb0c056b
7 changed files with 140 additions and 2 deletions

View File

@ -1,3 +1,16 @@
2005-02-01 Steven Bosscher <stevenb@suse.de>
PR optimization/15242
* params.def (PARAM_MAX_GOTO_DUPLICATION_INSNS): New param.
* basic-block.h (duplicate_computed_gotos): Add prototype.
* bb-reorder.c (duplicate_computed_gotos): New function to
duplicate sufficiently small blocks ending in a computed jump.
* passes.c (rest_of_compilation): Call duplicate_computed_gotos
if not optimizing for size.
* cfgcleanup.c (try_crossjump_bb): If not optimizing for size,
never do tail merging for blocks ending in a computed jump.
* doc/invoke.texi: Document the max-goto-duplication-insns param.
2005-02-01 Eric Botcazou <ebotcazou@libertysurf.fr>
Patch from Richard Sandiford <rsandifo@redhat.com>

View File

@ -812,6 +812,7 @@ extern bool control_flow_insn_p (rtx);
/* In bb-reorder.c */
extern void reorder_basic_blocks (unsigned int);
extern void duplicate_computed_gotos (void);
extern void partition_hot_cold_basic_blocks (void);
/* In cfg.c */

View File

@ -81,6 +81,7 @@
#include "tm_p.h"
#include "obstack.h"
#include "expr.h"
#include "params.h"
/* The number of rounds. In most cases there will only be 4 rounds, but
when partitioning hot and cold basic blocks into separate sections of
@ -1189,8 +1190,7 @@ copy_bb_p (basic_block bb, int code_may_grow)
if (code_may_grow && maybe_hot_bb_p (bb))
max_size *= 8;
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
FOR_BB_INSNS (bb, insn)
{
if (INSN_P (insn))
size += get_attr_length (insn);
@ -1985,6 +1985,104 @@ reorder_basic_blocks (unsigned int flags)
timevar_pop (TV_REORDER_BLOCKS);
}
/* Unfactor computed gotos by duplicating the blocks that contain them.
   Computed gotos are factored early in the compilation process to speed
   up edge based data flow.  If they are never unfactored again, code
   with many computed jumps in the source, such as interpreters, can be
   seriously pessimized.  See e.g. PR15242.

   A block is a duplication candidate when it ends in a computed jump
   and its total insn length does not exceed the length of an
   unconditional jump times the max-goto-duplication-insns param.  Each
   block whose single successor is such a candidate gets its own copy of
   the candidate, linked right behind it in the reorder chain.  */

void
duplicate_computed_gotos (void)
{
  basic_block bb, copy;
  bitmap dup_candidates;
  int size_limit;

  /* Bail out when there is nothing to work on or when jumps may not
     be modified.  */
  if (n_basic_blocks <= 1 || targetm.cannot_modify_jumps_p ())
    return;

  timevar_push (TV_REORDER_BLOCKS);

  cfg_layout_initialize (0);

  /* The length of an unconditional jump is estimated only once, since
     the code for getting the insn length always returns the minimal
     length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  size_limit
    = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
  dup_candidates = BITMAP_XMALLOC ();

  /* First pass: record the original block order in the reorder chain
     and collect the small blocks that end in a computed jump.  */
  FOR_EACH_BB (bb)
    {
      if (bb->next_bb != EXIT_BLOCK_PTR)
        bb->rbi->next = bb->next_bb;

      if (computed_jump_p (BB_END (bb)))
        {
          rtx insn;
          int length = 0;

          /* Sum up the insn lengths, giving up as soon as the block
             turns out to be too large.  */
          FOR_BB_INSNS (bb, insn)
            {
              if (INSN_P (insn))
                length += get_attr_length (insn);
              if (length > size_limit)
                break;
            }

          if (length <= size_limit)
            bitmap_set_bit (dup_candidates, bb->index);
        }
    }

  /* Second pass: only needed if at least one candidate was found.  */
  if (!bitmap_empty_p (dup_candidates))
    {
      FOR_EACH_BB (bb)
        {
          basic_block target;

          if (bb->rbi->visited)
            continue;
          bb->rbi->visited = 1;

          /* BB must have exactly one outgoing edge.  */
          if (EDGE_COUNT (bb->succs) != 1)
            continue;

          /* That edge must not lead to the exit block or to the next
             block, and its destination must have more than one
             predecessor and be a duplication candidate.  */
          target = EDGE_SUCC (bb, 0)->dest;
          if (target == EXIT_BLOCK_PTR
              || target == bb->next_bb
              || EDGE_COUNT (target->preds) <= 1
              || !bitmap_bit_p (dup_candidates, target->index))
            continue;

          /* Copy the computed-goto block for BB's edge and splice the
             copy into the reorder chain directly after BB.  The copy is
             marked visited so that this loop skips it.  */
          copy = duplicate_block (target, EDGE_SUCC (bb, 0));
          copy->rbi->visited = 1;
          copy->rbi->next = bb->rbi->next;
          bb->rbi->next = copy;
        }
    }

  cfg_layout_finalize ();
  BITMAP_XFREE (dup_candidates);
  timevar_pop (TV_REORDER_BLOCKS);
}
/* This function is the main 'entrance' for the optimization that
partitions hot and cold basic blocks into separate sections of the
.o file (to improve performance and cache locality). Ideally it

View File

@ -1707,6 +1707,13 @@ try_crossjump_bb (int mode, basic_block bb)
if (EDGE_COUNT (bb->preds) < 2)
return false;
/* Don't crossjump if this block ends in a computed jump,
unless we are optimizing for size. */
if (!optimize_size
&& bb != EXIT_BLOCK_PTR
&& computed_jump_p (BB_END (bb)))
return false;
/* If we are partitioning hot/cold basic blocks, we don't want to
mess up unconditional or indirect jumps that cross between hot
and cold sections.

View File

@ -5344,6 +5344,14 @@ of two blocks before crossjumping will be performed on them. This
value is ignored in the case where all instructions in the block being
crossjumped from are matched. The default value is 5.
@item max-goto-duplication-insns
The maximum number of instructions to duplicate to a block that jumps
to a computed goto. To avoid @math{O(N^2)} behavior in a number of
passes, GCC factors computed gotos early in the compilation process,
and unfactors them as late as possible. Only computed jumps at the
end of basic blocks with no more than max-goto-duplication-insns
instructions are unfactored. The default value is 8.
@item max-delay-slot-insn-search
The maximum number of instructions to consider when looking for an
instruction to fill a delay slot. If more than this arbitrary number of

View File

@ -317,6 +317,12 @@ DEFPARAM(PARAM_MIN_CROSSJUMP_INSNS,
"The minimum number of matching instructions to consider for crossjumping",
5, 0, 0)
/* The maximum number of insns to duplicate when unfactoring computed gotos.
   duplicate_computed_gotos (bb-reorder.c) multiplies this value by the
   length of an unconditional jump to obtain the size limit for a block
   that may be duplicated.  The default is 8.  */
DEFPARAM(PARAM_MAX_GOTO_DUPLICATION_INSNS,
"max-goto-duplication-insns",
"The maximum number of insns to duplicate when unfactoring computed gotos",
8, 0, 0)
/* The maximum length of path considered in cse. */
DEFPARAM(PARAM_MAX_CSE_PATH_LENGTH,
"max-cse-path-length",

View File

@ -1713,6 +1713,11 @@ rest_of_compilation (void)
compute_alignments ();
/* Aggressively duplicate basic blocks ending in computed gotos to the
tails of their predecessors, but only when expensive optimizations are
enabled and we are not optimizing for size. */
if (flag_expensive_optimizations && !optimize_size)
duplicate_computed_gotos ();
if (flag_var_tracking)
rest_of_handle_variable_tracking ();