basic-block.h (compute_flow_dominators): Declare.

* basic-block.h (compute_flow_dominators): Declare.

        * gcse.c (alloc_code_hoist_mem): New function.
        (free_code_hoist_mem, compute_code_hoist_vbeinout): Likewise.
        (compute_code_hoist_data, hoist_expr_reaches_here_p): Likewise.
        (hoist_code, one_code_hoisting_pass): Likewise.
        (gcse_main): If optimizing for size, then hoist expressions
        computed in multiple dominated basic blocks.

From-SVN: r29523
This commit is contained in:
Jeffrey A Law 1999-09-20 14:53:51 +00:00 committed by Jeff Law
parent 03d32d1a08
commit bb457bd901
3 changed files with 392 additions and 0 deletions

View File

@ -24,6 +24,15 @@ Mon Sep 20 14:43:37 1999 Nick Clifton <nickc@cygnus.com>
Mon Sep 20 05:41:36 1999 Jeffrey A Law (law@cygnus.com)
* basic-block.h (compute_flow_dominators): Declare.
* gcse.c (alloc_code_hoist_mem): New function.
(free_code_hoist_mem, compute_code_hoist_vbeinout): Likewise.
(compute_code_hoist_data, hoist_expr_reaches_here_p): Likewise.
(hoist_code, one_code_hoisting_pass): Likewise.
(gcse_main): If optimizing for size, then hoist expressions
computed in multiple dominated basic blocks.
* gcse.c (invalid_nonnull_info): New function.
(delete_null_pointer_checks): Likewise.
* rtl.h (delete_null_pointer_checks): Declare.

View File

@ -281,6 +281,7 @@ extern void compute_preds_succs PROTO ((int_list_ptr *, int_list_ptr *,
extern void compute_dominators PROTO ((sbitmap *, sbitmap *,
int_list_ptr *,
int_list_ptr *));
extern void compute_flow_dominators PROTO ((sbitmap *, sbitmap *));
extern void compute_immediate_dominators PROTO ((int *, sbitmap *));
/* In lcm.c */

View File

@ -600,6 +600,14 @@ static int one_pre_gcse_pass PROTO ((int));
static void add_label_notes PROTO ((rtx, rtx));
static void alloc_code_hoist_mem PROTO ((int, int));
static void free_code_hoist_mem PROTO ((void));
static void compute_code_hoist_vbeinout PROTO ((void));
static void compute_code_hoist_data PROTO ((void));
static int hoist_expr_reaches_here_p PROTO ((int, int, int, char *));
static void hoist_code PROTO ((void));
static int one_code_hoisting_pass PROTO ((void));
static void alloc_rd_mem PROTO ((int, int));
static void free_rd_mem PROTO ((void));
static void handle_rd_kill_set PROTO ((rtx, int, int));
@ -730,8 +738,28 @@ gcse_main (f, file)
if (max_pass_bytes < bytes_used)
max_pass_bytes = bytes_used;
/* Free up memory, then reallocate for code hoisting. We can
not re-use the existing allocated memory because the tables
will not have info for the insns or registers created by
partial redundancy elimination. */
free_gcse_mem ();
/* It does not make sense to run code hoisting unless we optimizing
for code size -- it rarely makes programs faster, and can make
them bigger if we did partial redundancy elimination (when optimizing
for space, we use a classic gcse algorithm instead of partial
redundancy algorithms). */
if (optimize_size)
{
max_gcse_regno = max_reg_num ();
alloc_gcse_mem (f);
changed |= one_code_hoisting_pass ();
free_gcse_mem ();
if (max_pass_bytes < bytes_used)
max_pass_bytes = bytes_used;
}
if (file)
{
fprintf (file, "\n");
@ -5044,3 +5072,357 @@ delete_null_pointer_checks (f)
free (nonnull_avin);
free (nonnull_avout);
}
/* Code Hoisting variables and subroutines. */
/* Very busy expressions. */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;
/* Hoistable expressions. */
static sbitmap *hoist_exprs;
/* Dominator bitmaps. */
static sbitmap *dominators;
static sbitmap *post_dominators;
/* ??? We could compute post dominators and run this algorithm in
reverse to to perform tail merging, doing so would probably be
more effective than the tail merging code in jump.c.
It's unclear if tail merging could be run in parallel with
code hoisting. It would be nice. */
/* Allocate vars used for code hoisting analysis. */
static void
alloc_code_hoist_mem (n_blocks, n_exprs)
int n_blocks, n_exprs;
{
antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
transp = sbitmap_vector_alloc (n_blocks, n_exprs);
comp = sbitmap_vector_alloc (n_blocks, n_exprs);
hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
post_dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
}
/* Free vars used for code hoisting analysis. */
static void
free_code_hoist_mem ()
{
free (antloc);
free (transp);
free (comp);
free (hoist_vbein);
free (hoist_vbeout);
free (hoist_exprs);
free (transpout);
free (dominators);
free (post_dominators);
}
/* Compute the very busy expressions at entry/exit from each block.
An expression is very busy if all paths from a given point
compute the expression. */
static void
compute_code_hoist_vbeinout ()
{
int bb, changed, passes;
sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
passes = 0;
changed = 1;
while (changed)
{
changed = 0;
/* We scan the blocks in the reverse order to speed up
the convergence. */
for (bb = n_basic_blocks - 1; bb >= 0; bb--)
{
changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
hoist_vbeout[bb], transp[bb]);
if (bb != n_basic_blocks - 1)
sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein,
bb, s_succs);
}
passes++;
}
if (gcse_file)
fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
}
/* Top level routine to do the dataflow analysis needed by code hoisting. */
static void
compute_code_hoist_data ()
{
compute_local_properties (transp, comp, antloc, 0);
compute_transpout ();
compute_code_hoist_vbeinout ();
compute_flow_dominators (dominators, post_dominators);
if (gcse_file)
fprintf (gcse_file, "\n");
}
/* Determine if the expression identified by EXPR_INDEX would
reach BB unimpared if it was placed at the end of EXPR_BB.
It's unclear exactly what Muchnick meant by "unimpared". It seems
to me that the expression must either be computed or transparent in
*every* block in the path(s) from EXPR_BB to BB. Any other definition
would allow the expression to be hoisted out of loops, even if
the expression wasn't a loop invariant.
Contrast this to reachability for PRE where an expression is
considered reachable if *any* path reaches instead of *all*
paths. */
static int
hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
int expr_bb;
int expr_index;
int bb;
char *visited;
{
edge pred;
if (visited == NULL)
{
visited = (char *) alloca (n_basic_blocks);
bzero (visited, n_basic_blocks);
}
visited[expr_bb] = 1;
for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
{
int pred_bb = pred->src->index;
if (pred->src == ENTRY_BLOCK_PTR)
break;
else if (visited[pred_bb])
continue;
/* Does this predecessor generate this expression? */
else if (TEST_BIT (comp[pred_bb], expr_index))
break;
else if (! TEST_BIT (transp[pred_bb], expr_index))
break;
/* Not killed. */
else
{
visited[pred_bb] = 1;
if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
pred_bb, visited))
break;
}
}
return (pred == NULL);
}
/* Actually perform code hoisting. */
static void
hoist_code ()
{
int bb, dominated, i;
struct expr **index_map;
sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
/* Compute a mapping from expression number (`bitmap_index') to
hash table entry. */
index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
for (i = 0; i < expr_hash_table_size; i++)
{
struct expr *expr;
for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
index_map[expr->bitmap_index] = expr;
}
/* Walk over each basic block looking for potentially hoistable
expressions, nothing gets hoisted from the entry block. */
for (bb = 0; bb < n_basic_blocks; bb++)
{
int found = 0;
int insn_inserted_p;
/* Examine each expression that is very busy at the exit of this
block. These are the potentially hoistable expressions. */
for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
{
int hoistable = 0;
if (TEST_BIT (hoist_vbeout[bb], i)
&& TEST_BIT (transpout[bb], i))
{
/* We've found a potentially hoistable expression, now
we look at every block BB dominates to see if it
computes the expression. */
for (dominated = 0; dominated < n_basic_blocks; dominated++)
{
/* Ignore self dominance. */
if (bb == dominated
|| ! TEST_BIT (dominators[dominated], bb))
continue;
/* We've found a dominated block, now see if it computes
the busy expression and whether or not moving that
expression to the "beginning" of that block is safe. */
if (!TEST_BIT (antloc[dominated], i))
continue;
/* Note if the expression would reach the dominated block
unimpared if it was placed at the end of BB.
Keep track of how many times this expression is hoistable
from a dominated block into BB. */
if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
hoistable++;
}
/* If we found more than one hoistable occurence of this
expression, then note it in the bitmap of expressions to
hoist. It makes no sense to hoist things which are computed
in only one BB, and doing so tends to pessimize register
allocation. One could increase this value to try harder
to avoid any possible code expansion due to register
allocation issues; however experiments have shown that
the vast majority of hoistable expressions are only movable
from two successors, so raising this threshhold is likely
to nullify any benefit we get from code hoisting. */
if (hoistable > 1)
{
SET_BIT (hoist_exprs[bb], i);
found = 1;
}
}
}
/* If we found nothing to hoist, then quit now. */
if (! found)
continue;
/* Loop over all the hoistable expressions. */
for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
{
/* We want to insert the expression into BB only once, so
note when we've inserted it. */
insn_inserted_p = 0;
/* These tests should be the same as the tests above. */
if (TEST_BIT (hoist_vbeout[bb], i))
{
/* We've found a potentially hoistable expression, now
we look at every block BB dominates to see if it
computes the expression. */
for (dominated = 0; dominated < n_basic_blocks; dominated++)
{
/* Ignore self dominance. */
if (bb == dominated
|| ! TEST_BIT (dominators[dominated], bb))
continue;
/* We've found a dominated block, now see if it computes
the busy expression and whether or not moving that
expression to the "beginning" of that block is safe. */
if (!TEST_BIT (antloc[dominated], i))
continue;
/* The expression is computed in the dominated block and
it would be safe to compute it at the start of the
dominated block. Now we have to determine if the
expresion would reach the dominated block if it was
placed at the end of BB. */
if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
{
struct expr *expr = index_map[i];
struct occr *occr = expr->antic_occr;
rtx insn;
rtx set;
/* Find the right occurence of this expression. */
while (BLOCK_NUM (occr->insn) != dominated && occr)
occr = occr->next;
/* Should never happen. */
if (!occr)
abort ();
insn = occr->insn;
set = single_set (insn);
if (! set)
abort ();
/* Create a pseudo-reg to store the result of reaching
expressions into. Get the mode for the new pseudo
from the mode of the original destination pseudo. */
if (expr->reaching_reg == NULL)
expr->reaching_reg
= gen_reg_rtx (GET_MODE (SET_DEST (set)));
/* In theory this should never fail since we're creating
a reg->reg copy.
However, on the x86 some of the movXX patterns actually
contain clobbers of scratch regs. This may cause the
insn created by validate_change to not match any
pattern and thus cause validate_change to fail. */
if (validate_change (insn, &SET_SRC (set),
expr->reaching_reg, 0))
{
occr->deleted_p = 1;
if (!insn_inserted_p)
{
insert_insn_end_bb (index_map[i], bb, 0);
insn_inserted_p = 1;
}
}
}
}
}
}
}
}
/* Top level routine to perform one code hoisting (aka unification) pass
Return non-zero if a change was made. */
static int
one_code_hoisting_pass ()
{
int changed = 0;
alloc_expr_hash_table (max_cuid);
compute_expr_hash_table ();
if (gcse_file)
dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
expr_hash_table_size, n_exprs);
if (n_exprs > 0)
{
alloc_code_hoist_mem (n_basic_blocks, n_exprs);
compute_code_hoist_data ();
hoist_code ();
free_code_hoist_mem ();
}
free_expr_hash_table ();
return changed;
}