From bb457bd90107d2cae48e2f1a19077bd859c47b70 Mon Sep 17 00:00:00 2001 From: Jeffrey A Law Date: Mon, 20 Sep 1999 14:53:51 +0000 Subject: [PATCH] basic-block.h (compute_flow_dominators): Declare. * basic-block.h (compute_flow_dominators): Declare. * gcse.c (alloc_code_hoist_mem): New function. (free_code_hoist_mem, compute_code_hoist_vbeinout): Likewise. (compute_code_hoist_data, hoist_expr_reaches_here_p): Likewise. (hoist_code, one_code_hoisting_pass): Likewise. (gcse_main): If optimizing for size, then hoist expressions computed in multiple dominated basic blocks. From-SVN: r29523 --- gcc/ChangeLog | 9 ++ gcc/basic-block.h | 1 + gcc/gcse.c | 382 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 392 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3e424c46064..cfb83bcda0d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -24,6 +24,15 @@ Mon Sep 20 14:43:37 1999 Nick Clifton Mon Sep 20 05:41:36 1999 Jeffrey A Law (law@cygnus.com) + * basic-block.h (compute_flow_dominators): Declare. + + * gcse.c (alloc_code_hoist_mem): New function. + (free_code_hoist_mem, compute_code_hoist_vbeinout): Likewise. + (compute_code_hoist_data, hoist_expr_reaches_here_p): Likewise. + (hoist_code, one_code_hoisting_pass): Likewise. + (gcse_main): If optimizing for size, then hoist expressions + computed in multiple dominated basic blocks. + * gcse.c (invalid_nonnull_info): New function. (delete_null_pointer_checks): Likewise. * rtl.h (delete_null_pointer_checks): Declare. 
diff --git a/gcc/basic-block.h b/gcc/basic-block.h index c6a90653927..4eca203e2ec 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -281,6 +281,7 @@ extern void compute_preds_succs PROTO ((int_list_ptr *, int_list_ptr *, extern void compute_dominators PROTO ((sbitmap *, sbitmap *, int_list_ptr *, int_list_ptr *)); +extern void compute_flow_dominators PROTO ((sbitmap *, sbitmap *)); extern void compute_immediate_dominators PROTO ((int *, sbitmap *)); /* In lcm.c */ diff --git a/gcc/gcse.c b/gcc/gcse.c index a02320ec5be..def34898cd7 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -600,6 +600,14 @@ static int one_pre_gcse_pass PROTO ((int)); static void add_label_notes PROTO ((rtx, rtx)); +static void alloc_code_hoist_mem PROTO ((int, int)); +static void free_code_hoist_mem PROTO ((void)); +static void compute_code_hoist_vbeinout PROTO ((void)); +static void compute_code_hoist_data PROTO ((void)); +static int hoist_expr_reaches_here_p PROTO ((int, int, int, char *)); +static void hoist_code PROTO ((void)); +static int one_code_hoisting_pass PROTO ((void)); + static void alloc_rd_mem PROTO ((int, int)); static void free_rd_mem PROTO ((void)); static void handle_rd_kill_set PROTO ((rtx, int, int)); @@ -730,8 +738,28 @@ gcse_main (f, file) if (max_pass_bytes < bytes_used) max_pass_bytes = bytes_used; + /* Free up memory, then reallocate for code hoisting. We can + not re-use the existing allocated memory because the tables + will not have info for the insns or registers created by + partial redundancy elimination. */ free_gcse_mem (); + /* It does not make sense to run code hoisting unless we optimizing + for code size -- it rarely makes programs faster, and can make + them bigger if we did partial redundancy elimination (when optimizing + for space, we use a classic gcse algorithm instead of partial + redundancy algorithms). 
*/ + if (optimize_size) + { + max_gcse_regno = max_reg_num (); + alloc_gcse_mem (f); + changed |= one_code_hoisting_pass (); + free_gcse_mem (); + + if (max_pass_bytes < bytes_used) + max_pass_bytes = bytes_used; + } + if (file) { fprintf (file, "\n"); @@ -5044,3 +5072,357 @@ delete_null_pointer_checks (f) free (nonnull_avin); free (nonnull_avout); } + +/* Code Hoisting variables and subroutines. */ + +/* Very busy expressions. */ +static sbitmap *hoist_vbein; +static sbitmap *hoist_vbeout; + +/* Hoistable expressions. */ +static sbitmap *hoist_exprs; + +/* Dominator bitmaps. */ +static sbitmap *dominators; +static sbitmap *post_dominators; + +/* ??? We could compute post dominators and run this algorithm in + reverse to perform tail merging; doing so would probably be + more effective than the tail merging code in jump.c. + + It's unclear if tail merging could be run in parallel with + code hoisting. It would be nice. */ + +/* Allocate the bitmap vectors used for code hoisting analysis. */ + +static void +alloc_code_hoist_mem (n_blocks, n_exprs) + int n_blocks, n_exprs; +{ + antloc = sbitmap_vector_alloc (n_blocks, n_exprs); + transp = sbitmap_vector_alloc (n_blocks, n_exprs); + comp = sbitmap_vector_alloc (n_blocks, n_exprs); + + hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); + hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); + hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs); + transpout = sbitmap_vector_alloc (n_blocks, n_exprs); + + dominators = sbitmap_vector_alloc (n_blocks, n_blocks); + post_dominators = sbitmap_vector_alloc (n_blocks, n_blocks); +} + +/* Free the bitmap vectors set up by alloc_code_hoist_mem. */ + +static void +free_code_hoist_mem () +{ + free (antloc); + free (transp); + free (comp); + + free (hoist_vbein); + free (hoist_vbeout); + free (hoist_exprs); + free (transpout); + + free (dominators); + free (post_dominators); +} + +/* Compute the very busy expressions at entry/exit from each block. 
+ + An expression is very busy if all paths from a given point + compute the expression. */ + +static void +compute_code_hoist_vbeinout () +{ + int bb, changed, passes; + + sbitmap_vector_zero (hoist_vbeout, n_basic_blocks); + sbitmap_vector_zero (hoist_vbein, n_basic_blocks); + + passes = 0; + changed = 1; + while (changed) + { + changed = 0; + /* VBEOUT is built from the successors' VBEIN, so scan in reverse to + converge faster; the last block's VBEOUT is never recomputed. */ + for (bb = n_basic_blocks - 1; bb >= 0; bb--) + { + changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb], + hoist_vbeout[bb], transp[bb]); + if (bb != n_basic_blocks - 1) + sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein, + bb, s_succs); + } + passes++; + } + + if (gcse_file) + fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes); +} + +/* Top level routine to do the dataflow analysis needed by code hoisting. */ + +static void +compute_code_hoist_data () +{ + compute_local_properties (transp, comp, antloc, 0); + compute_transpout (); + compute_code_hoist_vbeinout (); + compute_flow_dominators (dominators, post_dominators); + if (gcse_file) + fprintf (gcse_file, "\n"); +} + +/* Determine if the expression identified by EXPR_INDEX would + reach BB unimpared if it was placed at the end of EXPR_BB. + + It's unclear exactly what Muchnick meant by "unimpared". It seems + to me that the expression must either be computed or transparent in + *every* block in the path(s) from EXPR_BB to BB. Any other definition + would allow the expression to be hoisted out of loops, even if + the expression wasn't a loop invariant. + + Contrast this to reachability for PRE where an expression is + considered reachable if *any* path reaches instead of *all* + paths. 
*/ + +static int +hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited) + int expr_bb; + int expr_index; + int bb; + char *visited; +{ + edge pred; + + if (visited == NULL) + { + visited = (char *) alloca (n_basic_blocks); + bzero (visited, n_basic_blocks); + } + + visited[expr_bb] = 1; + for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next) + { + int pred_bb = pred->src->index; + + if (pred->src == ENTRY_BLOCK_PTR) + break; + else if (visited[pred_bb]) + continue; + /* Does this predecessor generate this expression? */ + else if (TEST_BIT (comp[pred_bb], expr_index)) + break; + else if (! TEST_BIT (transp[pred_bb], expr_index)) + break; + /* Not killed. */ + else + { + visited[pred_bb] = 1; + if (! hoist_expr_reaches_here_p (expr_bb, expr_index, + pred_bb, visited)) + break; + } + } + + return (pred == NULL); +} + +/* Actually perform code hoisting. */ +static void +hoist_code () +{ + int bb, dominated, i; + struct expr **index_map; + + sbitmap_vector_zero (hoist_exprs, n_basic_blocks); + + /* Compute a mapping from expression number (`bitmap_index') to + hash table entry. */ + + index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *)); + bzero ((char *) index_map, n_exprs * sizeof (struct expr *)); + for (i = 0; i < expr_hash_table_size; i++) + { + struct expr *expr; + + for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash) + index_map[expr->bitmap_index] = expr; + } + + /* Walk over each basic block looking for potentially hoistable + expressions, nothing gets hoisted from the entry block. */ + for (bb = 0; bb < n_basic_blocks; bb++) + { + int found = 0; + int insn_inserted_p; + + /* Examine each expression that is very busy at the exit of this + block. These are the potentially hoistable expressions. 
*/ + for (i = 0; i < hoist_vbeout[bb]->n_bits; i++) + { + int hoistable = 0; + if (TEST_BIT (hoist_vbeout[bb], i) + && TEST_BIT (transpout[bb], i)) + { + /* We've found a potentially hoistable expression, now + we look at every block BB dominates to see if it + computes the expression. */ + for (dominated = 0; dominated < n_basic_blocks; dominated++) + { + /* Ignore self dominance. */ + if (bb == dominated + || ! TEST_BIT (dominators[dominated], bb)) + continue; + + /* We've found a dominated block, now see if it computes + the busy expression and whether or not moving that + expression to the "beginning" of that block is safe. */ + if (!TEST_BIT (antloc[dominated], i)) + continue; + + /* Note if the expression would reach the dominated block + unimpaired if it was placed at the end of BB. + + Keep track of how many times this expression is hoistable + from a dominated block into BB. */ + if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) + hoistable++; + } + + /* If we found more than one hoistable occurrence of this + expression, then note it in the bitmap of expressions to + hoist. It makes no sense to hoist things which are computed + in only one BB, and doing so tends to pessimize register + allocation. One could increase this value to try harder + to avoid any possible code expansion due to register + allocation issues; however experiments have shown that + the vast majority of hoistable expressions are only movable + from two successors, so raising this threshold is likely + to nullify any benefit we get from code hoisting. */ + if (hoistable > 1) + { + SET_BIT (hoist_exprs[bb], i); + found = 1; + } + } + } + + /* If we found nothing to hoist, move on to the next block. */ + if (! found) + continue; + + /* Loop over all the hoistable expressions. */ + for (i = 0; i < hoist_exprs[bb]->n_bits; i++) + { + /* We want to insert the expression into BB only once, so + note when we've inserted it. 
*/ + insn_inserted_p = 0; + + /* Only hoist expressions the scan above marked in HOIST_EXPRS. */ + if (TEST_BIT (hoist_exprs[bb], i)) + { + /* We've found a potentially hoistable expression, now + we look at every block BB dominates to see if it + computes the expression. */ + for (dominated = 0; dominated < n_basic_blocks; dominated++) + { + /* Ignore self dominance. */ + if (bb == dominated + || ! TEST_BIT (dominators[dominated], bb)) + continue; + + /* We've found a dominated block, now see if it computes + the busy expression and whether or not moving that + expression to the "beginning" of that block is safe. */ + if (!TEST_BIT (antloc[dominated], i)) + continue; + + /* The expression is computed in the dominated block and + it would be safe to compute it at the start of the + dominated block. Now we have to determine if the + expression would reach the dominated block if it was + placed at the end of BB. */ + if (hoist_expr_reaches_here_p (bb, i, dominated, NULL)) + { + struct expr *expr = index_map[i]; + struct occr *occr = expr->antic_occr; + rtx insn; + rtx set; + + + /* Find the right occurrence; test OCCR before using it. */ + while (occr && BLOCK_NUM (occr->insn) != dominated) + occr = occr->next; + + /* Should never happen. */ + if (!occr) + abort (); + + insn = occr->insn; + + set = single_set (insn); + if (! set) + abort (); + + /* Create a pseudo-reg to store the result of reaching + expressions into. Get the mode for the new pseudo + from the mode of the original destination pseudo. */ + if (expr->reaching_reg == NULL) + expr->reaching_reg + = gen_reg_rtx (GET_MODE (SET_DEST (set))); + + /* In theory this should never fail since we're creating + a reg->reg copy. + + However, on the x86 some of the movXX patterns actually + contain clobbers of scratch regs. This may cause the + insn created by validate_change to not match any + pattern and thus cause validate_change to fail. 
*/ + if (validate_change (insn, &SET_SRC (set), + expr->reaching_reg, 0)) + { + occr->deleted_p = 1; + if (!insn_inserted_p) + { + insert_insn_end_bb (index_map[i], bb, 0); + insn_inserted_p = 1; + } + } + } + } + } + } + } +} + +/* Top level routine to perform one code hoisting (aka unification) pass. + + Returns CHANGED, which is never set here, so the result is always zero. */ + +static int +one_code_hoisting_pass () +{ + int changed = 0; + + alloc_expr_hash_table (max_cuid); + compute_expr_hash_table (); + if (gcse_file) + dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table, + expr_hash_table_size, n_exprs); + if (n_exprs > 0) + { + alloc_code_hoist_mem (n_basic_blocks, n_exprs); + compute_code_hoist_data (); + hoist_code (); + free_code_hoist_mem (); + } + free_expr_hash_table (); + + return changed; +}