diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a28101b899d..86c9d2e6f93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,37 @@
+2004-03-03  Mostafa Hagog
+
+	* common.opt: Add description of the new -fgcse-after-reload flag.
+
+	* flags.h (flag_gcse_after_reload): Declaration of global variable.
+
+	* gcse.c (reg_used_on_edge, reg_set_between_after_reload_p,
+	reg_used_between_after_reload_p, get_avail_load_store_reg,
+	is_jump_table_basic_block, bb_has_well_behaved_predecessors,
+	get_bb_avail_insn, hash_scan_set_after_reload,
+	compute_hash_table_after_reload, eliminate_partially_redundant_loads,
+	gcse_after_reload): New functions to implement gcse-after-reload.
+	(gcse_after_reload_main): New function, the main entry point to
+	gcse-after-reload.
+
+	* rtl.h (gcse_after_reload_main): Declaration of the new function.
+
+	* opts.c (common_handle_option): Handle the -fgcse-after-reload flag.
+
+	* toplev.c (flag_gcse_after_reload): Initialization.
+
+	* passes.c (rest_of_handle_gcse2): New function; call
+	gcse_after_reload_main.
+
+	* params.def (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
+	PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION): New parameters for tuning
+	the gcse-after-reload optimization.
+
+	* params.h (GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
+	GCSE_AFTER_RELOAD_CRITICAL_FRACTION): Two macros to access the tuning
+	parameters.
+
+	* doc/invoke.texi: Document the new -fgcse-after-reload flag.
+
 2004-03-03  Nicolas Pitre
 
 	* config/arm/ieee754-df.S (muldf3, divdf3): Fix denormalization of
diff --git a/gcc/common.opt b/gcc/common.opt
index cc7f218426c..d9faa60a8f5 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -371,7 +371,13 @@ Perform store motion after global common subexpression elimination
 
 fgcse-las
 Common
-Perform redundant load after store elimination in global common subexpression elimination
+Perform redundant load after store elimination in global common subexpression
+elimination
+
+fgcse-after-reload
+Common
+Perform global common subexpression elimination after register allocation
+has finished.
 
 fguess-branch-probability
 Common
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4fb703ce105..26b9a127bec 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -270,8 +270,8 @@ in the following sections.
 -fdelayed-branch  -fdelete-null-pointer-checks @gol
 -fexpensive-optimizations  -ffast-math  -ffloat-store @gol
 -fforce-addr  -fforce-mem  -ffunction-sections @gol
--fgcse  -fgcse-lm  -fgcse-sm  -fgcse-las  -floop-optimize @gol
--fcrossjumping  -fif-conversion  -fif-conversion2 @gol
+-fgcse  -fgcse-lm  -fgcse-sm  -fgcse-las  -fgcse-after-reload @gol
+-floop-optimize  -fcrossjumping  -fif-conversion  -fif-conversion2 @gol
 -finline-functions  -finline-limit=@var{n}  -fkeep-inline-functions @gol
 -fkeep-static-consts  -fmerge-constants  -fmerge-all-constants @gol
 -fmove-all-movables  -fnew-ra  -fno-branch-count-reg @gol
@@ -3646,7 +3646,8 @@ invoking @option{-O2} on programs that use computed gotos.
 @opindex O3
 Optimize yet more.  @option{-O3} turns on all optimizations specified by
 @option{-O2} and also turns on the @option{-finline-functions},
-@option{-fweb} and @option{-frename-registers} options.
+@option{-fweb}, @option{-frename-registers},
+and @option{-fgcse-after-reload} options.
 
 @item -O0
 @opindex O0
@@ -3957,6 +3958,12 @@ same memory location (both partial and full redundancies).
 
 Enabled by default when gcse is enabled.
 
+@item -fgcse-after-reload
+@opindex fgcse-after-reload
+When @option{-fgcse-after-reload} is enabled, a redundant load elimination
+pass is performed after reload.  The purpose of this pass is to clean up
+redundant spilling.
+
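As a concrete illustration of the spill redundancy this pass targets, consider the C fragment below (an illustrative sketch, not part of the patch; whether x really lands in a stack slot depends on the target and on register pressure):

int
f (int a, int b)
{
  int x;

  if (a > b)
    x = a * b;   /* x spilled: stored to its stack slot here...  */
  else
    x = a - b;   /* ...and here.  */
  /* The reload of x at the join point is redundant whenever a
     predecessor still holds the value in a register; the pass replaces
     it with a register copy or a load on the incoming edges.  */
  return x + a;
}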
 @item -floop-optimize
 @opindex floop-optimize
 Perform loop optimizations: move constant expressions out of loops, simplify
diff --git a/gcc/flags.h b/gcc/flags.h
index b986479c4b5..b088f6cb3ba 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -672,6 +672,11 @@ extern int flag_gcse_sm;
 
 extern int flag_gcse_las;
 
+/* Nonzero if we want to perform global redundancy elimination after
+   register allocation.  */
+
+extern int flag_gcse_after_reload;
+
 /* Nonzero if value histograms should be used to optimize code.  */
 
 extern int flag_value_profile_transformations;
diff --git a/gcc/gcse.c b/gcc/gcse.c
index ad1c9fda64e..975fb1fbe2f 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -1980,6 +1980,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
 
           antic_occr->insn = insn;
           antic_occr->next = NULL;
+          antic_occr->deleted_p = 0;
         }
     }
 
@@ -2016,6 +2017,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
 
           avail_occr->insn = insn;
           avail_occr->next = NULL;
+          avail_occr->deleted_p = 0;
         }
     }
 }
@@ -2102,6 +2104,7 @@ insert_set_in_table (rtx x, rtx insn, struct hash_table *table)
 
       cur_occr->insn = insn;
       cur_occr->next = NULL;
+      cur_occr->deleted_p = 0;
     }
 }
 
@@ -8091,4 +8094,639 @@ is_too_expensive (const char *pass)
   return false;
 }
 
+/* The following code implements gcse after reload; the purpose of this
+   pass is to clean up redundant loads generated by reload and by other
+   optimizations that run after gcse.  It searches for simple inter-block
+   redundancies and tries to eliminate them by adding moves and loads
+   in cold places.  */
+
+/* The following structure holds the information about the occurrences of
+   the redundant instructions.  */
+struct unoccr
+{
+  struct unoccr *next;
+  edge pred;
+  rtx insn;
+};
+
+static bool reg_used_on_edge (rtx, edge);
+static rtx reg_set_between_after_reload_p (rtx, rtx, rtx);
+static rtx reg_used_between_after_reload_p (rtx, rtx, rtx);
+static rtx get_avail_load_store_reg (rtx);
+static bool is_jump_table_basic_block (basic_block);
+static bool bb_has_well_behaved_predecessors (basic_block);
+static struct occr *get_bb_avail_insn (basic_block, struct occr *);
+static void hash_scan_set_after_reload (rtx, rtx, struct hash_table *);
+static void compute_hash_table_after_reload (struct hash_table *);
+static void eliminate_partially_redundant_loads (basic_block,
+                                                 rtx,
+                                                 struct expr *);
+static void gcse_after_reload (void);
+void gcse_after_reload_main (rtx, FILE *);
+
+
+/* Check if register REG is used in any insn waiting to be inserted on
+   edge E.  Assumes no such insn can be a CALL_INSN; if that ever
+   changes, call reg_used_between_p with PREV (insn) and NEXT (insn)
+   instead of reg_overlap_mentioned_p.  */
+
+static bool
+reg_used_on_edge (rtx reg, edge e)
+{
+  rtx insn;
+
+  for (insn = e->insns; insn; insn = NEXT_INSN (insn))
+    if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+      return true;
+
+  return false;
+}
+
+/* Return the insn that sets or clobbers register REG in between FROM_INSN
+   and TO_INSN (exclusive of those two).  Just like reg_set_between_p,
+   but for hard registers and not pseudos, and returning the insn found
+   rather than a flag.  */
+
+static rtx
+reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
+{
+  rtx insn;
+  int regno;
+
+  if (GET_CODE (reg) != REG)
+    abort ();
+  regno = REGNO (reg);
+
+  /* We are called after register allocation.  */
+  if (regno >= FIRST_PSEUDO_REGISTER)
+    abort ();
+
+  if (from_insn == to_insn)
+    return NULL_RTX;
+
+  for (insn = NEXT_INSN (from_insn);
+       insn != to_insn;
+       insn = NEXT_INSN (insn))
+    {
+      if (INSN_P (insn))
+        {
+          if (FIND_REG_INC_NOTE (insn, reg)
+              || (GET_CODE (insn) == CALL_INSN
+                  && call_used_regs[regno])
+              || find_reg_fusage (insn, CLOBBER, reg))
+            return insn;
+        }
+      if (set_of (reg, insn) != NULL_RTX)
+        return insn;
+    }
+  return NULL_RTX;
+}
+
+/* Return the insn that uses register REG in between FROM_INSN and TO_INSN
+   (exclusive of those two).  Similar to reg_used_between_p, but for hard
+   registers and not pseudos.  */
+
+static rtx
+reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
+{
+  rtx insn;
+  int regno;
+
+  if (GET_CODE (reg) != REG)
+    return to_insn;
+  regno = REGNO (reg);
+
+  /* We are called after register allocation.  */
+  if (regno >= FIRST_PSEUDO_REGISTER)
+    abort ();
+  if (from_insn == to_insn)
+    return NULL_RTX;
+
+  for (insn = NEXT_INSN (from_insn);
+       insn != to_insn;
+       insn = NEXT_INSN (insn))
+    if (INSN_P (insn)
+        && (reg_overlap_mentioned_p (reg, PATTERN (insn))
+            || (GET_CODE (insn) == CALL_INSN
+                && call_used_regs[regno])
+            || find_reg_fusage (insn, USE, reg)
+            || find_reg_fusage (insn, CLOBBER, reg)))
+      return insn;
+  return NULL_RTX;
+}
+
+/* Return the loaded/stored register of a load/store instruction.  */
+
+static rtx
+get_avail_load_store_reg (rtx insn)
+{
+  if (GET_CODE (SET_DEST (PATTERN (insn))) == REG)  /* A load.  */
+    return SET_DEST (PATTERN (insn));
+  if (GET_CODE (SET_SRC (PATTERN (insn))) == REG)  /* A store.  */
+    return SET_SRC (PATTERN (insn));
+  abort ();
+}
+
+/* Return nonzero if basic block BB ends in a jump table dispatch
+   (ADDR_VEC or ADDR_DIFF_VEC).  We don't handle such blocks, nor
+   abnormal edges.  */
+
+static bool
+is_jump_table_basic_block (basic_block bb)
+{
+  rtx insn = BB_END (bb);
+
+  if (GET_CODE (insn) == JUMP_INSN
+      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+    return true;
+  return false;
+}
+
+/* Return nonzero if the predecessors of BB are "well behaved": no
+   incoming edge is both abnormal and critical, and no predecessor
+   ends in a jump table.  */
+
+static bool
+bb_has_well_behaved_predecessors (basic_block bb)
+{
+  edge pred;
+
+  if (! bb->pred)
+    return false;
+  for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
+    if (((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
+        || is_jump_table_basic_block (pred->src))
+      return false;
+  return true;
+}
+
+
+/* Search the occurrence list OCCR for an occurrence of the expression
+   in BB.  Return it, or NULL if there is none.  */
+
+static struct occr *
+get_bb_avail_insn (basic_block bb, struct occr *occr)
+{
+  for (; occr != NULL; occr = occr->next)
+    if (BLOCK_FOR_INSN (occr->insn)->index == bb->index)
+      return occr;
+  return NULL;
+}
+
+/* Perform a partial GCSE pass after reload, trying to eliminate redundant
+   loads created by the reload pass.  We look for loads that are fully or
+   partially redundant with loads/stores in predecessor blocks, and try
+   adding loads to make them fully redundant.  We also check whether
+   deleting the redundant load justifies the loads that have to be added.
+
+   Algorithm:
+   1. Build an available-expressions hash table:
+      For each load/store instruction, if the loaded/stored memory didn't
+      change until the end of the basic block, add this memory expression
+      to the hash table.
+   2. Perform redundancy elimination:
+      For each load instruction, perform partial redundancy elimination:
+      check whether it's worth adding loads to make the load fully
+      redundant.  If so, add the loads and register copies and delete
+      the load (see the schematic example after this comment).
+
+   Future enhancement:
+     If the loaded register is used/defined between the load and some
+     store, look for some other free register between the load and all
+     its stores, and replace the load with a copy from this register to
+     the loaded register.  */
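A schematic example of the transformation (illustrative RTL only; the register numbers and the stack-slot address are invented, and this fragment is not part of the patch):

/* Predecessor P1 ends with a store that leaves the value in reg 5;
   predecessor P2 makes no access to the slot; BB begins with a
   partially redundant load:

     P1:  (set (mem:SI (plus:SI (reg:SI sp) (const_int 16))) (reg:SI 5))
     P2:  ...
     BB:  (set (reg:SI 3) (mem:SI (plus:SI (reg:SI sp) (const_int 16))))

   When the profitability checks succeed, the pass inserts

     (set (reg:SI 3) (reg:SI 5))               on the edge P1->BB,
     (set (reg:SI 3) (mem:SI (plus:SI ...)))   on the edge P2->BB,

   and the load in BB is deleted (or only marked for later deletion
   when it is itself available at the end of BB).  */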
+
+/* This handles the case where several stores feed a partially redundant
+   load.  It checks whether the redundancy elimination is possible and
+   whether it's worth it.  */
+
+static void
+eliminate_partially_redundant_loads (basic_block bb, rtx insn,
+                                     struct expr *expr)
+{
+  edge pred;
+  rtx avail_insn = NULL_RTX;
+  rtx avail_reg;
+  rtx dest, pat;
+  struct occr *a_occr;
+  struct unoccr *occr, *avail_occrs = NULL;
+  struct unoccr *unoccr, *unavail_occrs = NULL;
+  int npred_ok = 0;
+  gcov_type ok_count = 0;       /* Redundant load execution count.  */
+  gcov_type critical_count = 0; /* Execution count of critical edges.  */
+
+  /* The execution count of the loads to be added to make the
+     load fully redundant.  */
+  gcov_type not_ok_count = 0;
+  basic_block pred_bb;
+
+  pat = PATTERN (insn);
+  dest = SET_DEST (pat);
+  /* Check that the loaded register is neither used nor killed from the
+     beginning of the block up to the load itself.  */
+  if (reg_used_between_after_reload_p (dest,
+                                       PREV_INSN (BB_HEAD (bb)), insn))
+    return;
+
+  /* Check each predecessor for the potential to replace the load with
+     a copy.  */
+  for (pred = bb->pred; pred; pred = pred->pred_next)
+    {
+      rtx next_pred_bb_end;
+
+      avail_insn = NULL_RTX;
+      pred_bb = pred->src;
+      next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
+      for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
+           a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
+        {
+          /* Check if the loaded register is not used.  */
+          avail_insn = a_occr->insn;
+          if (! (avail_reg = get_avail_load_store_reg (avail_insn)))
+            abort ();
+          /* Make sure we can generate a move from register avail_reg to
+             dest.  */
+          extract_insn (gen_move_insn (copy_rtx (dest),
+                                       copy_rtx (avail_reg)));
+          if (! constrain_operands (1)
+              || reg_killed_on_edge (avail_reg, pred)
+              || reg_used_on_edge (dest, pred))
+            {
+              avail_insn = NULL;
+              continue;
+            }
+          if (! reg_set_between_after_reload_p (avail_reg, avail_insn,
+                                                next_pred_bb_end))
+            /* AVAIL_INSN remains non-null.  */
+            break;
+          else
+            avail_insn = NULL;
+        }
+      if (avail_insn != NULL_RTX)
+        {
+          npred_ok++;
+          ok_count += pred->count;
+          if (EDGE_CRITICAL_P (pred))
+            critical_count += pred->count;
+          occr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
+          occr->insn = avail_insn;
+          occr->pred = pred;
+          occr->next = avail_occrs;
+          avail_occrs = occr;
+        }
+      else
+        {
+          not_ok_count += pred->count;
+          if (EDGE_CRITICAL_P (pred))
+            critical_count += pred->count;
+          unoccr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
+          unoccr->insn = NULL_RTX;
+          unoccr->pred = pred;
+          unoccr->next = unavail_occrs;
+          unavail_occrs = unoccr;
+        }
+    }
+
+  if (npred_ok == 0             /* No load can be replaced by copy.  */
+      || (optimize_size && npred_ok > 1)) /* Prevent exploding the code.  */
+    return;
+
+  /* Check if it's worth applying the partial redundancy elimination.  */
+  if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
+    return;
+
+  if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
+    return;
+
+  /* Generate moves to the loaded register from where
+     the memory is available.  */
+  for (occr = avail_occrs; occr; occr = occr->next)
+    {
+      avail_insn = occr->insn;
+      pred = occr->pred;
+      /* Set avail_reg to be the register having the value of the
+         memory.  */
+      avail_reg = get_avail_load_store_reg (avail_insn);
+      if (! avail_reg)
+        abort ();
+
+      insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
+                                          copy_rtx (avail_reg)),
+                           pred);
+
+      if (gcse_file)
+        fprintf (gcse_file,
+                 "GCSE AFTER reload generating move from %d to %d "
+                 "on edge from %d to %d\n",
+                 REGNO (avail_reg),
+                 REGNO (dest),
+                 pred->src->index,
+                 pred->dest->index);
+    }
+
+  /* Regenerate loads where the memory is unavailable.  */
+  for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
+    {
+      pred = unoccr->pred;
+      insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
+
+      if (gcse_file)
+        fprintf (gcse_file,
+                 "GCSE AFTER reload: generating on edge from %d to %d "
+                 "a copy of load:\n",
+                 pred->src->index,
+                 pred->dest->index);
+    }
+
+  /* Delete the load if it is not available at the end of this block;
+     if it is available, only mark it for later deletion, since it may
+     still help discover additional redundancies.  */
+  for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
+       a_occr && (a_occr->insn != insn);
+       a_occr = get_bb_avail_insn (bb, a_occr->next));
+
+  if (!a_occr)
+    delete_insn (insn);
+  else
+    a_occr->deleted_p = 1;
+}
+
+/* Perform the redundancy elimination described above.  */
+
+static void
+gcse_after_reload (void)
+{
+  unsigned int i;
+  rtx insn;
+  basic_block bb;
+  struct expr *expr;
+  struct occr *occr;
+
+  /* Note we start at block 1.  */
+
+  if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
+    return;
+
+  FOR_BB_BETWEEN (bb,
+                  ENTRY_BLOCK_PTR->next_bb->next_bb,
+                  EXIT_BLOCK_PTR,
+                  next_bb)
+    {
+      if (! bb_has_well_behaved_predecessors (bb))
+        continue;
+
+      /* Do not try this optimization on cold basic blocks.  */
+      if (probably_cold_bb_p (bb))
+        continue;
+
+      reset_opr_set_tables ();
+
+      for (insn = BB_HEAD (bb);
+           insn != NULL
+           && insn != NEXT_INSN (BB_END (bb));
+           insn = NEXT_INSN (insn))
+        {
+          /* Is it a load, i.e. of the form (set (reg) (mem))?  */
+          if (GET_CODE (insn) == INSN
+              && GET_CODE (PATTERN (insn)) == SET
+              && GET_CODE (SET_DEST (PATTERN (insn))) == REG
+              && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
+            {
+              rtx pat = PATTERN (insn);
+              rtx src = SET_SRC (pat);
+              struct expr *expr;
+
+              if (general_operand (src, GET_MODE (src))
+                  /* Is the expression recorded?  */
+                  && (expr = lookup_expr (src, &expr_hash_table)) != NULL
+                  /* Are the operands unchanged since the start of the
+                     block?  */
+                  && oprs_not_set_p (src, insn)
+                  && ! MEM_VOLATILE_P (src)
+                  && GET_MODE (src) != BLKmode
+                  && !(flag_non_call_exceptions && may_trap_p (src))
+                  && !side_effects_p (src))
+                {
+                  /* We now have a load (insn) and an available memory at
+                     its BB start (expr).  Try to remove the load if it
+                     is redundant.  */
+                  eliminate_partially_redundant_loads (bb, insn, expr);
+                }
+            }
+
+          /* Keep track of everything modified by this insn.  */
+          if (INSN_P (insn))
+            mark_oprs_set (insn);
+        }
+    }
+
+  commit_edge_insertions ();
+
+  /* Go over the expression hash table and delete insns that were
+     marked for later deletion.  */
+  for (i = 0; i < expr_hash_table.size; i++)
+    {
+      for (expr = expr_hash_table.table[i];
+           expr != NULL;
+           expr = expr->next_same_hash)
+        for (occr = expr->avail_occr; occr; occr = occr->next)
+          if (occr->deleted_p)
+            delete_insn (occr->insn);
+    }
+}
+
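For illustration, the shapes the scan above accepts and rejects (schematic RTL, not part of the patch):

/* Accepted by the (set (reg) (mem)) test:

     (set (reg:SI 0) (mem:SI (plus:SI (reg:SI 6) (const_int 8))))

   Rejected by the follow-up filters: a volatile access such as

     (set (reg:SI 0) (mem/v:SI (reg:SI 6)))

   and any load whose address operands were modified earlier in the
   block, which makes oprs_not_set_p fail.  */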
+/* Scan pattern PAT of INSN and add an entry to the hash TABLE.
+   After reload we are interested in loads/stores only.  */
+
+static void
+hash_scan_set_after_reload (rtx pat, rtx insn, struct hash_table *table)
+{
+  rtx src = SET_SRC (pat);
+  rtx dest = SET_DEST (pat);
+
+  if (GET_CODE (src) != MEM && GET_CODE (dest) != MEM)
+    return;
+
+  if (GET_CODE (dest) == REG)
+    {
+      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
+          can_copy_p (GET_MODE (dest))
+          /* GCSE commonly inserts instructions after the insn.  We can't
+             do that easily for EH_REGION notes so disable GCSE on these
+             for now.  */
+          && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+          /* Is SET_SRC something we want to gcse?  */
+          && general_operand (src, GET_MODE (src))
+          /* Don't CSE a nop.  */
+          && ! set_noop_p (pat)
+          && ! JUMP_P (insn))
+        {
+          /* An expression is not available if its operands are
+             subsequently modified, including this insn.  */
+          if (oprs_available_p (src, insn))
+            insert_expr_in_table (src, GET_MODE (dest), insn, 0, 1, table);
+        }
+    }
+  else if (GET_CODE (src) == REG)
+    {
+      /* A store of a hard register to memory: record the memory
+         expression, provided it is still valid at the end of the
+         block.  */
+      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
+          can_copy_p (GET_MODE (src))
+          /* GCSE commonly inserts instructions after the insn.  We can't
+             do that easily for EH_REGION notes so disable GCSE on these
+             for now.  */
+          && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+          /* Is SET_DEST something we want to gcse?  */
+          && general_operand (dest, GET_MODE (dest))
+          /* Don't CSE a nop.  */
+          && ! set_noop_p (pat)
+          && ! JUMP_P (insn)
+          && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
+          /* Check if the memory expression is killed after insn.  */
+          && ! load_killed_in_block_p (BLOCK_FOR_INSN (insn),
+                                       INSN_CUID (insn) + 1,
+                                       dest,
+                                       1)
+          && oprs_unchanged_p (XEXP (dest, 0), insn, 1))
+        {
+          insert_expr_in_table (dest, GET_MODE (dest), insn, 0, 1, table);
+        }
+    }
+}
+
+
+/* Create hash table of memory expressions available at end of basic
+   blocks.  */
+
+static void
+compute_hash_table_after_reload (struct hash_table *table)
+{
+  unsigned int i;
+
+  table->set_p = 0;
+
+  /* Initialize count of number of entries in hash table.  */
+  table->n_elems = 0;
+  memset ((char *) table->table, 0,
+          table->size * sizeof (struct expr *));
+
+  /* While we compute the hash table we also compute a bit array of which
+     registers are set in which blocks.  */
+  sbitmap_vector_zero (reg_set_in_block, last_basic_block);
+
+  /* Re-cache any INSN_LIST nodes we have allocated.  */
+  clear_modify_mem_tables ();
+
+  /* Some working arrays used to track first and last set in each block.  */
+  reg_avail_info = (struct reg_avail_info *)
+    gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));
+
+  for (i = 0; i < max_gcse_regno; ++i)
+    reg_avail_info[i].last_bb = NULL;
+
+  FOR_EACH_BB (current_bb)
+    {
+      rtx insn;
+      unsigned int regno;
+
+      /* First pass over the instructions records information used to
+         determine when registers and memory are first and last set.  */
+      for (insn = BB_HEAD (current_bb);
+           insn && insn != NEXT_INSN (BB_END (current_bb));
+           insn = NEXT_INSN (insn))
+        {
+          if (! INSN_P (insn))
+            continue;
+
+          if (GET_CODE (insn) == CALL_INSN)
+            {
+              bool clobbers_all = false;
+
+#ifdef NON_SAVING_SETJMP
+              if (NON_SAVING_SETJMP
+                  && find_reg_note (insn, REG_SETJMP, NULL_RTX))
+                clobbers_all = true;
+#endif
+
+              for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+                if (clobbers_all
+                    || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+                                          regno))
+                  record_last_reg_set_info (insn, regno);
+
+              mark_call (insn);
+            }
+
+          note_stores (PATTERN (insn), record_last_set_info, insn);
+
+          if (GET_CODE (PATTERN (insn)) == SET)
+            {
+              rtx src, dest;
+
+              src = SET_SRC (PATTERN (insn));
+              dest = SET_DEST (PATTERN (insn));
+              if (GET_CODE (src) == MEM && auto_inc_p (XEXP (src, 0)))
+                {
+                  regno = REGNO (XEXP (XEXP (src, 0), 0));
+                  record_last_reg_set_info (insn, regno);
+                }
+              if (GET_CODE (dest) == MEM && auto_inc_p (XEXP (dest, 0)))
+                {
+                  regno = REGNO (XEXP (XEXP (dest, 0), 0));
+                  record_last_reg_set_info (insn, regno);
+                }
+            }
+        }
+
+      /* The next pass builds the hash table.  */
+      for (insn = BB_HEAD (current_bb);
+           insn && insn != NEXT_INSN (BB_END (current_bb));
+           insn = NEXT_INSN (insn))
+        if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
+          if (! find_reg_note (insn, REG_LIBCALL, NULL_RTX))
+            hash_scan_set_after_reload (PATTERN (insn), insn, table);
+    }
+
+  free (reg_avail_info);
+  reg_avail_info = NULL;
+}
+
+
+/* Main entry point of the GCSE-after-reload pass: eliminate some of the
+   redundant loads caused by spilling.  */
+
+void
+gcse_after_reload_main (rtx f, FILE *file)
+{
+  gcse_subst_count = 0;
+  gcse_create_count = 0;
+
+  gcse_file = file;
+
+  gcc_obstack_init (&gcse_obstack);
+  bytes_used = 0;
+
+  /* We need alias analysis.  */
+  init_alias_analysis ();
+
+  max_gcse_regno = max_reg_num ();
+
+  alloc_reg_set_mem (max_gcse_regno);
+  alloc_gcse_mem (f);
+  alloc_hash_table (max_cuid, &expr_hash_table, 0);
+  compute_hash_table_after_reload (&expr_hash_table);
+
+  if (gcse_file)
+    dump_hash_table (gcse_file, "Expression", &expr_hash_table);
+
+  if (expr_hash_table.n_elems > 0)
+    gcse_after_reload ();
+
+  free_hash_table (&expr_hash_table);
+
+  free_gcse_mem ();
+  free_reg_set_mem ();
+
+  /* We are finished with alias analysis.  */
+  end_alias_analysis ();
+
+  obstack_free (&gcse_obstack, NULL);
+}
+
 #include "gt-gcse.h"
diff --git a/gcc/opts.c b/gcc/opts.c
index a4267f87723..fa1971cd651 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -574,6 +574,7 @@ decode_options (unsigned int argc, const char **argv)
       flag_rename_registers = 1;
       flag_unswitch_loops = 1;
       flag_web = 1;
+      flag_gcse_after_reload = 1;
     }
 
   if (optimize < 2 || optimize_size)
@@ -1035,6 +1036,10 @@ common_handle_option (size_t scode, const char *arg,
       flag_gcse_sm = value;
      break;
 
+    case OPT_fgcse_after_reload:
+      flag_gcse_after_reload = value;
+      break;
+
     case OPT_fgcse_las:
       flag_gcse_las = value;
       break;
diff --git a/gcc/params.def b/gcc/params.def
index e00e22e3b0d..7be8ddc1322 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -131,7 +131,25 @@ DEFPARAM(PARAM_MAX_GCSE_PASSES,
 	"max-gcse-passes",
 	"The maximum number of passes to make when doing GCSE",
 	1)
-
+/* This is the threshold ratio for deciding when to perform partial
+   redundancy elimination after reload.  We perform the elimination
+   when the following holds:
+
+     (Redundant load execution count)
+     ---------------------------------  >=  GCSE_AFTER_RELOAD_PARTIAL_FRACTION
+      (Added loads execution count)                                    */
+DEFPARAM(PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
+	 "gcse-after-reload-partial-fraction",
+	 "The threshold ratio for performing partial redundancy "
+	 "elimination after reload",
+	 3)
+/* This is the minimum ratio between the execution count of the redundant
+   load and the execution count of the critical edges involved; it must
+   hold for the load redundancy elimination in gcse after reload to be
+   applied.  */
+DEFPARAM(PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION,
+	 "gcse-after-reload-critical-fraction",
+	 "The threshold ratio of critical edges execution count that "
+	 "permits performing redundancy elimination after reload",
+	 10)
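To make the two ratios concrete, here is a small self-contained sketch of the profitability test that eliminate_partially_redundant_loads applies (the execution counts are invented for the example, and all identifiers below are local to it, not part of the patch):

#include <stdio.h>

int
main (void)
{
  long ok_count = 900;        /* count of the load where a copy suffices  */
  long not_ok_count = 250;    /* count of the loads that must be added    */
  long critical_count = 80;   /* count flowing over critical edges        */
  int partial_fraction = 3;   /* gcse-after-reload-partial-fraction       */
  int critical_fraction = 10; /* gcse-after-reload-critical-fraction      */

  /* Mirrors the two early-return checks in the pass.  */
  if (ok_count < partial_fraction * not_ok_count
      || ok_count < critical_fraction * critical_count)
    printf ("keep the load\n");
  else
    printf ("eliminate the load\n");  /* 900 >= 750 and 900 >= 800.  */
  return 0;
}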
 
 /* This parameter limits the number of insns in a loop that will be unrolled,
    and by how much the loop is unrolled.
diff --git a/gcc/params.h b/gcc/params.h
index 0a784454ccd..d030dbe645a 100644
--- a/gcc/params.h
+++ b/gcc/params.h
@@ -104,6 +104,10 @@ typedef enum compiler_param
   ((size_t) PARAM_VALUE (PARAM_MAX_GCSE_MEMORY))
 #define MAX_GCSE_PASSES \
   PARAM_VALUE (PARAM_MAX_GCSE_PASSES)
+#define GCSE_AFTER_RELOAD_PARTIAL_FRACTION \
+  PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
+#define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \
+  PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
 #define MAX_UNROLLED_INSNS \
   PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
 #endif /* ! GCC_PARAMS_H */
diff --git a/gcc/passes.c b/gcc/passes.c
index 1b012772f4b..1bd554c61e1 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -158,6 +158,7 @@ enum dump_file_index
   DFI_lreg,
   DFI_greg,
   DFI_postreload,
+  DFI_gcse2,
   DFI_flow2,
   DFI_peephole2,
   DFI_ce3,
@@ -178,7 +179,7 @@ enum dump_file_index
    Remaining -d letters:
 
 	"            e        m   q         "
-	"          JK   O Q     WXY        "
+	"           K   O Q     WXY        "
 */
 
@@ -210,6 +211,7 @@ static struct dump_file_info dump_file_tbl[DFI_MAX] =
   { "lreg",       'l', 1, 0, 0 },
   { "greg",       'g', 1, 0, 0 },
   { "postreload", 'o', 1, 0, 0 },
+  { "gcse2",      'J', 0, 0, 0 },
   { "flow2",      'w', 1, 0, 0 },
   { "peephole2",  'z', 1, 0, 0 },
   { "ce3",        'E', 1, 0, 0 },
@@ -788,6 +790,23 @@ rest_of_handle_sched2 (tree decl, rtx insns)
 }
 #endif
 
+static void
+rest_of_handle_gcse2 (tree decl, rtx insns)
+{
+  open_dump_file (DFI_gcse2, decl);
+
+  gcse_after_reload_main (insns, dump_file);
+  rebuild_jump_labels (insns);
+  delete_trivially_dead_insns (insns, max_reg_num ());
+  close_dump_file (DFI_gcse2, print_rtl_with_bb, insns);
+
+  ggc_collect ();
+
+#ifdef ENABLE_CHECKING
+  verify_flow_info ();
+#endif
+}
+
 /* Register allocation pre-pass, to reduce number of moves necessary
    for two-address machines.  */
 static void
@@ -1842,6 +1861,9 @@ rest_of_compilation (tree decl)
 
   close_dump_file (DFI_postreload, print_rtl_with_bb, insns);
 
+  if (optimize > 0 && flag_gcse_after_reload)
+    rest_of_handle_gcse2 (decl, insns);
+
   /* Re-create the death notes which were deleted during reload.  */
   timevar_push (TV_FLOW2);
   open_dump_file (DFI_flow2, decl);
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 66578d08bf5..2e911362578 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2289,6 +2289,7 @@ extern rtx fis_get_condition (rtx);
 #ifdef BUFSIZ
 extern int gcse_main (rtx, FILE *);
 extern int bypass_jumps (FILE *);
+extern void gcse_after_reload_main (rtx, FILE *);
 #endif
 
 /* In global.c */
diff --git a/gcc/toplev.c b/gcc/toplev.c
index da9e7f90bab..8192a1a8ea8 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -526,6 +526,9 @@ int flag_gcse_sm = 1;
 
 int flag_gcse_las = 1;
 
+/* Nonzero means perform global cse after register allocation.  */
+int flag_gcse_after_reload = 0;
+
 /* Perform target register optimization before prologue / epilogue
    threading.  */
 
@@ -915,6 +918,7 @@ static const lang_independent_options f_options[] =
   {"gcse-lm", &flag_gcse_lm, 1 },
   {"gcse-sm", &flag_gcse_sm, 1 },
   {"gcse-las", &flag_gcse_las, 1 },
+  {"gcse-after-reload", &flag_gcse_after_reload, 1 },
   {"branch-target-load-optimize", &flag_branch_target_load_optimize, 1 },
   {"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1 },
   {"btr-bb-exclusive", &flag_btr_bb_exclusive, 1 },
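A hypothetical invocation exercising the new flag, its dump file, and the two tuning parameters added above (the parameter values here are arbitrary; per the opts.c hunk, -O3 already enables the flag by default):

gcc -O2 -fgcse-after-reload -dJ \
    --param gcse-after-reload-partial-fraction=4 \
    --param gcse-after-reload-critical-fraction=12 \
    -c foo.c

The -dJ switch requests the new "gcse2" RTL dump registered in passes.c.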