common.opt: Add description of the new -fgcse-after-reload flag.

2004-03-03  Mostafa Hagog  <mustafa@il.ibm.com>

        * common.opt: Add description of the new -fgcse-after-reload flag.

        * flags.h (flag_gcse_after_reload): Declaration of global variable.

        * gcse.c (reg_used_on_edge, reg_set_between_after_reload_p,
        reg_used_between_after_reload_p, get_avail_load_store_reg,
        is_jump_table_basic_block, bb_has_well_behaved_predecessors,
        get_bb_avail_insn, hash_scan_set_after_reload,
        compute_hash_table_after_reload, eliminate_partially_redundant_loads,
        gcse_after_reload): New functions to implement
        gcse-after-reload.
        (gcse_after_reload_main): New function, the main entry point to
        gcse-after-reload.

        * rtl.h (gcse_after_reload_main): Declaration of the new function.

        * opts.c (common_handle_option): Handle the -fgcse-after-reload flag.

        * toplev.c (flag_gcse_after_reload): Initialization.

        * passes.c (rest_of_handle_gcse2): Call gcse_after_reload_main.

        * params.def (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
        PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION): New parameters for tuning
        the gcse after reload optimization.

        * params.h (GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
        GCSE_AFTER_RELOAD_CRITICAL_FRACTION): Two macros to access the tuning
        parameters.

        * doc/invoke.texi: Documentation for the new flag gcse-after-reload.

From-SVN: r78842
Mostafa Hagog, 2004-03-03 16:32:45 +00:00, committed by David Edelsohn
commit f9957958fe (parent 6f6dedf5bd)
11 changed files with 750 additions and 6 deletions

gcc/ChangeLog

@ -1,3 +1,37 @@
2004-03-03 Mostafa Hagog <mustafa@il.ibm.com>
* common.opt: Add description of the new -fgcse-after-reload flag.
* flags.h (flag_gcse_after_reload): Declaration of global variable.
* gcse.c (reg_used_on_edge, reg_set_between_after_reload_p,
reg_used_between_after_reload_p, get_avail_load_store_reg,
is_jump_table_basic_block, bb_has_well_behaved_predecessors,
get_bb_avail_insn, hash_scan_set_after_reload,
compute_hash_table_after_reload, eliminate_partially_redundant_loads,
gcse_after_reload): New functions to implement
gcse-after-reload.
(gcse_after_reload_main): New function, the main entry point to
gcse-after-reload.
* rtl.h (gcse_after_reload_main): Declaration of the new function.
* opts.c (common_handle_option): Handle the -fgcse-after-reload flag.
* toplev.c (flag_gcse_after_reload): Initialization.
* passes.c (rest_of_handle_gcse2): Call gcse_after_reload_main.
* params.def (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION): New parameters for tuning
the gcse after reload optimization.
* params.h (GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
GCSE_AFTER_RELOAD_CRITICAL_FRACTION): Two macros to access the tuning
parameters.
* doc/invoke.texi: Documentation for the new flag gcse-after-reload.
2004-03-03 Nicolas Pitre <nico@cam.org>
* config/arm/ieee754-df.S (muldf3, divdf3): Fix denormalization of

gcc/common.opt

@ -371,7 +371,13 @@ Perform store motion after global common subexpression elimination
fgcse-las
Common
Perform redundant load after store elimination in global common subexpression elimination
Perform redundant load after store elimination in global common subexpression
elimination
fgcse-after-reload
Common
Perform global common subexpression elimination after register allocation
has finished.
fguess-branch-probability
Common

gcc/doc/invoke.texi

@ -270,8 +270,8 @@ in the following sections.
-fdelayed-branch -fdelete-null-pointer-checks @gol
-fexpensive-optimizations -ffast-math -ffloat-store @gol
-fforce-addr -fforce-mem -ffunction-sections @gol
-fgcse -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize @gol
-fcrossjumping -fif-conversion -fif-conversion2 @gol
-fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol
-floop-optimize -fcrossjumping -fif-conversion -fif-conversion2 @gol
-finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
-fmove-all-movables -fnew-ra -fno-branch-count-reg @gol
@ -3646,7 +3646,8 @@ invoking @option{-O2} on programs that use computed gotos.
@opindex O3
Optimize yet more. @option{-O3} turns on all optimizations specified by
@option{-O2} and also turns on the @option{-finline-functions},
@option{-fweb} and @option{-frename-registers} options.
@option{-fweb}, @option{-frename-registers}
and @option{-fgcse-after-reload} options.
@item -O0
@opindex O0
@ -3957,6 +3958,12 @@ same memory location (both partial and full redundancies).
Enabled by default when gcse is enabled.
@item -fgcse-after-reload
@opindex fgcse-after-reload
When @option{-fgcse-after-reload} is enabled, a redundant load elimination
pass is performed after reload. The purpose of this pass is to clean up
redundant spilling.
@item -floop-optimize
@opindex floop-optimize
Perform loop optimizations: move constant expressions out of loops, simplify

gcc/flags.h

@ -672,6 +672,11 @@ extern int flag_gcse_sm;
extern int flag_gcse_las;
/* Nonzero if we want to perform global redundancy elimination after
register allocation. */
extern int flag_gcse_after_reload;
/* Nonzero if value histograms should be used to optimize code. */
extern int flag_value_profile_transformations;

gcc/gcse.c

@ -1980,6 +1980,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
antic_occr->insn = insn;
antic_occr->next = NULL;
antic_occr->deleted_p = 0;
}
}
@ -2016,6 +2017,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
avail_occr->insn = insn;
avail_occr->next = NULL;
avail_occr->deleted_p = 0;
}
}
}
@ -2102,6 +2104,7 @@ insert_set_in_table (rtx x, rtx insn, struct hash_table *table)
cur_occr->insn = insn;
cur_occr->next = NULL;
cur_occr->deleted_p = 0;
}
}
@ -8091,4 +8094,639 @@ is_too_expensive (const char *pass)
return false;
}
/* The following code implements gcse after reload. The purpose of this
pass is to clean up redundant loads generated by reload and other
optimizations that come after gcse. It searches for simple inter-block
redundancies and tries to eliminate them by adding moves and loads
in cold places. */
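/* Editor's sketch (not part of the patch): a hypothetical C fragment whose
   post-reload RTL can contain the kind of partially redundant load this
   pass removes.  If X were spilled to a stack slot under register pressure,
   reload would emit a load of X before each use; the load after the join
   is then redundant along the path through the THEN arm.  */
static int
spill_reload_example (int x, int *p, int cond)
{
  if (cond)
    x += p[0];        /* X reloaded from its stack slot here.  */
  else
    p[1] = 0;         /* X stays only in memory on this path.  */
  return x + p[2];    /* This reload of X is partially redundant.  */
}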
/* The following structure holds the information about the occurrences of
the redundant instructions. */
struct unoccr
{
struct unoccr *next;
edge pred;
rtx insn;
};
static bool reg_used_on_edge (rtx, edge);
static rtx reg_set_between_after_reload_p (rtx, rtx, rtx);
static rtx reg_used_between_after_reload_p (rtx, rtx, rtx);
static rtx get_avail_load_store_reg (rtx);
static bool is_jump_table_basic_block (basic_block);
static bool bb_has_well_behaved_predecessors (basic_block);
static struct occr* get_bb_avail_insn (basic_block, struct occr *);
static void hash_scan_set_after_reload (rtx, rtx, struct hash_table *);
static void compute_hash_table_after_reload (struct hash_table *);
static void eliminate_partially_redundant_loads (basic_block,
rtx,
struct expr *);
static void gcse_after_reload (void);
void gcse_after_reload_main (rtx, FILE *);
/* Check if register REG is used in any insn waiting to be inserted on E.
Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
with PREV(insn),NEXT(insn) instead of calling
reg_overlap_mentioned_p. */
static bool
reg_used_on_edge (rtx reg, edge e)
{
rtx insn;
for (insn = e->insns; insn; insn = NEXT_INSN (insn))
if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
return true;
return false;
}
/* Return the insn that sets register REG or clobbers it in between
FROM_INSN and TO_INSN (exclusive of those two).
Just like reg_set_between but for hard registers and not pseudos. */
static rtx
reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
{
rtx insn;
int regno;
if (GET_CODE (reg) != REG)
abort ();
regno = REGNO (reg);
/* We are called after register allocation. */
if (regno >= FIRST_PSEUDO_REGISTER)
abort ();
if (from_insn == to_insn)
return NULL_RTX;
for (insn = NEXT_INSN (from_insn);
insn != to_insn;
insn = NEXT_INSN (insn))
{
if (INSN_P (insn))
{
if (FIND_REG_INC_NOTE (insn, reg)
|| (GET_CODE (insn) == CALL_INSN
&& call_used_regs[regno])
|| find_reg_fusage (insn, CLOBBER, reg))
return insn;
}
if (set_of (reg, insn) != NULL_RTX)
return insn;
}
return NULL_RTX;
}
/* Return the insn that uses register REG in between FROM_INSN and TO_INSN
(exclusive of those two). Similar to reg_used_between but for hard
registers and not pseudos. */
static rtx
reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn)
{
rtx insn;
int regno;
if (GET_CODE (reg) != REG)
return to_insn;
regno = REGNO (reg);
/* We are called after register allocation. */
if (regno >= FIRST_PSEUDO_REGISTER)
abort ();
if (from_insn == to_insn)
return NULL_RTX;
for (insn = NEXT_INSN (from_insn);
insn != to_insn;
insn = NEXT_INSN (insn))
if (INSN_P (insn)
&& (reg_overlap_mentioned_p (reg, PATTERN (insn))
|| (GET_CODE (insn) == CALL_INSN
&& call_used_regs[regno])
|| find_reg_fusage (insn, USE, reg)
|| find_reg_fusage (insn, CLOBBER, reg)))
return insn;
return NULL_RTX;
}
/* Return the loaded/stored register of a load/store instruction. */
static rtx
get_avail_load_store_reg (rtx insn)
{
if (GET_CODE (SET_DEST (PATTERN (insn))) == REG) /* A load. */
return SET_DEST(PATTERN(insn));
if (GET_CODE (SET_SRC (PATTERN (insn))) == REG) /* A store. */
return SET_SRC (PATTERN (insn));
abort ();
}
/* Don't handle ABNORMAL edges or jump tables. */
static bool
is_jump_table_basic_block (basic_block bb)
{
rtx insn = BB_END (bb);
if (GET_CODE (insn) == JUMP_INSN &&
(GET_CODE (PATTERN (insn)) == ADDR_VEC
|| GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
return true;
return false;
}
/* Return nonzero if the predecessors of BB are "well behaved". */
static bool
bb_has_well_behaved_predecessors (basic_block bb)
{
edge pred;
if (! bb->pred)
return false;
for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
if (((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
|| is_jump_table_basic_block (pred->src))
return false;
return true;
}
/* Search for the occurrences of expression in BB. */
static struct occr*
get_bb_avail_insn (basic_block bb, struct occr *occr)
{
for (; occr != NULL; occr = occr->next)
if (BLOCK_FOR_INSN (occr->insn)->index == bb->index)
return occr;
return NULL;
}
/* Perform a partial GCSE pass after reload: try to eliminate redundant loads
created by the reload pass. We look for fully or partially redundant
loads fed by one or more loads/stores in predecessor BBs, and try
adding loads to make them fully redundant. We also check whether
it's worth adding loads to be able to delete the redundant load.
Algorithm:
1. Build available expressions hash table:
For each load/store instruction, if the loaded/stored memory didn't
change until the end of the basic block add this memory expression to
the hash table.
2. Perform Redundancy elimination:
For each load instruction do the following:
perform partial redundancy elimination, check if it's worth adding
loads to make the load fully redundant. If so add loads and
register copies and delete the load.
Future enhancement:
If the loaded register is used/defined between the load and some store,
look for some other free register between the load and all its stores,
and replace the load with a copy from this register to the loaded
register. */
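/* Editor's illustration (not part of the patch), on a hypothetical CFG;
   register numbers and the stack slot are made up:

       B1: (set (reg 3) (mem [sp+16]))     B2: no access to [sp+16]
                  \                           /
                   v                         v
              B3: (set (reg 5) (mem [sp+16]))    <- partially redundant

   Step 1 records the load in B1 as available at the end of B1.  Step 2
   inserts the copy (set (reg 5) (reg 3)) on the edge B1->B3 and a copy of
   the load on the edge B2->B3, after which the load in B3 is fully
   redundant and can be deleted.  This pays off when B1->B3 executes much
   more often than B2->B3; see the gcse-after-reload-* params.  */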
/* This handles the case where several stores feed a partially redundant
load. It checks if the redundancy elimination is possible and if it's
worth it. */
static void
eliminate_partially_redundant_loads (basic_block bb, rtx insn,
struct expr *expr)
{
edge pred;
rtx avail_insn = NULL_RTX;
rtx avail_reg;
rtx dest, pat;
struct occr *a_occr;
struct unoccr *occr, *avail_occrs = NULL;
struct unoccr *unoccr, *unavail_occrs = NULL;
int npred_ok = 0;
gcov_type ok_count = 0; /* Redundant load execution count. */
gcov_type critical_count = 0; /* Execution count of critical edges. */
/* The execution count of the loads to be added to make the
load fully redundant. */
gcov_type not_ok_count = 0;
basic_block pred_bb;
pat = PATTERN (insn);
dest = SET_DEST (pat);
/* Check that the loaded register is neither used nor killed from the
beginning of the block up to this insn. */
if (reg_used_between_after_reload_p (dest,
PREV_INSN (BB_HEAD (bb)), insn))
return;
/* Check potential for replacing load with copy for predecessors. */
for (pred = bb->pred; pred; pred = pred->pred_next)
{
rtx next_pred_bb_end;
avail_insn = NULL_RTX;
pred_bb = pred->src;
next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
{
/* Check if the loaded register is not used. */
avail_insn = a_occr->insn;
if (! (avail_reg = get_avail_load_store_reg (avail_insn)))
abort ();
/* Make sure we can generate a move from register avail_reg to
dest. */
extract_insn (gen_move_insn (copy_rtx (dest),
copy_rtx (avail_reg)));
if (! constrain_operands (1)
|| reg_killed_on_edge (avail_reg, pred)
|| reg_used_on_edge (dest, pred))
{
avail_insn = NULL;
continue;
}
if (! reg_set_between_after_reload_p (avail_reg, avail_insn,
next_pred_bb_end))
/* AVAIL_INSN remains non-null. */
break;
else
avail_insn = NULL;
}
if (avail_insn != NULL_RTX)
{
npred_ok++;
ok_count += pred->count;
if (EDGE_CRITICAL_P (pred))
critical_count += pred->count;
occr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
occr->insn = avail_insn;
occr->pred = pred;
occr->next = avail_occrs;
avail_occrs = occr;
}
else
{
not_ok_count += pred->count;
if (EDGE_CRITICAL_P (pred))
critical_count += pred->count;
unoccr = (struct unoccr *) gmalloc (sizeof (struct unoccr));
unoccr->insn = NULL_RTX;
unoccr->pred = pred;
unoccr->next = unavail_occrs;
unavail_occrs = unoccr;
}
}
if (npred_ok == 0 /* No load can be replaced by copy. */
|| (optimize_size && npred_ok > 1)) /* Prevent exploding the code. */
return;
/* Check if it's worth applying the partial redundancy elimination. */
if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
return;
if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
return;
/* Generate moves to the loaded register from where
the memory is available. */
for (occr = avail_occrs; occr; occr = occr->next)
{
avail_insn = occr->insn;
pred = occr->pred;
/* Set avail_reg to be the register having the value of the
memory. */
avail_reg = get_avail_load_store_reg (avail_insn);
if (! avail_reg)
abort ();
insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
copy_rtx (avail_reg)),
pred);
if (gcse_file)
fprintf (gcse_file,
"GCSE AFTER reload generating move from %d to %d on \
edge from %d to %d\n",
REGNO (avail_reg),
REGNO (dest),
pred->src->index,
pred->dest->index);
}
/* Regenerate loads where the memory is unavailable. */
for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
{
pred = unoccr->pred;
insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
if (gcse_file)
fprintf (gcse_file,
"GCSE AFTER reload: generating on edge from %d to %d\
a copy of load:\n",
pred->src->index,
pred->dest->index);
}
/* If the insn is not recorded as available in this block, delete it
now; otherwise mark it for later deletion, since keeping it available
may help discover additional redundancies. */
for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
a_occr && (a_occr->insn != insn);
a_occr = get_bb_avail_insn (bb, a_occr->next));
if (!a_occr)
delete_insn (insn);
else
a_occr->deleted_p = 1;
}
/* Perform the redundancy elimination as described above. */
static void
gcse_after_reload (void)
{
unsigned int i;
rtx insn;
basic_block bb;
struct expr *expr;
struct occr *occr;
/* Note we start at block 1. */
if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
return;
FOR_BB_BETWEEN (bb,
ENTRY_BLOCK_PTR->next_bb->next_bb,
EXIT_BLOCK_PTR,
next_bb)
{
if (! bb_has_well_behaved_predecessors (bb))
continue;
/* Do not try this optimization on cold basic blocks. */
if (probably_cold_bb_p (bb))
continue;
reset_opr_set_tables ();
for (insn = BB_HEAD (bb);
insn != NULL
&& insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
{
/* Is it a load - of the form (set (reg) (mem))? */
if (GET_CODE (insn) == INSN
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (SET_DEST (PATTERN (insn))) == REG
&& GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
{
rtx pat = PATTERN (insn);
rtx src = SET_SRC (pat);
struct expr *expr;
if (general_operand (src, GET_MODE (src))
/* Is the expression recorded? */
&& (expr = lookup_expr (src, &expr_hash_table)) != NULL
/* Are the operands unchanged since the start of the
block? */
&& oprs_not_set_p (src, insn)
&& ! MEM_VOLATILE_P (src)
&& GET_MODE (src) != BLKmode
&& !(flag_non_call_exceptions && may_trap_p (src))
&& !side_effects_p (src))
{
/* We now have a load (insn) and an expression (expr) recording
where that memory may already be available. Try to remove the
load if it is redundant. */
eliminate_partially_redundant_loads (bb, insn, expr);
}
}
/* Keep track of everything modified by this insn. */
if (INSN_P (insn))
mark_oprs_set (insn);
}
}
commit_edge_insertions ();
/* Go over the expression hash table and delete insns that were
marked for later deletion. */
for (i = 0; i < expr_hash_table.size; i++)
{
for (expr = expr_hash_table.table[i];
expr != NULL;
expr = expr->next_same_hash)
for (occr = expr->avail_occr; occr; occr = occr->next)
if (occr->deleted_p)
delete_insn (occr->insn);
}
}
/* Scan pattern PAT of INSN and add an entry to the hash TABLE.
After reload we are interested in loads/stores only. */
static void
hash_scan_set_after_reload (rtx pat, rtx insn, struct hash_table *table)
{
rtx src = SET_SRC (pat);
rtx dest = SET_DEST (pat);
if (GET_CODE (src) != MEM && GET_CODE (dest) != MEM)
return;
if (GET_CODE (dest) == REG)
{
if (/* Don't GCSE something if we can't do a reg/reg copy. */
can_copy_p (GET_MODE (dest))
/* GCSE commonly inserts instructions after the insn. We can't
do that easily for EH_REGION notes so disable GCSE on these
for now. */
&& ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
/* Is SET_SRC something we want to gcse? */
&& general_operand (src, GET_MODE (src))
/* Don't CSE a nop. */
&& ! set_noop_p (pat)
&& ! JUMP_P (insn))
{
/* An expression is not available if its operands are
subsequently modified, including this insn. */
if (oprs_available_p (src, insn))
insert_expr_in_table (src, GET_MODE (dest), insn, 0, 1, table);
}
}
else if ((GET_CODE (src) == REG))
{
/* A store of a register to memory: record the memory expression. */
if (/* Don't GCSE something if we can't do a reg/reg copy. */
can_copy_p (GET_MODE (src))
/* GCSE commonly inserts instructions after the insn. We can't
do that easily for EH_REGION notes so disable GCSE on these
for now. */
&& ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
/* Is SET_DEST something we want to gcse? */
&& general_operand (dest, GET_MODE (dest))
/* Don't CSE a nop. */
&& ! set_noop_p (pat)
&& ! JUMP_P (insn)
&& ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
/* Check if the memory expression is killed after insn. */
&& ! load_killed_in_block_p (BLOCK_FOR_INSN (insn),
INSN_CUID (insn) + 1,
dest,
1)
&& oprs_unchanged_p (XEXP (dest, 0), insn, 1))
{
insert_expr_in_table (dest, GET_MODE (dest), insn, 0, 1, table);
}
}
}
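/* Editor's note (not part of the patch): after reload,
   hash_scan_set_after_reload above records only two SET shapes; the modes
   and addresses below are illustrative:

     (set (reg:SI 3) (mem:SI (plus:SI (reg:SI 6) (const_int 16))))   load
     (set (mem:SI (plus:SI (reg:SI 6) (const_int 16))) (reg:SI 3))   store

   In both cases it is the MEM expression that goes into the table, so a
   later load from the same location can find an available occurrence in a
   predecessor block.  */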
/* Create hash table of memory expressions available at end of basic
blocks. */
static void
compute_hash_table_after_reload (struct hash_table *table)
{
unsigned int i;
table->set_p = 0;
/* Initialize count of number of entries in hash table. */
table->n_elems = 0;
memset ((char *) table->table, 0,
table->size * sizeof (struct expr *));
/* While we compute the hash table we also compute a bit array of which
registers are set in which blocks. */
sbitmap_vector_zero (reg_set_in_block, last_basic_block);
/* Re-cache any INSN_LIST nodes we have allocated. */
clear_modify_mem_tables ();
/* Some working arrays used to track first and last set in each block. */
reg_avail_info = (struct reg_avail_info*)
gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));
for (i = 0; i < max_gcse_regno; ++i)
reg_avail_info[i].last_bb = NULL;
FOR_EACH_BB (current_bb)
{
rtx insn;
unsigned int regno;
/* First pass over the instructions records information used to
determine when registers and memory are first and last set. */
for (insn = BB_HEAD (current_bb);
insn && insn != NEXT_INSN (BB_END (current_bb));
insn = NEXT_INSN (insn))
{
if (! INSN_P (insn))
continue;
if (GET_CODE (insn) == CALL_INSN)
{
bool clobbers_all = false;
#ifdef NON_SAVING_SETJMP
if (NON_SAVING_SETJMP
&& find_reg_note (insn, REG_SETJMP, NULL_RTX))
clobbers_all = true;
#endif
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (clobbers_all
|| TEST_HARD_REG_BIT (regs_invalidated_by_call,
regno))
record_last_reg_set_info (insn, regno);
mark_call (insn);
}
note_stores (PATTERN (insn), record_last_set_info, insn);
if (GET_CODE (PATTERN (insn)) == SET)
{
rtx src, dest;
src = SET_SRC (PATTERN (insn));
dest = SET_DEST (PATTERN (insn));
if (GET_CODE (src) == MEM && auto_inc_p (XEXP (src, 0)))
{
regno = REGNO (XEXP (XEXP (src, 0), 0));
record_last_reg_set_info (insn, regno);
}
if (GET_CODE (dest) == MEM && auto_inc_p (XEXP (dest, 0)))
{
regno = REGNO (XEXP (XEXP (dest, 0), 0));
record_last_reg_set_info (insn, regno);
}
}
}
/* The next pass builds the hash table. */
for (insn = BB_HEAD (current_bb);
insn && insn != NEXT_INSN (BB_END (current_bb));
insn = NEXT_INSN (insn))
if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
if (! find_reg_note (insn, REG_LIBCALL, NULL_RTX))
hash_scan_set_after_reload (PATTERN (insn), insn, table);
}
free (reg_avail_info);
reg_avail_info = NULL;
}
/* Main entry point of gcse after reload: clean up some redundant loads
caused by spilling. */
void
gcse_after_reload_main (rtx f, FILE* file)
{
gcse_subst_count = 0;
gcse_create_count = 0;
gcse_file = file;
gcc_obstack_init (&gcse_obstack);
bytes_used = 0;
/* We need alias. */
init_alias_analysis ();
max_gcse_regno = max_reg_num ();
alloc_reg_set_mem (max_gcse_regno);
alloc_gcse_mem (f);
alloc_hash_table (max_cuid, &expr_hash_table, 0);
compute_hash_table_after_reload (&expr_hash_table);
if (gcse_file)
dump_hash_table (gcse_file, "Expression", &expr_hash_table);
if (expr_hash_table.n_elems > 0)
gcse_after_reload ();
free_hash_table (&expr_hash_table);
free_gcse_mem ();
free_reg_set_mem ();
/* We are finished with alias. */
end_alias_analysis ();
obstack_free (&gcse_obstack, NULL);
}
#include "gt-gcse.h"

gcc/opts.c

@ -574,6 +574,7 @@ decode_options (unsigned int argc, const char **argv)
flag_rename_registers = 1;
flag_unswitch_loops = 1;
flag_web = 1;
flag_gcse_after_reload = 1;
}
if (optimize < 2 || optimize_size)
@ -1035,6 +1036,10 @@ common_handle_option (size_t scode, const char *arg,
flag_gcse_sm = value;
break;
case OPT_fgcse_after_reload:
flag_gcse_after_reload = value;
break;
case OPT_fgcse_las:
flag_gcse_las = value;
break;

gcc/params.def

@ -131,7 +131,25 @@ DEFPARAM(PARAM_MAX_GCSE_PASSES,
"max-gcse-passes",
"The maximum number of passes to make when doing GCSE",
1)
/* This is the threshold ratio that decides when to perform partial
redundancy elimination after reload. We perform partial redundancy
elimination when the following holds:
(Redundant load execution count)
------------------------------- >= GCSE_AFTER_RELOAD_PARTIAL_FRACTION
(Added loads execution count) */
DEFPARAM(PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
"gcse-after-reload-partial-fraction",
"The threshold ratio for performing partial redundancy elimination \
after reload.",
3)
/* This is the threshold ratio of the critical edges' execution count,
compared to the redundant load's execution count, that permits performing
the load redundancy elimination in gcse after reload. */
DEFPARAM(PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION,
"gcse-after-reload-critical-fraction",
"The threshold ratio of critical edges execution count that permit \
performing redundancy elimination after reload.",
10)
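/* Editor's worked example (not part of the patch), using the default
   values of 3 and 10 above: suppose the load is redundant along
   predecessor edges executed 900 times, the compensating loads added on
   the other predecessor edges would execute 200 times, and 50 of the
   predecessor-edge executions are on critical edges.  Then 900 >= 3 * 200
   and 900 >= 10 * 50 both hold, so the elimination is performed; if the
   added loads executed 400 times instead, 900 < 3 * 400 and the load
   would be left alone.  */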
/* This parameter limits the number of insns in a loop that will be unrolled,
and by how much the loop is unrolled.

gcc/params.h

@ -104,6 +104,10 @@ typedef enum compiler_param
((size_t) PARAM_VALUE (PARAM_MAX_GCSE_MEMORY))
#define MAX_GCSE_PASSES \
PARAM_VALUE (PARAM_MAX_GCSE_PASSES)
#define GCSE_AFTER_RELOAD_PARTIAL_FRACTION \
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
#define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
#define MAX_UNROLLED_INSNS \
PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
#endif /* ! GCC_PARAMS_H */

gcc/passes.c

@ -158,6 +158,7 @@ enum dump_file_index
DFI_lreg,
DFI_greg,
DFI_postreload,
DFI_gcse2,
DFI_flow2,
DFI_peephole2,
DFI_ce3,
@ -178,7 +179,7 @@ enum dump_file_index
Remaining -d letters:
" e m q "
" JK O Q WXY "
" K O Q WXY "
*/
static struct dump_file_info dump_file_tbl[DFI_MAX] =
@ -210,6 +211,7 @@ static struct dump_file_info dump_file_tbl[DFI_MAX] =
{ "lreg", 'l', 1, 0, 0 },
{ "greg", 'g', 1, 0, 0 },
{ "postreload", 'o', 1, 0, 0 },
{ "gcse2", 'J', 0, 0, 0 },
{ "flow2", 'w', 1, 0, 0 },
{ "peephole2", 'z', 1, 0, 0 },
{ "ce3", 'E', 1, 0, 0 },
@ -788,6 +790,23 @@ rest_of_handle_sched2 (tree decl, rtx insns)
}
#endif
static void
rest_of_handle_gcse2 (tree decl, rtx insns)
{
open_dump_file (DFI_gcse2, decl);
gcse_after_reload_main (insns, dump_file);
rebuild_jump_labels (insns);
delete_trivially_dead_insns (insns, max_reg_num ());
close_dump_file (DFI_gcse2, print_rtl_with_bb, insns);
ggc_collect ();
#ifdef ENABLE_CHECKING
verify_flow_info ();
#endif
}
/* Register allocation pre-pass, to reduce number of moves necessary
for two-address machines. */
static void
@ -1842,6 +1861,9 @@ rest_of_compilation (tree decl)
close_dump_file (DFI_postreload, print_rtl_with_bb, insns);
if (optimize > 0 && flag_gcse_after_reload)
rest_of_handle_gcse2 (decl, insns);
/* Re-create the death notes which were deleted during reload. */
timevar_push (TV_FLOW2);
open_dump_file (DFI_flow2, decl);

gcc/rtl.h

@ -2289,6 +2289,7 @@ extern rtx fis_get_condition (rtx);
#ifdef BUFSIZ
extern int gcse_main (rtx, FILE *);
extern int bypass_jumps (FILE *);
extern void gcse_after_reload_main (rtx, FILE *);
#endif
/* In global.c */

gcc/toplev.c

@ -526,6 +526,9 @@ int flag_gcse_sm = 1;
int flag_gcse_las = 1;
/* Nonzero means perform global cse after register allocation. */
int flag_gcse_after_reload = 0;
/* Perform target register optimization before prologue / epilogue
threading. */
@ -915,6 +918,7 @@ static const lang_independent_options f_options[] =
{"gcse-lm", &flag_gcse_lm, 1 },
{"gcse-sm", &flag_gcse_sm, 1 },
{"gcse-las", &flag_gcse_las, 1 },
{"gcse-after-reload", &flag_gcse_after_reload, 1},
{"branch-target-load-optimize", &flag_branch_target_load_optimize, 1 },
{"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1 },
{"btr-bb-exclusive", &flag_btr_bb_exclusive, 1 },