common.opt: Add description of the new -fgcse-las flag.
2003-10-17 Mostafa Hagog <mustafa@il.ibm.com> * common.opt: Add description of the new -fgcse-las flag. * flags.h (flag_gcse_las): Declaration of global flag_gcse_las. * gcse.c (hash_scan_set): Handle the case of store expression and insert the memory expression to the hash table, this way we make it possible to discover redundant loads after stores and remove them. (pre_insert_copy_insn): moved the call to update_ld_motion_stores, to pre_insert_copies, it is not the correct place to call it after adding stores to be in the available expression hash table. (pre_insert_copies): Added the call to update_ld_motion_stores when one or more copies were inserted. * opts.c (common_handle_option): Handle the -fgcse-las flag. * toplev.c (flag_gcse_las): Initialization of flag_gcse_las. * doc/invoke.tex: Document new -fgcse-las flag. From-SVN: r72612
This commit is contained in:
parent
9579624e72
commit
f5f2e3cd9b
@ -1,3 +1,20 @@
|
||||
2003-10-17 Mostafa Hagog <mustafa@il.ibm.com>
|
||||
|
||||
* common.opt: Add description of the new -fgcse-las flag.
|
||||
* flags.h (flag_gcse_las): Declaration of global flag_gcse_las.
|
||||
* gcse.c (hash_scan_set): Handle the case of store expression and
|
||||
insert the memory expression to the hash table, this way we make it
|
||||
possible to discover redundant loads after stores and remove them.
|
||||
(pre_insert_copy_insn): moved the call to update_ld_motion_stores,
|
||||
to pre_insert_copies, it is not the correct place to call it after
|
||||
adding stores to be in the available expression hash table.
|
||||
(pre_insert_copies): Added the call to update_ld_motion_stores when
|
||||
one or more copies were inserted.
|
||||
* opts.c (common_handle_option): Handle the -fgcse-las flag.
|
||||
* toplev.c (flag_gcse_las): Initialization of flag_gcse_las.
|
||||
|
||||
* doc/invoke.tex: Document new -fgcse-las flag.
|
||||
|
||||
2003-10-18 Alan Modra <amodra@bigpond.net.au>
|
||||
|
||||
* config/rs6000/crtsavres.asm: Correct alignment of powerpc64 code
|
||||
|
@ -362,6 +362,10 @@ fgcse-sm
|
||||
Common
|
||||
Perform store motion after global common subexpression elimination
|
||||
|
||||
fgcse-las
|
||||
Common
|
||||
Perform redundant load after store elimination in global common subexpression elimination
|
||||
|
||||
fgnu-linker
|
||||
Common
|
||||
Output GNU ld formatted global initializers
|
||||
|
@ -270,8 +270,8 @@ in the following sections.
|
||||
-fdelayed-branch -fdelete-null-pointer-checks @gol
|
||||
-fexpensive-optimizations -ffast-math -ffloat-store @gol
|
||||
-fforce-addr -fforce-mem -ffunction-sections @gol
|
||||
-fgcse -fgcse-lm -fgcse-sm -floop-optimize -fcrossjumping @gol
|
||||
-fif-conversion -fif-conversion2 @gol
|
||||
-fgcse -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize @gol
|
||||
-fcrossjumping -fif-conversion -fif-conversion2 @gol
|
||||
-finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol
|
||||
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
|
||||
-fmove-all-movables -fnew-ra -fno-branch-count-reg @gol
|
||||
@ -3677,10 +3677,10 @@ also turns on the following optimization flags:
|
||||
-fstrength-reduce @gol
|
||||
-fcse-follow-jumps -fcse-skip-blocks @gol
|
||||
-frerun-cse-after-loop -frerun-loop-opt @gol
|
||||
-fgcse -fgcse-lm -fgcse-sm @gol
|
||||
-fgcse -fgcse-lm -fgcse-sm -fgcse-las @gol
|
||||
-fdelete-null-pointer-checks @gol
|
||||
-fexpensive-optimizations @gol
|
||||
-fregmove -@gol
|
||||
-fregmove @gol
|
||||
-fschedule-insns -fschedule-insns2 @gol
|
||||
-fsched-interblock -fsched-spec @gol
|
||||
-fcaller-saves @gol
|
||||
@ -3996,10 +3996,19 @@ Enabled by default when gcse is enabled.
|
||||
|
||||
@item -fgcse-sm
|
||||
@opindex fgcse-sm
|
||||
When @option{-fgcse-sm} is enabled, A store motion pass is run after global common
|
||||
subexpression elimination. This pass will attempt to move stores out of loops.
|
||||
When used in conjunction with @option{-fgcse-lm}, loops containing a load/store sequence
|
||||
can be changed to a load before the loop and a store after the loop.
|
||||
When @option{-fgcse-sm} is enabled, a store motion pass is run after
|
||||
global common subexpression elimination. This pass will attempt to move
|
||||
stores out of loops. When used in conjunction with @option{-fgcse-lm},
|
||||
loops containing a load/store sequence can be changed to a load before
|
||||
the loop and a store after the loop.
|
||||
|
||||
Enabled by default when gcse is enabled.
|
||||
|
||||
@item -fgcse-las
|
||||
@opindex fgcse-las
|
||||
When @option{-fgcse-las} is enabled, the global common subexpression
|
||||
elimination pass eliminates redundant loads that come after stores to the
|
||||
same memory location (both partial and full redundancies).
|
||||
|
||||
Enabled by default when gcse is enabled.
|
||||
|
||||
|
@ -675,6 +675,11 @@ extern int flag_gcse_lm;
|
||||
|
||||
extern int flag_gcse_sm;
|
||||
|
||||
/* Nonzero if we want to perform redundant load-after-store elimination
|
||||
in gcse. */
|
||||
|
||||
extern int flag_gcse_las;
|
||||
|
||||
/* Perform branch target register optimization before prologue / epilogue
|
||||
threading. */
|
||||
|
||||
|
80
gcc/gcse.c
80
gcc/gcse.c
@ -2205,6 +2205,49 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table *table)
|
||||
&& oprs_available_p (pat, tmp))))
|
||||
insert_set_in_table (pat, insn, table);
|
||||
}
|
||||
/* In case of store we want to consider the memory value as available in
|
||||
the REG stored in that memory. This makes it possible to remove
|
||||
redundant loads due to stores to the same location. */
|
||||
else if (flag_gcse_las && GET_CODE (src) == REG && GET_CODE (dest) == MEM)
|
||||
{
|
||||
unsigned int regno = REGNO (src);
|
||||
|
||||
/* Do not do this for constant/copy propagation. */
|
||||
if (! table->set_p
|
||||
/* Only record sets of pseudo-regs in the hash table. */
|
||||
&& regno >= FIRST_PSEUDO_REGISTER
|
||||
/* Don't GCSE something if we can't do a reg/reg copy. */
|
||||
&& can_copy_p (GET_MODE (src))
|
||||
/* GCSE commonly inserts instruction after the insn. We can't
|
||||
do that easily for EH_REGION notes so disable GCSE on these
|
||||
for now. */
|
||||
&& ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
|
||||
/* Is SET_DEST something we want to gcse? */
|
||||
&& want_to_gcse_p (dest)
|
||||
/* Don't CSE a nop. */
|
||||
&& ! set_noop_p (pat)
|
||||
/* Don't GCSE if it has attached REG_EQUIV note.
|
||||
At this point only function parameters should have
|
||||
REG_EQUIV notes and if the argument slot is used somewhere
|
||||
explicitly, it means address of parameter has been taken,
|
||||
so we should not extend the lifetime of the pseudo. */
|
||||
&& ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
|
||||
|| GET_CODE (XEXP (note, 0)) != MEM))
|
||||
{
|
||||
/* Stores are never anticipatable. */
|
||||
int antic_p = 0;
|
||||
/* An expression is not available if its operands are
|
||||
subsequently modified, including this insn. It's also not
|
||||
available if this is a branch, because we can't insert
|
||||
a set after the branch. */
|
||||
int avail_p = oprs_available_p (dest, insn)
|
||||
&& ! JUMP_P (insn);
|
||||
|
||||
/* Record the memory expression (DEST) in the hash table. */
|
||||
insert_expr_in_table (dest, GET_MODE (dest), insn,
|
||||
antic_p, avail_p, table);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -5360,7 +5403,13 @@ pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
|
||||
reaching_reg <- expr
|
||||
old_reg <- reaching_reg
|
||||
because this way copy propagation can discover additional PRE
|
||||
opportunities. But if this fails, we try the old way. */
|
||||
opportunities. But if this fails, we try the old way.
|
||||
When "expr" is a store, i.e.
|
||||
given "MEM <- old_reg", instead of adding after it
|
||||
reaching_reg <- old_reg
|
||||
it's better to add it before as follows:
|
||||
reaching_reg <- old_reg
|
||||
MEM <- reaching_reg. */
|
||||
|
||||
static void
|
||||
pre_insert_copy_insn (struct expr *expr, rtx insn)
|
||||
@ -5395,8 +5444,9 @@ pre_insert_copy_insn (struct expr *expr, rtx insn)
|
||||
else
|
||||
abort ();
|
||||
|
||||
if (GET_CODE (SET_DEST (set)) == REG)
|
||||
{
|
||||
old_reg = SET_DEST (set);
|
||||
|
||||
/* Check if we can modify the set destination in the original insn. */
|
||||
if (validate_change (insn, &SET_DEST (set), reg, 0))
|
||||
{
|
||||
@ -5415,6 +5465,21 @@ pre_insert_copy_insn (struct expr *expr, rtx insn)
|
||||
/* Keep register set table up to date. */
|
||||
record_one_set (regno, new_insn);
|
||||
}
|
||||
}
|
||||
else /* This is possible only in case of a store to memory. */
|
||||
{
|
||||
old_reg = SET_SRC (set);
|
||||
new_insn = gen_move_insn (reg, old_reg);
|
||||
|
||||
/* Check if we can modify the set source in the original insn. */
|
||||
if (validate_change (insn, &SET_SRC (set), reg, 0))
|
||||
new_insn = emit_insn_before (new_insn, insn);
|
||||
else
|
||||
new_insn = emit_insn_after (new_insn, insn);
|
||||
|
||||
/* Keep register set table up to date. */
|
||||
record_one_set (regno, new_insn);
|
||||
}
|
||||
|
||||
gcse_create_count++;
|
||||
|
||||
@ -5423,7 +5488,6 @@ pre_insert_copy_insn (struct expr *expr, rtx insn)
|
||||
"PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
|
||||
BLOCK_NUM (insn), INSN_UID (new_insn), indx,
|
||||
INSN_UID (insn), regno);
|
||||
update_ld_motion_stores (expr);
|
||||
}
|
||||
|
||||
/* Copy available expressions that reach the redundant expression
|
||||
@ -5432,7 +5496,7 @@ pre_insert_copy_insn (struct expr *expr, rtx insn)
|
||||
static void
|
||||
pre_insert_copies (void)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int i, added_copy;
|
||||
struct expr *expr;
|
||||
struct occr *occr;
|
||||
struct occr *avail;
|
||||
@ -5454,6 +5518,9 @@ pre_insert_copies (void)
|
||||
if (expr->reaching_reg == NULL)
|
||||
continue;
|
||||
|
||||
/* Set when we add a copy for that expression. */
|
||||
added_copy = 0;
|
||||
|
||||
for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
|
||||
{
|
||||
if (! occr->deleted_p)
|
||||
@ -5477,11 +5544,16 @@ pre_insert_copies (void)
|
||||
BLOCK_FOR_INSN (occr->insn)))
|
||||
continue;
|
||||
|
||||
added_copy = 1;
|
||||
|
||||
/* Copy the result of avail to reaching_reg. */
|
||||
pre_insert_copy_insn (expr, insn);
|
||||
avail->copied_p = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (added_copy)
|
||||
update_ld_motion_stores (expr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1019,6 +1019,10 @@ common_handle_option (size_t scode, const char *arg,
|
||||
flag_gcse_sm = value;
|
||||
break;
|
||||
|
||||
case OPT_fgcse_las:
|
||||
flag_gcse_las = value;
|
||||
break;
|
||||
|
||||
case OPT_fgnu_linker:
|
||||
flag_gnu_linker = value;
|
||||
break;
|
||||
|
@ -697,6 +697,11 @@ int flag_gcse_lm = 1;
|
||||
|
||||
int flag_gcse_sm = 1;
|
||||
|
||||
/* Nonzero if we want to perform redundant load after store elimination
|
||||
in gcse. */
|
||||
|
||||
int flag_gcse_las = 1;
|
||||
|
||||
/* Perform target register optimization before prologue / epilogue
|
||||
threading. */
|
||||
|
||||
@ -1075,6 +1080,7 @@ static const lang_independent_options f_options[] =
|
||||
{"gcse", &flag_gcse, 1 },
|
||||
{"gcse-lm", &flag_gcse_lm, 1 },
|
||||
{"gcse-sm", &flag_gcse_sm, 1 },
|
||||
{"gcse-las", &flag_gcse_las, 1 },
|
||||
{"branch-target-load-optimize", &flag_branch_target_load_optimize, 1 },
|
||||
{"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1 },
|
||||
{"loop-optimize", &flag_loop_optimize, 1 },
|
||||
|
Loading…
x
Reference in New Issue
Block a user