New flag to control reg-moves generation
Co-Authored-By: Revital Eres <eres@il.ibm.com> From-SVN: r127223
This commit is contained in:
parent
152b97088e
commit
517d76faef
@ -1,3 +1,18 @@
|
||||
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
|
||||
Revital Eres <eres@il.ibm.com>
|
||||
|
||||
* doc/invoke.texi (-fmodulo-sched-allow-regmoves): Document new
|
||||
flag.
|
||||
* ddg.c (create_ddg_dependence): Rename to...
|
||||
(create_ddg_dep_from_intra_loop_link): This. Do not check
|
||||
for interloop edges. Do not create anti dependence edge when
|
||||
a true dependence edge exists in the opposite direction and
|
||||
-fmodulo-sched-allow-regmoves is set.
|
||||
(build_intra_loop_deps): Call create_ddg_dep_from_intra_loop_link.
|
||||
(add_cross_iteration_register_deps): Create anti dependence edge
|
||||
when -fno-modulo-sched-allow-regmoves is set.
|
||||
* common.opt (-fmodulo-sched-allow-regmoves): New flag.
|
||||
|
||||
2007-08-04 Richard Sandiford <richard@codesourcery.com>
|
||||
|
||||
* config/arm/arm.md (movsi): Add braces.
|
||||
|
@ -651,6 +651,10 @@ fmodulo-sched
|
||||
Common Report Var(flag_modulo_sched) Optimization
|
||||
Perform SMS based modulo scheduling before the first scheduling pass
|
||||
|
||||
fmodulo-sched-allow-regmoves
|
||||
Common Report Var(flag_modulo_sched_allow_regmoves)
|
||||
Perform SMS based modulo scheduling with register moves allowed
|
||||
|
||||
fmove-loop-invariants
|
||||
Common Report Var(flag_move_loop_invariants) Init(1) Optimization
|
||||
Move loop invariant computations out of loops
|
||||
|
78
gcc/ddg.c
78
gcc/ddg.c
@ -51,7 +51,8 @@ enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
|
||||
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
|
||||
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
|
||||
static void add_scc_to_ddg (ddg_all_sccs_ptr, ddg_scc_ptr);
|
||||
static void create_ddg_dependence (ddg_ptr, ddg_node_ptr, ddg_node_ptr, dep_t);
|
||||
static void create_ddg_dep_from_intra_loop_link (ddg_ptr, ddg_node_ptr,
|
||||
ddg_node_ptr, dep_t);
|
||||
static void create_ddg_dep_no_link (ddg_ptr, ddg_node_ptr, ddg_node_ptr,
|
||||
dep_type, dep_data_type, int);
|
||||
static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type,
|
||||
@ -145,22 +146,16 @@ mem_access_insn_p (rtx insn)
|
||||
/* Computes the dependence parameters (latency, distance etc.), creates
|
||||
a ddg_edge and adds it to the given DDG. */
|
||||
static void
|
||||
create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node,
|
||||
ddg_node_ptr dest_node, dep_t link)
|
||||
create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
|
||||
ddg_node_ptr dest_node, dep_t link)
|
||||
{
|
||||
ddg_edge_ptr e;
|
||||
int latency, distance = 0;
|
||||
int interloop = (src_node->cuid >= dest_node->cuid);
|
||||
dep_type t = TRUE_DEP;
|
||||
dep_data_type dt = (mem_access_insn_p (src_node->insn)
|
||||
&& mem_access_insn_p (dest_node->insn) ? MEM_DEP
|
||||
: REG_DEP);
|
||||
|
||||
/* For now we don't have an exact calculation of the distance,
|
||||
so assume 1 conservatively. */
|
||||
if (interloop)
|
||||
distance = 1;
|
||||
|
||||
gcc_assert (src_node->cuid < dest_node->cuid);
|
||||
gcc_assert (link);
|
||||
|
||||
/* Note: REG_DEP_ANTI applies to MEM ANTI_DEP as well!! */
|
||||
@ -168,27 +163,34 @@ create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node,
|
||||
t = ANTI_DEP;
|
||||
else if (DEP_KIND (link) == REG_DEP_OUTPUT)
|
||||
t = OUTPUT_DEP;
|
||||
latency = dep_cost (link);
|
||||
|
||||
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
|
||||
|
||||
if (interloop)
|
||||
/* We currently choose not to create certain anti-deps edges and
|
||||
compensate for that by generating reg-moves based on the life-range
|
||||
analysis. The anti-deps that will be deleted are the ones which
|
||||
have true-deps edges in the opposite direction (in other words
|
||||
the kernel has only one def of the relevant register). TODO:
|
||||
support the removal of all anti-deps edges, i.e. including those
|
||||
whose register has multiple defs in the loop. */
|
||||
if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
|
||||
{
|
||||
/* Some interloop dependencies are relaxed:
|
||||
1. Every insn is output dependent on itself; ignore such deps.
|
||||
2. Every true/flow dependence is an anti dependence in the
|
||||
opposite direction with distance 1; such register deps
|
||||
will be removed by renaming if broken --- ignore them. */
|
||||
if (!(t == OUTPUT_DEP && src_node == dest_node)
|
||||
&& !(t == ANTI_DEP && dt == REG_DEP))
|
||||
add_backarc_to_ddg (g, e);
|
||||
else
|
||||
free (e);
|
||||
rtx set;
|
||||
|
||||
set = single_set (dest_node->insn);
|
||||
if (set)
|
||||
{
|
||||
int regno = REGNO (SET_DEST (set));
|
||||
struct df_ref *first_def =
|
||||
df_bb_regno_first_def_find (g->bb, regno);
|
||||
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
|
||||
|
||||
if (bitmap_bit_p (bb_info->gen, first_def->id))
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (t == ANTI_DEP && dt == REG_DEP)
|
||||
free (e); /* We can fix broken anti register deps using reg-moves. */
|
||||
else
|
||||
add_edge_to_ddg (g, e);
|
||||
|
||||
latency = dep_cost (link);
|
||||
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
|
||||
add_edge_to_ddg (g, e);
|
||||
}
|
||||
|
||||
/* The same as the above function, but it doesn't require a link parameter. */
|
||||
@ -247,6 +249,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
|
||||
gcc_assert (last_def_node);
|
||||
gcc_assert (first_def);
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
if (last_def->id != first_def->id)
|
||||
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id));
|
||||
#endif
|
||||
|
||||
/* Create inter-loop true dependences and anti dependences. */
|
||||
for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next)
|
||||
{
|
||||
@ -280,14 +287,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
|
||||
|
||||
gcc_assert (first_def_node);
|
||||
|
||||
if (last_def->id != first_def->id)
|
||||
{
|
||||
#ifdef ENABLE_CHECKING
|
||||
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id));
|
||||
#endif
|
||||
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
|
||||
REG_DEP, 1);
|
||||
}
|
||||
if (last_def->id != first_def->id
|
||||
|| !flag_modulo_sched_allow_regmoves)
|
||||
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
|
||||
REG_DEP, 1);
|
||||
|
||||
}
|
||||
}
|
||||
/* Create an inter-loop output dependence between LAST_DEF (which is the
|
||||
@ -392,7 +396,7 @@ build_intra_loop_deps (ddg_ptr g)
|
||||
continue;
|
||||
|
||||
add_forw_dep (link);
|
||||
create_ddg_dependence (g, src_node, dest_node, dep);
|
||||
create_ddg_dep_from_intra_loop_link (g, src_node, dest_node, dep);
|
||||
}
|
||||
|
||||
/* If this insn modifies memory, add an edge to all insns that access
|
||||
|
@ -328,7 +328,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-finline-functions -finline-functions-called-once @gol
|
||||
-finline-limit=@var{n} -fkeep-inline-functions @gol
|
||||
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
|
||||
-fmodulo-sched -fno-branch-count-reg @gol
|
||||
-fmodulo-sched -fmodulo-sched-allow-regmoves -fno-branch-count-reg @gol
|
||||
-fno-default-inline -fno-defer-pop -fmove-loop-invariants @gol
|
||||
-fno-function-cse -fno-guess-branch-probability @gol
|
||||
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
|
||||
@ -5265,6 +5265,13 @@ Perform swing modulo scheduling immediately before the first scheduling
|
||||
pass. This pass looks at innermost loops and reorders their
|
||||
instructions by overlapping different iterations.
|
||||
|
||||
@item -fmodulo-sched-allow-regmoves
|
||||
@opindex fmodulo-sched-allow-regmoves
|
||||
Perform more aggressive SMS based modulo scheduling with register moves
|
||||
allowed. By setting this flag certain anti-dependence edges will be
|
||||
deleted which will trigger the generation of reg-moves based on the
|
||||
life-range analysis.
|
||||
|
||||
@item -fno-branch-count-reg
|
||||
@opindex fno-branch-count-reg
|
||||
Do not use ``decrement and branch'' instructions on a count register,
|
||||
|
@ -1,3 +1,8 @@
|
||||
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
|
||||
Revital Eres <eres@il.ibm.com>
|
||||
|
||||
* gcc.dg/sms-antideps.c: New test.
|
||||
|
||||
2007-08-04 Paul Thomas <pault@gcc.gnu.org>
|
||||
|
||||
PR fortran/31214
|
||||
|
37
gcc/testsuite/gcc.dg/sms-antideps.c
Normal file
37
gcc/testsuite/gcc.dg/sms-antideps.c
Normal file
@ -0,0 +1,37 @@
|
||||
/* This test is a reduced test case for a bug that caused
|
||||
bootstrapping with -fmodulo-sched to fail. Related to a broken anti-dep
|
||||
that was not fixed by reg-moves. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Loop kernel exercised by this test.
   Starting from irslt = 1 and ix = IXI, each of the CTR iterations
   multiplies irslt by the current ix and then squares ix.
   Calls abort () unless the final irslt equals 14348907 (3 to the
   15th power, which is the result for IXI = 3, CTR = 4); otherwise
   returns irslt.
   NOTE(review): the statement order inside the loop is presumably
   what the test depends on -- confirm before restructuring.  */
unsigned long long
|
||||
foo (long long ixi, unsigned ctr)
|
||||
{
|
||||
unsigned long long irslt = 1;
|
||||
long long ix = ixi;
|
||||
|
||||
for (; ctr; ctr--)
|
||||
{
|
||||
/* ix is read here ...  */
irslt *= ix;
|
||||
/* ... and redefined here, later in the same iteration.  */
ix *= ix;
|
||||
}
|
||||
|
||||
/* Self-check: any other value means the loop computed a wrong result.  */
if (irslt != 14348907)
|
||||
abort ();
|
||||
return irslt;
|
||||
}
|
||||
|
||||
|
||||
/* Test driver: run foo on the single input whose expected result
   (14348907) foo itself checks; foo aborts on a mismatch.

   Returns 0 explicitly.  Falling off the end of main only yields a
   defined (zero) exit status from C99 onwards; under C89/gnu89 the
   status is undefined, and a "dg-do run" test is judged by that
   status.  */
int
main (void)
{
  unsigned long long res;

  /* The value is verified inside foo; it is kept in a local only to
     mirror the shape of the reduced test case.  */
  res = foo (3, 4);
  (void) res;

  return 0;
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user