New flag to control reg-moves generation
Co-Authored-By: Revital Eres <eres@il.ibm.com> From-SVN: r127223
This commit is contained in:
parent
152b97088e
commit
517d76faef
@ -1,3 +1,18 @@
|
|||||||
|
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
|
||||||
|
Revital Eres <eres@il.ibm.com>
|
||||||
|
|
||||||
|
* doc/invoke.texi (-fmodulo-sched-allow-regmoves): Document new
|
||||||
|
flag.
|
||||||
|
* ddg.c (create_ddg_dependence): Rename to...
|
||||||
|
(create_ddg_dep_from_intra_loop_link): This. Do not check
|
||||||
|
for interloop edges. Do not create anti dependence edge when
|
||||||
|
a true dependence edge exists in the opposite direction and
|
||||||
|
-fmodulo-sched-allow-regmoves is set.
|
||||||
|
(build_intra_loop_deps): Call create_ddg_dep_from_intra_loop_link.
|
||||||
|
(add_cross_iteration_register_deps): Create anti dependence edge
|
||||||
|
when -fno-modulo-sched-allow-regmoves is set.
|
||||||
|
* common.opt (-fmodulo-sched-allow-regmoves): New flag.
|
||||||
|
|
||||||
2007-08-04 Richard Sandiford <richard@codesourcery.com>
|
2007-08-04 Richard Sandiford <richard@codesourcery.com>
|
||||||
|
|
||||||
* config/arm/arm.md (movsi): Add braces.
|
* config/arm/arm.md (movsi): Add braces.
|
||||||
|
@ -651,6 +651,10 @@ fmodulo-sched
|
|||||||
Common Report Var(flag_modulo_sched) Optimization
|
Common Report Var(flag_modulo_sched) Optimization
|
||||||
Perform SMS based modulo scheduling before the first scheduling pass
|
Perform SMS based modulo scheduling before the first scheduling pass
|
||||||
|
|
||||||
|
fmodulo-sched-allow-regmoves
|
||||||
|
Common Report Var(flag_modulo_sched_allow_regmoves)
|
||||||
|
Perform SMS based modulo scheduling with register moves allowed
|
||||||
|
|
||||||
fmove-loop-invariants
|
fmove-loop-invariants
|
||||||
Common Report Var(flag_move_loop_invariants) Init(1) Optimization
|
Common Report Var(flag_move_loop_invariants) Init(1) Optimization
|
||||||
Move loop invariant computations out of loops
|
Move loop invariant computations out of loops
|
||||||
|
78
gcc/ddg.c
78
gcc/ddg.c
@ -51,7 +51,8 @@ enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
|
|||||||
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
|
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
|
||||||
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
|
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
|
||||||
static void add_scc_to_ddg (ddg_all_sccs_ptr, ddg_scc_ptr);
|
static void add_scc_to_ddg (ddg_all_sccs_ptr, ddg_scc_ptr);
|
||||||
static void create_ddg_dependence (ddg_ptr, ddg_node_ptr, ddg_node_ptr, dep_t);
|
static void create_ddg_dep_from_intra_loop_link (ddg_ptr, ddg_node_ptr,
|
||||||
|
ddg_node_ptr, dep_t);
|
||||||
static void create_ddg_dep_no_link (ddg_ptr, ddg_node_ptr, ddg_node_ptr,
|
static void create_ddg_dep_no_link (ddg_ptr, ddg_node_ptr, ddg_node_ptr,
|
||||||
dep_type, dep_data_type, int);
|
dep_type, dep_data_type, int);
|
||||||
static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type,
|
static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type,
|
||||||
@ -145,22 +146,16 @@ mem_access_insn_p (rtx insn)
|
|||||||
/* Computes the dependence parameters (latency, distance etc.), creates
|
/* Computes the dependence parameters (latency, distance etc.), creates
|
||||||
a ddg_edge and adds it to the given DDG. */
|
a ddg_edge and adds it to the given DDG. */
|
||||||
static void
|
static void
|
||||||
create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node,
|
create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
|
||||||
ddg_node_ptr dest_node, dep_t link)
|
ddg_node_ptr dest_node, dep_t link)
|
||||||
{
|
{
|
||||||
ddg_edge_ptr e;
|
ddg_edge_ptr e;
|
||||||
int latency, distance = 0;
|
int latency, distance = 0;
|
||||||
int interloop = (src_node->cuid >= dest_node->cuid);
|
|
||||||
dep_type t = TRUE_DEP;
|
dep_type t = TRUE_DEP;
|
||||||
dep_data_type dt = (mem_access_insn_p (src_node->insn)
|
dep_data_type dt = (mem_access_insn_p (src_node->insn)
|
||||||
&& mem_access_insn_p (dest_node->insn) ? MEM_DEP
|
&& mem_access_insn_p (dest_node->insn) ? MEM_DEP
|
||||||
: REG_DEP);
|
: REG_DEP);
|
||||||
|
gcc_assert (src_node->cuid < dest_node->cuid);
|
||||||
/* For now we don't have an exact calculation of the distance,
|
|
||||||
so assume 1 conservatively. */
|
|
||||||
if (interloop)
|
|
||||||
distance = 1;
|
|
||||||
|
|
||||||
gcc_assert (link);
|
gcc_assert (link);
|
||||||
|
|
||||||
/* Note: REG_DEP_ANTI applies to MEM ANTI_DEP as well!! */
|
/* Note: REG_DEP_ANTI applies to MEM ANTI_DEP as well!! */
|
||||||
@ -168,27 +163,34 @@ create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node,
|
|||||||
t = ANTI_DEP;
|
t = ANTI_DEP;
|
||||||
else if (DEP_KIND (link) == REG_DEP_OUTPUT)
|
else if (DEP_KIND (link) == REG_DEP_OUTPUT)
|
||||||
t = OUTPUT_DEP;
|
t = OUTPUT_DEP;
|
||||||
latency = dep_cost (link);
|
|
||||||
|
|
||||||
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
|
/* We currently choose not to create certain anti-dep edges and
|
||||||
|
compensate for that by generating reg-moves based on the life-range
|
||||||
if (interloop)
|
analysis. The anti-deps that will be deleted are the ones which
|
||||||
|
have true-deps edges in the opposite direction (in other words
|
||||||
|
the kernel has only one def of the relevant register). TODO:
|
||||||
|
support the removal of all anti-deps edges, i.e. including those
|
||||||
|
whose register has multiple defs in the loop. */
|
||||||
|
if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
|
||||||
{
|
{
|
||||||
/* Some interloop dependencies are relaxed:
|
rtx set;
|
||||||
1. Every insn is output dependent on itself; ignore such deps.
|
|
||||||
2. Every true/flow dependence is an anti dependence in the
|
set = single_set (dest_node->insn);
|
||||||
opposite direction with distance 1; such register deps
|
if (set)
|
||||||
will be removed by renaming if broken --- ignore them. */
|
{
|
||||||
if (!(t == OUTPUT_DEP && src_node == dest_node)
|
int regno = REGNO (SET_DEST (set));
|
||||||
&& !(t == ANTI_DEP && dt == REG_DEP))
|
struct df_ref *first_def =
|
||||||
add_backarc_to_ddg (g, e);
|
df_bb_regno_first_def_find (g->bb, regno);
|
||||||
else
|
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
|
||||||
free (e);
|
|
||||||
|
if (bitmap_bit_p (bb_info->gen, first_def->id))
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (t == ANTI_DEP && dt == REG_DEP)
|
|
||||||
free (e); /* We can fix broken anti register deps using reg-moves. */
|
latency = dep_cost (link);
|
||||||
else
|
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
|
||||||
add_edge_to_ddg (g, e);
|
add_edge_to_ddg (g, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The same as the above function, but it doesn't require a link parameter. */
|
/* The same as the above function, but it doesn't require a link parameter. */
|
||||||
@ -247,6 +249,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
|
|||||||
gcc_assert (last_def_node);
|
gcc_assert (last_def_node);
|
||||||
gcc_assert (first_def);
|
gcc_assert (first_def);
|
||||||
|
|
||||||
|
#ifdef ENABLE_CHECKING
|
||||||
|
if (last_def->id != first_def->id)
|
||||||
|
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id));
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Create inter-loop true dependences and anti dependences. */
|
/* Create inter-loop true dependences and anti dependences. */
|
||||||
for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next)
|
for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next)
|
||||||
{
|
{
|
||||||
@ -280,14 +287,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
|
|||||||
|
|
||||||
gcc_assert (first_def_node);
|
gcc_assert (first_def_node);
|
||||||
|
|
||||||
if (last_def->id != first_def->id)
|
if (last_def->id != first_def->id
|
||||||
{
|
|| !flag_modulo_sched_allow_regmoves)
|
||||||
#ifdef ENABLE_CHECKING
|
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
|
||||||
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id));
|
REG_DEP, 1);
|
||||||
#endif
|
|
||||||
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
|
|
||||||
REG_DEP, 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Create an inter-loop output dependence between LAST_DEF (which is the
|
/* Create an inter-loop output dependence between LAST_DEF (which is the
|
||||||
@ -392,7 +396,7 @@ build_intra_loop_deps (ddg_ptr g)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
add_forw_dep (link);
|
add_forw_dep (link);
|
||||||
create_ddg_dependence (g, src_node, dest_node, dep);
|
create_ddg_dep_from_intra_loop_link (g, src_node, dest_node, dep);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If this insn modifies memory, add an edge to all insns that access
|
/* If this insn modifies memory, add an edge to all insns that access
|
||||||
|
@ -328,7 +328,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||||||
-finline-functions -finline-functions-called-once @gol
|
-finline-functions -finline-functions-called-once @gol
|
||||||
-finline-limit=@var{n} -fkeep-inline-functions @gol
|
-finline-limit=@var{n} -fkeep-inline-functions @gol
|
||||||
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
|
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
|
||||||
-fmodulo-sched -fno-branch-count-reg @gol
|
-fmodulo-sched -fmodulo-sched-allow-regmoves -fno-branch-count-reg @gol
|
||||||
-fno-default-inline -fno-defer-pop -fmove-loop-invariants @gol
|
-fno-default-inline -fno-defer-pop -fmove-loop-invariants @gol
|
||||||
-fno-function-cse -fno-guess-branch-probability @gol
|
-fno-function-cse -fno-guess-branch-probability @gol
|
||||||
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
|
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
|
||||||
@ -5265,6 +5265,13 @@ Perform swing modulo scheduling immediately before the first scheduling
|
|||||||
pass. This pass looks at innermost loops and reorders their
|
pass. This pass looks at innermost loops and reorders their
|
||||||
instructions by overlapping different iterations.
|
instructions by overlapping different iterations.
|
||||||
|
|
||||||
|
@item -fmodulo-sched-allow-regmoves
|
||||||
|
@opindex fmodulo-sched-allow-regmoves
|
||||||
|
Perform more aggressive SMS-based modulo scheduling with register moves
|
||||||
|
allowed. By setting this flag certain anti-dependence edges will be
|
||||||
|
deleted which will trigger the generation of reg-moves based on the
|
||||||
|
life-range analysis.
|
||||||
|
|
||||||
@item -fno-branch-count-reg
|
@item -fno-branch-count-reg
|
||||||
@opindex fno-branch-count-reg
|
@opindex fno-branch-count-reg
|
||||||
Do not use ``decrement and branch'' instructions on a count register,
|
Do not use ``decrement and branch'' instructions on a count register,
|
||||||
|
@ -1,3 +1,8 @@
|
|||||||
|
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
|
||||||
|
Revital Eres <eres@il.ibm.com>
|
||||||
|
|
||||||
|
* gcc.dg/sms-antideps.c: New test.
|
||||||
|
|
||||||
2007-08-04 Paul Thomas <pault@gcc.gnu.org>
|
2007-08-04 Paul Thomas <pault@gcc.gnu.org>
|
||||||
|
|
||||||
PR fortran/31214
|
PR fortran/31214
|
||||||
|
37
gcc/testsuite/gcc.dg/sms-antideps.c
Normal file
37
gcc/testsuite/gcc.dg/sms-antideps.c
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
/* This test is a reduced test case for a bug that caused
|
||||||
|
bootstrapping with -fmodulo-sched to fail. Related to a broken anti-dep
|
||||||
|
that was not fixed by reg-moves. */
|
||||||
|
|
||||||
|
/* { dg-do run } */
|
||||||
|
/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves" } */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
unsigned long long
|
||||||
|
foo (long long ixi, unsigned ctr)
|
||||||
|
{
|
||||||
|
unsigned long long irslt = 1;
|
||||||
|
long long ix = ixi;
|
||||||
|
|
||||||
|
for (; ctr; ctr--)
|
||||||
|
{
|
||||||
|
irslt *= ix;
|
||||||
|
ix *= ix;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (irslt != 14348907)
|
||||||
|
abort ();
|
||||||
|
return irslt;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
unsigned long long res;
|
||||||
|
|
||||||
|
res = foo (3, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user