New flag to control reg-moves generation

Co-Authored-By: Revital Eres <eres@il.ibm.com>

From-SVN: r127223
This commit is contained in:
Vladimir Yanovsky 2007-08-05 09:43:07 +00:00 committed by Revital Eres
parent 152b97088e
commit 517d76faef
6 changed files with 110 additions and 38 deletions

View File

@ -1,3 +1,18 @@
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
* doc/invoke.texi (-fmodulo-sched-allow-regmoves): Document new
flag.
* ddg.c (create_ddg_dependence): Rename to...
(create_ddg_dep_from_intra_loop_link): This. Do not check
for interloop edges. Do not create anti dependence edge when
a true dependence edge exists in the opposite direction and
-fmodulo-sched-allow-regmoves is set.
(build_intra_loop_deps): Call create_ddg_dep_from_intra_loop_link.
(add_cross_iteration_register_deps): Create anti dependence edge
when -fno-modulo-sched-allow-regmoves is set.
* common.opt (-fmodulo-sched-allow-regmoves): New flag.
2007-08-04 Richard Sandiford <richard@codesourcery.com> 2007-08-04 Richard Sandiford <richard@codesourcery.com>
* config/arm/arm.md (movsi): Add braces. * config/arm/arm.md (movsi): Add braces.

View File

@ -651,6 +651,10 @@ fmodulo-sched
Common Report Var(flag_modulo_sched) Optimization Common Report Var(flag_modulo_sched) Optimization
Perform SMS based modulo scheduling before the first scheduling pass Perform SMS based modulo scheduling before the first scheduling pass
fmodulo-sched-allow-regmoves
Common Report Var(flag_modulo_sched_allow_regmoves)
Perform SMS based modulo scheduling with register moves allowed
fmove-loop-invariants fmove-loop-invariants
Common Report Var(flag_move_loop_invariants) Init(1) Optimization Common Report Var(flag_move_loop_invariants) Init(1) Optimization
Move loop invariant computations out of loops Move loop invariant computations out of loops

View File

@ -51,7 +51,8 @@ enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr); static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr); static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
static void add_scc_to_ddg (ddg_all_sccs_ptr, ddg_scc_ptr); static void add_scc_to_ddg (ddg_all_sccs_ptr, ddg_scc_ptr);
static void create_ddg_dependence (ddg_ptr, ddg_node_ptr, ddg_node_ptr, dep_t); static void create_ddg_dep_from_intra_loop_link (ddg_ptr, ddg_node_ptr,
ddg_node_ptr, dep_t);
static void create_ddg_dep_no_link (ddg_ptr, ddg_node_ptr, ddg_node_ptr, static void create_ddg_dep_no_link (ddg_ptr, ddg_node_ptr, ddg_node_ptr,
dep_type, dep_data_type, int); dep_type, dep_data_type, int);
static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type, static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type,
@ -145,22 +146,16 @@ mem_access_insn_p (rtx insn)
/* Computes the dependence parameters (latency, distance etc.), creates /* Computes the dependence parameters (latency, distance etc.), creates
a ddg_edge and adds it to the given DDG. */ a ddg_edge and adds it to the given DDG. */
static void static void
create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node, create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
ddg_node_ptr dest_node, dep_t link) ddg_node_ptr dest_node, dep_t link)
{ {
ddg_edge_ptr e; ddg_edge_ptr e;
int latency, distance = 0; int latency, distance = 0;
int interloop = (src_node->cuid >= dest_node->cuid);
dep_type t = TRUE_DEP; dep_type t = TRUE_DEP;
dep_data_type dt = (mem_access_insn_p (src_node->insn) dep_data_type dt = (mem_access_insn_p (src_node->insn)
&& mem_access_insn_p (dest_node->insn) ? MEM_DEP && mem_access_insn_p (dest_node->insn) ? MEM_DEP
: REG_DEP); : REG_DEP);
gcc_assert (src_node->cuid < dest_node->cuid);
/* For now we don't have an exact calculation of the distance,
so assume 1 conservatively. */
if (interloop)
distance = 1;
gcc_assert (link); gcc_assert (link);
/* Note: REG_DEP_ANTI applies to MEM ANTI_DEP as well!! */ /* Note: REG_DEP_ANTI applies to MEM ANTI_DEP as well!! */
@ -168,27 +163,34 @@ create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node,
t = ANTI_DEP; t = ANTI_DEP;
else if (DEP_KIND (link) == REG_DEP_OUTPUT) else if (DEP_KIND (link) == REG_DEP_OUTPUT)
t = OUTPUT_DEP; t = OUTPUT_DEP;
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance); /* We currently choose not to create certain anti-deps edges and
compensate for that by generating reg-moves based on the life-range
if (interloop) analysis. The anti-deps that will be deleted are the ones which
have true-deps edges in the opposite direction (in other words
the kernel has only one def of the relevant register). TODO:
support the removal of all anti-deps edges, i.e. including those
whose register has multiple defs in the loop. */
if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
{ {
/* Some interloop dependencies are relaxed: rtx set;
1. Every insn is output dependent on itself; ignore such deps.
2. Every true/flow dependence is an anti dependence in the set = single_set (dest_node->insn);
opposite direction with distance 1; such register deps if (set)
will be removed by renaming if broken --- ignore them. */ {
if (!(t == OUTPUT_DEP && src_node == dest_node) int regno = REGNO (SET_DEST (set));
&& !(t == ANTI_DEP && dt == REG_DEP)) struct df_ref *first_def =
add_backarc_to_ddg (g, e); df_bb_regno_first_def_find (g->bb, regno);
else struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
free (e);
if (bitmap_bit_p (bb_info->gen, first_def->id))
return;
}
} }
else if (t == ANTI_DEP && dt == REG_DEP)
free (e); /* We can fix broken anti register deps using reg-moves. */ latency = dep_cost (link);
else e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
add_edge_to_ddg (g, e); add_edge_to_ddg (g, e);
} }
/* The same as the above function, but it doesn't require a link parameter. */ /* The same as the above function, but it doesn't require a link parameter. */
@ -247,6 +249,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
gcc_assert (last_def_node); gcc_assert (last_def_node);
gcc_assert (first_def); gcc_assert (first_def);
#ifdef ENABLE_CHECKING
if (last_def->id != first_def->id)
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id));
#endif
/* Create inter-loop true dependences and anti dependences. */ /* Create inter-loop true dependences and anti dependences. */
for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next) for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next)
{ {
@ -280,14 +287,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
gcc_assert (first_def_node); gcc_assert (first_def_node);
if (last_def->id != first_def->id) if (last_def->id != first_def->id
{ || !flag_modulo_sched_allow_regmoves)
#ifdef ENABLE_CHECKING create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id)); REG_DEP, 1);
#endif
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
}
} }
} }
/* Create an inter-loop output dependence between LAST_DEF (which is the /* Create an inter-loop output dependence between LAST_DEF (which is the
@ -392,7 +396,7 @@ build_intra_loop_deps (ddg_ptr g)
continue; continue;
add_forw_dep (link); add_forw_dep (link);
create_ddg_dependence (g, src_node, dest_node, dep); create_ddg_dep_from_intra_loop_link (g, src_node, dest_node, dep);
} }
/* If this insn modifies memory, add an edge to all insns that access /* If this insn modifies memory, add an edge to all insns that access

View File

@ -328,7 +328,7 @@ Objective-C and Objective-C++ Dialects}.
-finline-functions -finline-functions-called-once @gol -finline-functions -finline-functions-called-once @gol
-finline-limit=@var{n} -fkeep-inline-functions @gol -finline-limit=@var{n} -fkeep-inline-functions @gol
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol -fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
-fmodulo-sched -fno-branch-count-reg @gol -fmodulo-sched -fmodulo-sched-allow-regmoves -fno-branch-count-reg @gol
-fno-default-inline -fno-defer-pop -fmove-loop-invariants @gol -fno-default-inline -fno-defer-pop -fmove-loop-invariants @gol
-fno-function-cse -fno-guess-branch-probability @gol -fno-function-cse -fno-guess-branch-probability @gol
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol -fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
@ -5265,6 +5265,13 @@ Perform swing modulo scheduling immediately before the first scheduling
pass. This pass looks at innermost loops and reorders their pass. This pass looks at innermost loops and reorders their
instructions by overlapping different iterations. instructions by overlapping different iterations.
@item -fmodulo-sched-allow-regmoves
@opindex fmodulo-sched-allow-regmoves
Perform more aggressive SMS based modulo scheduling with register moves
allowed.  By setting this flag certain anti-dependence edges will be
deleted, which will trigger the generation of reg-moves based on the
life-range analysis.
@item -fno-branch-count-reg @item -fno-branch-count-reg
@opindex fno-branch-count-reg @opindex fno-branch-count-reg
Do not use ``decrement and branch'' instructions on a count register, Do not use ``decrement and branch'' instructions on a count register,

View File

@ -1,3 +1,8 @@
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
* gcc.dg/sms-antideps.c: New test.
2007-08-04 Paul Thomas <pault@gcc.gnu.org> 2007-08-04 Paul Thomas <pault@gcc.gnu.org>
PR fortran/31214 PR fortran/31214

View File

@ -0,0 +1,37 @@
/* This test is a reduced test case for a bug that caused
bootstrapping with -fmodulo-sched to fail. Related to a broken anti-dep
that was not fixed by reg-moves. */
/* { dg-do run } */
/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves" } */
#include <stdlib.h>
/* Multiply together the successive squarings of IXI (IXI, IXI^2, IXI^4,
   ...), one factor per loop trip, for CTR trips.  Calls abort () unless
   the accumulated product equals 14348907; otherwise returns the
   product.  */
unsigned long long
foo (long long ixi, unsigned ctr)
{
  unsigned long long product = 1;
  long long power = ixi;

  while (ctr != 0)
    {
      /* Fold the current power into the product, then square the power
         for the next trip.  */
      product = product * power;
      power = power * power;
      ctr--;
    }

  if (product != 14348907)
    abort ();

  return product;
}
/* Test driver.  foo calls abort () itself when its result is wrong, so
   reaching the end of main means the test passed.  Return 0 explicitly:
   without it the exit status is undefined under C89/gnu89, and a
   "dg-do run" test is judged by that status.  */
int
main (void)
{
  foo (3, 4);
  return 0;
}