New flag to control reg-moves generation

Co-Authored-By: Revital Eres <eres@il.ibm.com>

From-SVN: r127223
This commit is contained in:
Vladimir Yanovsky 2007-08-05 09:43:07 +00:00 committed by Revital Eres
parent 152b97088e
commit 517d76faef
6 changed files with 110 additions and 38 deletions

View File

@ -1,3 +1,18 @@
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
* doc/invoke.texi (-fmodulo-sched-allow-regmoves): Document new
flag.
* ddg.c (create_ddg_dependence): Rename to...
(create_ddg_dep_from_intra_loop_link): This. Do not check
for interloop edges. Do not create anti dependence edge when
a true dependence edge exists in the opposite direction and
-fmodulo-sched-allow-regmoves is set.
(build_intra_loop_deps): Call create_ddg_dep_from_intra_loop_link.
(add_cross_iteration_register_deps): Create anti dependence edge
when -fno-modulo-sched-allow-regmoves is set.
* common.opt (-fmodulo-sched-allow-regmoves): New flag.
2007-08-04 Richard Sandiford <richard@codesourcery.com>
* config/arm/arm.md (movsi): Add braces.

View File

@ -651,6 +651,10 @@ fmodulo-sched
Common Report Var(flag_modulo_sched) Optimization
Perform SMS based modulo scheduling before the first scheduling pass
fmodulo-sched-allow-regmoves
Common Report Var(flag_modulo_sched_allow_regmoves)
Perform SMS based modulo scheduling with register moves allowed
fmove-loop-invariants
Common Report Var(flag_move_loop_invariants) Init(1) Optimization
Move loop invariant computations out of loops

View File

@ -51,7 +51,8 @@ enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
static void add_scc_to_ddg (ddg_all_sccs_ptr, ddg_scc_ptr);
static void create_ddg_dependence (ddg_ptr, ddg_node_ptr, ddg_node_ptr, dep_t);
static void create_ddg_dep_from_intra_loop_link (ddg_ptr, ddg_node_ptr,
ddg_node_ptr, dep_t);
static void create_ddg_dep_no_link (ddg_ptr, ddg_node_ptr, ddg_node_ptr,
dep_type, dep_data_type, int);
static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type,
@ -145,22 +146,16 @@ mem_access_insn_p (rtx insn)
/* Computes the dependence parameters (latency, distance etc.), creates
a ddg_edge and adds it to the given DDG. */
static void
create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node,
ddg_node_ptr dest_node, dep_t link)
create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
ddg_node_ptr dest_node, dep_t link)
{
ddg_edge_ptr e;
int latency, distance = 0;
int interloop = (src_node->cuid >= dest_node->cuid);
dep_type t = TRUE_DEP;
dep_data_type dt = (mem_access_insn_p (src_node->insn)
&& mem_access_insn_p (dest_node->insn) ? MEM_DEP
: REG_DEP);
/* For now we don't have an exact calculation of the distance,
so assume 1 conservatively. */
if (interloop)
distance = 1;
gcc_assert (src_node->cuid < dest_node->cuid);
gcc_assert (link);
/* Note: REG_DEP_ANTI applies to MEM ANTI_DEP as well!! */
@ -168,27 +163,34 @@ create_ddg_dependence (ddg_ptr g, ddg_node_ptr src_node,
t = ANTI_DEP;
else if (DEP_KIND (link) == REG_DEP_OUTPUT)
t = OUTPUT_DEP;
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
if (interloop)
/* We currently choose not to create certain anti-deps edges and
compensate for that by generating reg-moves based on the life-range
analysis. The anti-deps that will be deleted are the ones which
have true-deps edges in the opposite direction (in other words
the kernel has only one def of the relevant register). TODO:
support the removal of all anti-deps edges, i.e. including those
whose register has multiple defs in the loop. */
if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
{
/* Some interloop dependencies are relaxed:
1. Every insn is output dependent on itself; ignore such deps.
2. Every true/flow dependence is an anti dependence in the
opposite direction with distance 1; such register deps
will be removed by renaming if broken --- ignore them. */
if (!(t == OUTPUT_DEP && src_node == dest_node)
&& !(t == ANTI_DEP && dt == REG_DEP))
add_backarc_to_ddg (g, e);
else
free (e);
rtx set;
set = single_set (dest_node->insn);
if (set)
{
int regno = REGNO (SET_DEST (set));
struct df_ref *first_def =
df_bb_regno_first_def_find (g->bb, regno);
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
if (bitmap_bit_p (bb_info->gen, first_def->id))
return;
}
}
else if (t == ANTI_DEP && dt == REG_DEP)
free (e); /* We can fix broken anti register deps using reg-moves. */
else
add_edge_to_ddg (g, e);
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
add_edge_to_ddg (g, e);
}
/* The same as the above function, but it doesn't require a link parameter. */
@ -247,6 +249,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
gcc_assert (last_def_node);
gcc_assert (first_def);
#ifdef ENABLE_CHECKING
if (last_def->id != first_def->id)
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id));
#endif
/* Create inter-loop true dependences and anti dependences. */
for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next)
{
@ -280,14 +287,11 @@ add_cross_iteration_register_deps (ddg_ptr g, struct df_ref *last_def)
gcc_assert (first_def_node);
if (last_def->id != first_def->id)
{
#ifdef ENABLE_CHECKING
gcc_assert (!bitmap_bit_p (bb_info->gen, first_def->id));
#endif
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
}
if (last_def->id != first_def->id
|| !flag_modulo_sched_allow_regmoves)
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
}
}
/* Create an inter-loop output dependence between LAST_DEF (which is the
@ -392,7 +396,7 @@ build_intra_loop_deps (ddg_ptr g)
continue;
add_forw_dep (link);
create_ddg_dependence (g, src_node, dest_node, dep);
create_ddg_dep_from_intra_loop_link (g, src_node, dest_node, dep);
}
/* If this insn modifies memory, add an edge to all insns that access

View File

@ -328,7 +328,7 @@ Objective-C and Objective-C++ Dialects}.
-finline-functions -finline-functions-called-once @gol
-finline-limit=@var{n} -fkeep-inline-functions @gol
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
-fmodulo-sched -fno-branch-count-reg @gol
-fmodulo-sched -fmodulo-sched-allow-regmoves -fno-branch-count-reg @gol
-fno-default-inline -fno-defer-pop -fmove-loop-invariants @gol
-fno-function-cse -fno-guess-branch-probability @gol
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
@ -5265,6 +5265,13 @@ Perform swing modulo scheduling immediately before the first scheduling
pass. This pass looks at innermost loops and reorders their
instructions by overlapping different iterations.
@item -fmodulo-sched-allow-regmoves
@opindex fmodulo-sched-allow-regmoves
Perform more aggressive SMS based modulo scheduling with register moves
allowed. By setting this flag, certain anti-dependence edges are
deleted, which triggers the generation of reg-moves based on the
life-range analysis.
@item -fno-branch-count-reg
@opindex fno-branch-count-reg
Do not use ``decrement and branch'' instructions on a count register,

View File

@ -1,3 +1,8 @@
2007-08-05 Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
* gcc.dg/sms-antideps.c: New test.
2007-08-04 Paul Thomas <pault@gcc.gnu.org>
PR fortran/31214

View File

@ -0,0 +1,37 @@
/* This test is a reduced test case for a bug that caused
bootstrapping with -fmodulo-sched to fail. Related to a broken anti-dep
that was not fixed by reg-moves. */
/* { dg-do run } */
/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves" } */
#include <stdlib.h>
/* Multiply together the successive squarings of IXI over CTR loop
   iterations and return the product.  Each iteration multiplies the
   running result by the current IX and then squares IX, so the result
   is IXI raised to the power (2^CTR - 1).

   The function is self-checking: it calls abort () unless the product
   equals 14348907 (3^15), which is the value obtained for IXI == 3 and
   CTR == 4.  */
unsigned long long
foo (long long ixi, unsigned ctr)
{
unsigned long long irslt = 1;
long long ix = ixi;
/* IX is read by the first statement and redefined by the second one in
   every iteration; NOTE(review): this use-then-def pattern is presumably
   the anti-dependence the test is meant to exercise -- confirm against
   the scheduler change it accompanies.  */
for (; ctr; ctr--)
{
irslt *= ix;
ix *= ix;
}
/* Any other value means the loop computed the wrong result.  */
if (irslt != 14348907)
abort ();
return irslt;
}
/* Test driver.  Calls foo with the single input it accepts (3, 4); foo
   aborts by itself if the computed value is wrong, so reaching the end
   of main means the test passed.

   Return 0 explicitly: this is a "dg-do run" test, so its exit status
   is what the harness checks, and falling off the end of main only
   yields a defined (zero) status from C99 onwards.  */
int
main (void)
{
  unsigned long long res;

  res = foo (3, 4);
  (void) res;	/* The value was already checked inside foo.  */
  return 0;
}