common.opt (freschedule-modulo-scheduled-loops): New flag.

2004-08-08  Mostafa Hagog  <mustafa@il.ibm.com>
	Ayal Zaks <zaks@il.ibm.com>

	* common.opt (freschedule-modulo-scheduled-loops): New flag.
	* final.c (final_scan_insn): Handle NOTE_DISABLE_SCHED_OF_BLOCK.
	* modulo-sched.c (sms_schedule): Emit a note to disable scheduling
	when -freschedule-modulo-scheduled-loops flag is not specified.
	(sms_schedule_by_order, ps_insn_advance_column, add_node_to_ps,
	add_node_to_ps, ps_has_conflicts, ps_add_node_check_conflicts):
	More accurate placing of insn in row of partial schedule.
	(ps_insn_find_column): New function.
	* rtl.h (NOTE_DISABLE_SCHED_OF_BLOCK): New note.
	* sched-rgn.c (sched_is_disabled_for_current_region_p): New.
	(schedule_region): Use sched_is_disabled_for_current_region_p.
	* docs/invoke.texi: Document -freschedule-modulo-scheduled-loops.

Co-Authored-By: Ayal Zaks <zaks@il.ibm.com>

From-SVN: r85696
This commit is contained in:
Mostafa Hagog 2004-08-08 21:35:53 +00:00 committed by Mostafa Hagog
parent 94538bd12a
commit d72372e46a
7 changed files with 211 additions and 54 deletions

View File

@ -1,3 +1,19 @@
2004-08-08 Mostafa Hagog <mustafa@il.ibm.com>
Ayal Zaks <zaks@il.ibm.com>
* common.opt (freschedule-modulo-scheduled-loops): New flag.
* final.c (final_scan_insn): Handle NOTE_DISABLE_SCHED_OF_BLOCK.
* modulo-sched.c (sms_schedule): Emit a note to disable scheduling
when -freschedule-modulo-scheduled-loops flag is not specified.
(sms_schedule_by_order, ps_insn_advance_column, add_node_to_ps,
add_node_to_ps, ps_has_conflicts, ps_add_node_check_conflicts):
More accurate placing of insn in row of partial schedule.
(ps_insn_find_column): New function.
* rtl.h (NOTE_DISABLE_SCHED_OF_BLOCK): New note.
* sched-rgn.c (sched_is_disabled_for_current_region_p): New.
(schedule_region): Use sched_is_disabled_for_current_region_p.
* docs/invoke.texi: Document -freschedule-modulo-scheduled-loops.
2004-08-07 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_expand_clrmem): Revert the last

View File

@ -543,6 +543,10 @@ fnew-ra
Common Report Var(flag_new_regalloc)
Use graph-coloring register allocation
freschedule-modulo-scheduled-loops
Common Report Var(flag_resched_modulo_sched)
Enable/Disable the traditional scheduling in loops that already passed modulo scheduling
fnon-call-exceptions
Common Report Var(flag_non_call_exceptions)
Support synchronous non-call exceptions

View File

@ -308,8 +308,8 @@ in the following sections.
-fsched-spec-load-dangerous @gol
-fsched-stalled-insns=@var{n} -sched-stalled-insns-dep=@var{n} @gol
-fsched2-use-superblocks @gol
-fsched2-use-traces -fsignaling-nans @gol
-fsingle-precision-constant @gol
-fsched2-use-traces -freschedule-modulo-scheduled-loops @gol
-fsignaling-nans -fsingle-precision-constant @gol
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
-funroll-all-loops -funroll-loops -fpeel-loops @gol
-funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
@ -4393,6 +4393,12 @@ reality and hurt the performance. This only makes
sense when scheduling after register allocation, i.e.@: with
@option{-fschedule-insns2} or at @option{-O2} or higher.
@item -freschedule-modulo-scheduled-loops
@opindex fscheduling-in-modulo-scheduled-loops
The modulo scheduling comes before the traditional scheduling, if a loop was modulo scheduled
we may want to prevent the later scheduling passes from changing its schedule, we use this
option to control that.
@item -fcaller-saves
@opindex fcaller-saves
Enable values to be allocated in registers that will be clobbered by

View File

@ -1704,6 +1704,7 @@ final_scan_insn (rtx insn, FILE *file, int optimize ATTRIBUTE_UNUSED,
case NOTE_INSN_FUNCTION_END:
case NOTE_INSN_REPEATED_LINE_NUMBER:
case NOTE_INSN_EXPECTED_VALUE:
case NOTE_DISABLE_SCHED_OF_BLOCK:
break;
case NOTE_INSN_UNLIKELY_EXECUTED_CODE:

View File

@ -152,7 +152,9 @@ void free_partial_schedule (partial_schedule_ptr);
void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
ddg_node_ptr node, int cycle);
ddg_node_ptr node, int cycle,
sbitmap must_precede,
sbitmap must_follow);
void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
@ -874,7 +876,7 @@ sms_schedule (FILE *dump_file)
continue;
/* For debugging. */
if (passes++ > MAX_SMS_LOOP_NUMBER && MAX_SMS_LOOP_NUMBER != -1)
if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
{
if (dump_file)
fprintf (dump_file, "SMS reached MAX_PASSES... \n");
@ -1086,8 +1088,8 @@ sms_schedule (FILE *dump_file)
int i;
start_sequence ();
/* Copy the original loop code before modifying it - so we can use
it later. */
/* Copy the original loop code before modifying it -
so we can use it later. */
for (i = 0; i < ps->g->num_nodes; i++)
duplicate_insn_chain (ps->g->nodes[i].first_note,
ps->g->nodes[i].insn);
@ -1106,6 +1108,13 @@ sms_schedule (FILE *dump_file)
set_columns_for_ps (ps);
permute_partial_schedule (ps, g->closing_branch->first_note);
/* Mark this loop as software pipelined so the later
scheduling passes doesn't touch it. */
if (! flag_resched_modulo_sched)
emit_note_before (NOTE_DISABLE_SCHED_OF_BLOCK,
g->closing_branch->insn);
generate_reg_moves (ps);
if (dump_file)
print_node_sched_params (dump_file, g->num_nodes);
@ -1217,6 +1226,9 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
sbitmap sched_nodes = sbitmap_alloc (num_nodes);
sbitmap psp = sbitmap_alloc (num_nodes);
sbitmap pss = sbitmap_alloc (num_nodes);
sbitmap must_precede = sbitmap_alloc (num_nodes);
sbitmap must_follow = sbitmap_alloc (num_nodes);
partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
while (try_again_with_larger_ii && ii < maxii)
@ -1258,9 +1270,11 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
ddg_node_ptr v_node = e->src;
if (TEST_BIT (sched_nodes, v_node->cuid))
{
early_start = MAX (early_start,
SCHED_TIME (v_node)
+ e->latency - (e->distance * ii));
int node_st = SCHED_TIME (v_node)
+ e->latency - (e->distance * ii);
early_start = MAX (early_start, node_st);
if (e->data_type == MEM_DEP)
end = MIN (end, SCHED_TIME (v_node) + ii - 1);
}
@ -1341,11 +1355,31 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
u, start, end, step);
/* use must_follow & must_precede bitmaps to determine order
of nodes within the cycle. */
sbitmap_zero (must_precede);
sbitmap_zero (must_follow);
for (e = u_node->in; e != 0; e = e->next_in)
if (TEST_BIT (sched_nodes, e->src->cuid)
&& e->latency == (ii * e->distance)
&& start == SCHED_TIME (e->src))
SET_BIT (must_precede, e->src->cuid);
for (e = u_node->out; e != 0; e = e->next_out)
if (TEST_BIT (sched_nodes, e->dest->cuid)
&& e->latency == (ii * e->distance)
&& end == SCHED_TIME (e->dest))
SET_BIT (must_follow, e->dest->cuid);
success = 0;
if ((step > 0 && start < end) || (step < 0 && start > end))
for (c = start; c != end; c += step)
{
ps_insn_ptr psi = ps_add_node_check_conflicts (ps, u_node, c);
ps_insn_ptr psi;
psi = ps_add_node_check_conflicts (ps, u_node, c,
must_precede,
must_follow);
if (psi)
{
@ -1884,13 +1918,80 @@ remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
return true;
}
/* Unlike what literature describes for modulo scheduling (which focuses
on VLIW machines) the order of the instructions inside a cycle is
important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
where the current instruction should go relative to the already
scheduled instructions in the given cycle. Go over these
instructions and find the first possible column to put it in. */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
sbitmap must_precede, sbitmap must_follow)
{
ps_insn_ptr next_ps_i;
ps_insn_ptr first_must_follow = NULL;
ps_insn_ptr last_must_precede = NULL;
int row;
if (! ps_i)
return false;
row = SMODULO (ps_i->cycle, ps->ii);
/* Find the first must follow and the last must precede
and insert the node immediatly after the must precede
but make sure that it there is no must follow after it. */
for (next_ps_i = ps->rows[row];
next_ps_i;
next_ps_i = next_ps_i->next_in_row)
{
if (TEST_BIT (must_follow, next_ps_i->node->cuid)
&& ! first_must_follow)
first_must_follow = next_ps_i;
if (TEST_BIT (must_precede, next_ps_i->node->cuid))
{
/* If we have already met a node that must follow, then
there is no possible column. */
if (first_must_follow)
return false;
else
last_must_precede = next_ps_i;
}
}
/* Now insert the node after INSERT_AFTER_PSI. */
if (! last_must_precede)
{
ps_i->next_in_row = ps->rows[row];
ps_i->prev_in_row = NULL;
if (ps_i->next_in_row)
ps_i->next_in_row->prev_in_row = ps_i;
ps->rows[row] = ps_i;
}
else
{
ps_i->next_in_row = last_must_precede->next_in_row;
last_must_precede->next_in_row = ps_i;
ps_i->prev_in_row = last_must_precede;
if (ps_i->next_in_row)
ps_i->next_in_row->prev_in_row = ps_i;
}
return true;
}
/* Advances the PS_INSN one column in its current row; returns false
in failure and true in success. */
in failure and true in success. Bit N is set in MUST_FOLLOW if
the node with cuid N must be come after the node pointed to by
PS_I when scheduled in the same cycle. */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
sbitmap must_follow)
{
ps_insn_ptr prev, next;
int row;
ddg_node_ptr next_node;
if (!ps || !ps_i)
return false;
@ -1900,17 +2001,12 @@ ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
if (! ps_i->next_in_row)
return false;
next_node = ps_i->next_in_row->node;
/* Check if next_in_row is dependent on ps_i, both having same sched
times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
if (ps_i->cycle == ps_i->next_in_row->cycle)
{
ddg_edge_ptr e;
ddg_node_ptr next_node = ps_i->next_in_row->node;
for (e = ps_i->node->out; e; e = e->next_out)
if (e->dest == next_node)
return false;
}
if (TEST_BIT (must_follow, next_node->cuid))
return false;
/* Advace PS_I over its next_in_row in the doubly linked list. */
prev = ps_i->prev_in_row;
@ -1935,14 +2031,17 @@ ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
}
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
Returns 0 if this is not possible and a PS_INSN otherwise. */
Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
before/after (respectively) the node pointed to by PS_I when scheduled
in the same cycle. */
static ps_insn_ptr
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
sbitmap must_precede, sbitmap must_follow)
{
ps_insn_ptr ps_i, next_ps_i, advance_after;
ps_insn_ptr ps_i;
int rest_count = 1;
int row = SMODULO (cycle, ps->ii);
ddg_edge_ptr e;
if (ps->rows[row]
&& ps->rows[row]->row_rest_count >= issue_rate)
@ -1952,30 +2051,14 @@ add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
rest_count += ps->rows[row]->row_rest_count;
ps_i = create_ps_insn (node, rest_count, cycle);
ps_i->next_in_row = ps->rows[row];
ps_i->prev_in_row = NULL;
if (ps_i->next_in_row)
ps_i->next_in_row->prev_in_row = ps_i;
ps->rows[row] = ps_i;
/* Check if n is dependent on an insn already in row, having same cycle
(typically ANTI_DEP). If so, n must skip over it. */
advance_after = NULL;
for (next_ps_i = ps_i->next_in_row;
next_ps_i;
next_ps_i = next_ps_i->next_in_row)
if (next_ps_i->cycle == cycle)
for (e = node->in; e; e = e->next_in)
if (e->src == next_ps_i->node)
advance_after = next_ps_i;
if (advance_after)
while (ps_i->prev_in_row != advance_after)
if (!ps_insn_advance_column (ps, ps_i))
{
remove_node_from_ps (ps, ps_i);
return NULL;
}
/* Finds and inserts PS_I according to MUST_FOLLOW and
MUST_PRECEDE. */
if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
{
free (ps_i);
return NULL;
}
return ps_i;
}
@ -2049,16 +2132,20 @@ ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
/* Checks if the given node causes resource conflicts when added to PS at
cycle C. If not the node is added to PS and returned; otherwise zero
is returned. */
is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
cuid N must be come before/after (respectively) the node pointed to by
PS_I when scheduled in the same cycle. */
ps_insn_ptr
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
int c, sbitmap must_precede,
sbitmap must_follow)
{
int has_conflicts = 0;
ps_insn_ptr ps_i;
/* First add the node to the PS, if this succeeds check for conflicts,
trying different issue slots in the same row. */
if (! (ps_i = add_node_to_ps (ps, n, c)))
/* First add the node to the PS, if this succeeds check for
conflicts, trying different issue slots in the same row. */
if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
return NULL; /* Failed to insert the node at the given cycle. */
has_conflicts = ps_has_conflicts (ps, c, c)
@ -2071,7 +2158,7 @@ ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
scheduled in without conflicts. */
while (has_conflicts)
{
if (! ps_insn_advance_column (ps, ps_i))
if (! ps_insn_advance_column (ps, ps_i, must_follow))
break;
has_conflicts = ps_has_conflicts (ps, c, c)
|| (ps->history > 0

View File

@ -983,6 +983,12 @@ enum insn_note
/* Generated at the start of a duplicated exit test. */
NOTE_INSN_LOOP_VTOP,
/* Mark that a block shouldn't be scheduled. This is currently
used in modulo scheduling. Modulo scheduling adds this note
to the blocks of the modulo-scheduled loops to disable scheduling
them in the later traditional scheduling passes. */
NOTE_DISABLE_SCHED_OF_BLOCK,
/* This kind of note is generated at the end of the function body,
just before the return insn or return label. In an optimizing
compilation it is deleted by the first jump optimization, after

View File

@ -117,6 +117,7 @@ static int *out_edges;
static int is_cfg_nonregular (void);
static int build_control_flow (struct edge_list *);
static void new_edge (int, int);
static bool sched_is_disabled_for_current_region_p (void);
/* A region is the main entity for interblock scheduling: insns
are allowed to move between blocks in the same region, along
@ -2332,6 +2333,37 @@ debug_dependencies (void)
fprintf (sched_dump, "\n");
}
/* Returns true if all the basic blocks of the current region have
NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */
static bool
sched_is_disabled_for_current_region_p (void)
{
rtx first_bb_insn, last_bb_insn, insn;
int bb;
for (bb = 0; bb < current_nr_blocks; bb++)
{
bool disable_sched = false;
/* Searching for NOTE_DISABLE_SCHED_OF_BLOCK note between the
start and end of the basic block. */
get_block_head_tail (BB_TO_BLOCK (bb), &first_bb_insn,
&last_bb_insn);
for (insn = last_bb_insn; insn != NULL && insn != first_bb_insn;
insn = PREV_INSN (insn))
if (GET_CODE (insn) == NOTE
&& (NOTE_LINE_NUMBER (insn)
== NOTE_DISABLE_SCHED_OF_BLOCK))
{
disable_sched = true;
break;
}
if (! disable_sched)
return false;
}
return true;
}
/* Schedule a region. A region is either an inner loop, a loop-free
subroutine, or a single basic block. Each bb in the region is
scheduled after its flow predecessors. */
@ -2347,6 +2379,11 @@ schedule_region (int rgn)
current_nr_blocks = RGN_NR_BLOCKS (rgn);
current_blocks = RGN_BLOCKS (rgn);
/* Don't schedule region that is marked by
NOTE_DISABLE_SCHED_OF_BLOCK. */
if (sched_is_disabled_for_current_region_p ())
return;
init_deps_global ();
/* Initializations for region data dependence analysis. */