common.opt (freschedule-modulo-scheduled-loops): New flag.
2004-08-08 Mostafa Hagog <mustafa@il.ibm.com> Ayal Zaks <zaks@il.ibm.com> * common.opt (freschedule-modulo-scheduled-loops): New flag. * final.c (final_scan_insn): Handle NOTE_DISABLE_SCHED_OF_BLOCK. * modulo-sched.c (sms_schedule): Emit a note to disable scheduling when -freschedule-modulo-scheduled-loops flag is not specified. (sms_schedule_by_order, ps_insn_advance_column, add_node_to_ps, add_node_to_ps, ps_has_conflicts, ps_add_node_check_conflicts): More accurate placing of insn in row of partial schedule. (ps_insn_find_column): New function. * rtl.h (NOTE_DISABLE_SCHED_OF_BLOCK): New note. * sched-rgn.c (sched_is_disabled_for_current_region_p): New. (schedule_region): Use sched_is_disabled_for_current_region_p. * docs/invoke.texi: Document -freschedule-modulo-scheduled-loops. Co-Authored-By: Ayal Zaks <zaks@il.ibm.com> From-SVN: r85696
This commit is contained in:
parent
94538bd12a
commit
d72372e46a
|
@ -1,3 +1,19 @@
|
|||
2004-08-08 Mostafa Hagog <mustafa@il.ibm.com>
|
||||
Ayal Zaks <zaks@il.ibm.com>
|
||||
|
||||
* common.opt (freschedule-modulo-scheduled-loops): New flag.
|
||||
* final.c (final_scan_insn): Handle NOTE_DISABLE_SCHED_OF_BLOCK.
|
||||
* modulo-sched.c (sms_schedule): Emit a note to disable scheduling
|
||||
when -freschedule-modulo-scheduled-loops flag is not specified.
|
||||
(sms_schedule_by_order, ps_insn_advance_column, add_node_to_ps,
|
||||
add_node_to_ps, ps_has_conflicts, ps_add_node_check_conflicts):
|
||||
More accurate placing of insn in row of partial schedule.
|
||||
(ps_insn_find_column): New function.
|
||||
* rtl.h (NOTE_DISABLE_SCHED_OF_BLOCK): New note.
|
||||
* sched-rgn.c (sched_is_disabled_for_current_region_p): New.
|
||||
(schedule_region): Use sched_is_disabled_for_current_region_p.
|
||||
* docs/invoke.texi: Document -freschedule-modulo-scheduled-loops.
|
||||
|
||||
2004-08-07 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* config/i386/i386.c (ix86_expand_clrmem): Revert the last
|
||||
|
|
|
@ -543,6 +543,10 @@ fnew-ra
|
|||
Common Report Var(flag_new_regalloc)
|
||||
Use graph-coloring register allocation
|
||||
|
||||
freschedule-modulo-scheduled-loops
|
||||
Common Report Var(flag_resched_modulo_sched)
|
||||
Enable/Disable the traditional scheduling in loops that already passed modulo scheduling
|
||||
|
||||
fnon-call-exceptions
|
||||
Common Report Var(flag_non_call_exceptions)
|
||||
Support synchronous non-call exceptions
|
||||
|
|
|
@ -308,8 +308,8 @@ in the following sections.
|
|||
-fsched-spec-load-dangerous @gol
|
||||
-fsched-stalled-insns=@var{n} -sched-stalled-insns-dep=@var{n} @gol
|
||||
-fsched2-use-superblocks @gol
|
||||
-fsched2-use-traces -fsignaling-nans @gol
|
||||
-fsingle-precision-constant @gol
|
||||
-fsched2-use-traces -freschedule-modulo-scheduled-loops @gol
|
||||
-fsignaling-nans -fsingle-precision-constant @gol
|
||||
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
|
||||
-funroll-all-loops -funroll-loops -fpeel-loops @gol
|
||||
-funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
|
||||
|
@ -4393,6 +4393,12 @@ reality and hurt the performance. This only makes
|
|||
sense when scheduling after register allocation, i.e.@: with
|
||||
@option{-fschedule-insns2} or at @option{-O2} or higher.
|
||||
|
||||
@item -freschedule-modulo-scheduled-loops
|
||||
@opindex fscheduling-in-modulo-scheduled-loops
|
||||
The modulo scheduling comes before the traditional scheduling, if a loop was modulo scheduled
|
||||
we may want to prevent the later scheduling passes from changing its schedule, we use this
|
||||
option to control that.
|
||||
|
||||
@item -fcaller-saves
|
||||
@opindex fcaller-saves
|
||||
Enable values to be allocated in registers that will be clobbered by
|
||||
|
|
|
@ -1704,6 +1704,7 @@ final_scan_insn (rtx insn, FILE *file, int optimize ATTRIBUTE_UNUSED,
|
|||
case NOTE_INSN_FUNCTION_END:
|
||||
case NOTE_INSN_REPEATED_LINE_NUMBER:
|
||||
case NOTE_INSN_EXPECTED_VALUE:
|
||||
case NOTE_DISABLE_SCHED_OF_BLOCK:
|
||||
break;
|
||||
|
||||
case NOTE_INSN_UNLIKELY_EXECUTED_CODE:
|
||||
|
|
|
@ -152,7 +152,9 @@ void free_partial_schedule (partial_schedule_ptr);
|
|||
void reset_partial_schedule (partial_schedule_ptr, int new_ii);
|
||||
void print_partial_schedule (partial_schedule_ptr, FILE *);
|
||||
ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
|
||||
ddg_node_ptr node, int cycle);
|
||||
ddg_node_ptr node, int cycle,
|
||||
sbitmap must_precede,
|
||||
sbitmap must_follow);
|
||||
void rotate_partial_schedule (partial_schedule_ptr, int);
|
||||
void set_row_column_for_ps (partial_schedule_ptr);
|
||||
|
||||
|
@ -874,7 +876,7 @@ sms_schedule (FILE *dump_file)
|
|||
continue;
|
||||
|
||||
/* For debugging. */
|
||||
if (passes++ > MAX_SMS_LOOP_NUMBER && MAX_SMS_LOOP_NUMBER != -1)
|
||||
if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "SMS reached MAX_PASSES... \n");
|
||||
|
@ -1086,8 +1088,8 @@ sms_schedule (FILE *dump_file)
|
|||
int i;
|
||||
|
||||
start_sequence ();
|
||||
/* Copy the original loop code before modifying it - so we can use
|
||||
it later. */
|
||||
/* Copy the original loop code before modifying it -
|
||||
so we can use it later. */
|
||||
for (i = 0; i < ps->g->num_nodes; i++)
|
||||
duplicate_insn_chain (ps->g->nodes[i].first_note,
|
||||
ps->g->nodes[i].insn);
|
||||
|
@ -1106,6 +1108,13 @@ sms_schedule (FILE *dump_file)
|
|||
set_columns_for_ps (ps);
|
||||
|
||||
permute_partial_schedule (ps, g->closing_branch->first_note);
|
||||
|
||||
/* Mark this loop as software pipelined so the later
|
||||
scheduling passes doesn't touch it. */
|
||||
if (! flag_resched_modulo_sched)
|
||||
emit_note_before (NOTE_DISABLE_SCHED_OF_BLOCK,
|
||||
g->closing_branch->insn);
|
||||
|
||||
generate_reg_moves (ps);
|
||||
if (dump_file)
|
||||
print_node_sched_params (dump_file, g->num_nodes);
|
||||
|
@ -1217,6 +1226,9 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
|
|||
sbitmap sched_nodes = sbitmap_alloc (num_nodes);
|
||||
sbitmap psp = sbitmap_alloc (num_nodes);
|
||||
sbitmap pss = sbitmap_alloc (num_nodes);
|
||||
sbitmap must_precede = sbitmap_alloc (num_nodes);
|
||||
sbitmap must_follow = sbitmap_alloc (num_nodes);
|
||||
|
||||
partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
|
||||
|
||||
while (try_again_with_larger_ii && ii < maxii)
|
||||
|
@ -1258,9 +1270,11 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
|
|||
ddg_node_ptr v_node = e->src;
|
||||
if (TEST_BIT (sched_nodes, v_node->cuid))
|
||||
{
|
||||
early_start = MAX (early_start,
|
||||
SCHED_TIME (v_node)
|
||||
+ e->latency - (e->distance * ii));
|
||||
int node_st = SCHED_TIME (v_node)
|
||||
+ e->latency - (e->distance * ii);
|
||||
|
||||
early_start = MAX (early_start, node_st);
|
||||
|
||||
if (e->data_type == MEM_DEP)
|
||||
end = MIN (end, SCHED_TIME (v_node) + ii - 1);
|
||||
}
|
||||
|
@ -1341,11 +1355,31 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
|
|||
fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
|
||||
u, start, end, step);
|
||||
|
||||
/* use must_follow & must_precede bitmaps to determine order
|
||||
of nodes within the cycle. */
|
||||
sbitmap_zero (must_precede);
|
||||
sbitmap_zero (must_follow);
|
||||
for (e = u_node->in; e != 0; e = e->next_in)
|
||||
if (TEST_BIT (sched_nodes, e->src->cuid)
|
||||
&& e->latency == (ii * e->distance)
|
||||
&& start == SCHED_TIME (e->src))
|
||||
SET_BIT (must_precede, e->src->cuid);
|
||||
|
||||
for (e = u_node->out; e != 0; e = e->next_out)
|
||||
if (TEST_BIT (sched_nodes, e->dest->cuid)
|
||||
&& e->latency == (ii * e->distance)
|
||||
&& end == SCHED_TIME (e->dest))
|
||||
SET_BIT (must_follow, e->dest->cuid);
|
||||
|
||||
success = 0;
|
||||
if ((step > 0 && start < end) || (step < 0 && start > end))
|
||||
for (c = start; c != end; c += step)
|
||||
{
|
||||
ps_insn_ptr psi = ps_add_node_check_conflicts (ps, u_node, c);
|
||||
ps_insn_ptr psi;
|
||||
|
||||
psi = ps_add_node_check_conflicts (ps, u_node, c,
|
||||
must_precede,
|
||||
must_follow);
|
||||
|
||||
if (psi)
|
||||
{
|
||||
|
@ -1884,13 +1918,80 @@ remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Unlike what literature describes for modulo scheduling (which focuses
|
||||
on VLIW machines) the order of the instructions inside a cycle is
|
||||
important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
|
||||
where the current instruction should go relative to the already
|
||||
scheduled instructions in the given cycle. Go over these
|
||||
instructions and find the first possible column to put it in. */
|
||||
static bool
|
||||
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
|
||||
sbitmap must_precede, sbitmap must_follow)
|
||||
{
|
||||
ps_insn_ptr next_ps_i;
|
||||
ps_insn_ptr first_must_follow = NULL;
|
||||
ps_insn_ptr last_must_precede = NULL;
|
||||
int row;
|
||||
|
||||
if (! ps_i)
|
||||
return false;
|
||||
|
||||
row = SMODULO (ps_i->cycle, ps->ii);
|
||||
|
||||
/* Find the first must follow and the last must precede
|
||||
and insert the node immediatly after the must precede
|
||||
but make sure that it there is no must follow after it. */
|
||||
for (next_ps_i = ps->rows[row];
|
||||
next_ps_i;
|
||||
next_ps_i = next_ps_i->next_in_row)
|
||||
{
|
||||
if (TEST_BIT (must_follow, next_ps_i->node->cuid)
|
||||
&& ! first_must_follow)
|
||||
first_must_follow = next_ps_i;
|
||||
if (TEST_BIT (must_precede, next_ps_i->node->cuid))
|
||||
{
|
||||
/* If we have already met a node that must follow, then
|
||||
there is no possible column. */
|
||||
if (first_must_follow)
|
||||
return false;
|
||||
else
|
||||
last_must_precede = next_ps_i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now insert the node after INSERT_AFTER_PSI. */
|
||||
|
||||
if (! last_must_precede)
|
||||
{
|
||||
ps_i->next_in_row = ps->rows[row];
|
||||
ps_i->prev_in_row = NULL;
|
||||
if (ps_i->next_in_row)
|
||||
ps_i->next_in_row->prev_in_row = ps_i;
|
||||
ps->rows[row] = ps_i;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_i->next_in_row = last_must_precede->next_in_row;
|
||||
last_must_precede->next_in_row = ps_i;
|
||||
ps_i->prev_in_row = last_must_precede;
|
||||
if (ps_i->next_in_row)
|
||||
ps_i->next_in_row->prev_in_row = ps_i;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Advances the PS_INSN one column in its current row; returns false
|
||||
in failure and true in success. */
|
||||
in failure and true in success. Bit N is set in MUST_FOLLOW if
|
||||
the node with cuid N must be come after the node pointed to by
|
||||
PS_I when scheduled in the same cycle. */
|
||||
static int
|
||||
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
|
||||
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
|
||||
sbitmap must_follow)
|
||||
{
|
||||
ps_insn_ptr prev, next;
|
||||
int row;
|
||||
ddg_node_ptr next_node;
|
||||
|
||||
if (!ps || !ps_i)
|
||||
return false;
|
||||
|
@ -1900,17 +2001,12 @@ ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
|
|||
if (! ps_i->next_in_row)
|
||||
return false;
|
||||
|
||||
next_node = ps_i->next_in_row->node;
|
||||
|
||||
/* Check if next_in_row is dependent on ps_i, both having same sched
|
||||
times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
|
||||
if (ps_i->cycle == ps_i->next_in_row->cycle)
|
||||
{
|
||||
ddg_edge_ptr e;
|
||||
ddg_node_ptr next_node = ps_i->next_in_row->node;
|
||||
|
||||
for (e = ps_i->node->out; e; e = e->next_out)
|
||||
if (e->dest == next_node)
|
||||
return false;
|
||||
}
|
||||
if (TEST_BIT (must_follow, next_node->cuid))
|
||||
return false;
|
||||
|
||||
/* Advace PS_I over its next_in_row in the doubly linked list. */
|
||||
prev = ps_i->prev_in_row;
|
||||
|
@ -1935,14 +2031,17 @@ ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
|
|||
}
|
||||
|
||||
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
|
||||
Returns 0 if this is not possible and a PS_INSN otherwise. */
|
||||
Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
|
||||
set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
|
||||
before/after (respectively) the node pointed to by PS_I when scheduled
|
||||
in the same cycle. */
|
||||
static ps_insn_ptr
|
||||
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
|
||||
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
|
||||
sbitmap must_precede, sbitmap must_follow)
|
||||
{
|
||||
ps_insn_ptr ps_i, next_ps_i, advance_after;
|
||||
ps_insn_ptr ps_i;
|
||||
int rest_count = 1;
|
||||
int row = SMODULO (cycle, ps->ii);
|
||||
ddg_edge_ptr e;
|
||||
|
||||
if (ps->rows[row]
|
||||
&& ps->rows[row]->row_rest_count >= issue_rate)
|
||||
|
@ -1952,30 +2051,14 @@ add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
|
|||
rest_count += ps->rows[row]->row_rest_count;
|
||||
|
||||
ps_i = create_ps_insn (node, rest_count, cycle);
|
||||
ps_i->next_in_row = ps->rows[row];
|
||||
ps_i->prev_in_row = NULL;
|
||||
if (ps_i->next_in_row)
|
||||
ps_i->next_in_row->prev_in_row = ps_i;
|
||||
ps->rows[row] = ps_i;
|
||||
|
||||
/* Check if n is dependent on an insn already in row, having same cycle
|
||||
(typically ANTI_DEP). If so, n must skip over it. */
|
||||
advance_after = NULL;
|
||||
for (next_ps_i = ps_i->next_in_row;
|
||||
next_ps_i;
|
||||
next_ps_i = next_ps_i->next_in_row)
|
||||
if (next_ps_i->cycle == cycle)
|
||||
for (e = node->in; e; e = e->next_in)
|
||||
if (e->src == next_ps_i->node)
|
||||
advance_after = next_ps_i;
|
||||
|
||||
if (advance_after)
|
||||
while (ps_i->prev_in_row != advance_after)
|
||||
if (!ps_insn_advance_column (ps, ps_i))
|
||||
{
|
||||
remove_node_from_ps (ps, ps_i);
|
||||
return NULL;
|
||||
}
|
||||
/* Finds and inserts PS_I according to MUST_FOLLOW and
|
||||
MUST_PRECEDE. */
|
||||
if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
|
||||
{
|
||||
free (ps_i);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ps_i;
|
||||
}
|
||||
|
@ -2049,16 +2132,20 @@ ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
|
|||
|
||||
/* Checks if the given node causes resource conflicts when added to PS at
|
||||
cycle C. If not the node is added to PS and returned; otherwise zero
|
||||
is returned. */
|
||||
is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
|
||||
cuid N must be come before/after (respectively) the node pointed to by
|
||||
PS_I when scheduled in the same cycle. */
|
||||
ps_insn_ptr
|
||||
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
|
||||
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
|
||||
int c, sbitmap must_precede,
|
||||
sbitmap must_follow)
|
||||
{
|
||||
int has_conflicts = 0;
|
||||
ps_insn_ptr ps_i;
|
||||
|
||||
/* First add the node to the PS, if this succeeds check for conflicts,
|
||||
trying different issue slots in the same row. */
|
||||
if (! (ps_i = add_node_to_ps (ps, n, c)))
|
||||
/* First add the node to the PS, if this succeeds check for
|
||||
conflicts, trying different issue slots in the same row. */
|
||||
if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
|
||||
return NULL; /* Failed to insert the node at the given cycle. */
|
||||
|
||||
has_conflicts = ps_has_conflicts (ps, c, c)
|
||||
|
@ -2071,7 +2158,7 @@ ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
|
|||
scheduled in without conflicts. */
|
||||
while (has_conflicts)
|
||||
{
|
||||
if (! ps_insn_advance_column (ps, ps_i))
|
||||
if (! ps_insn_advance_column (ps, ps_i, must_follow))
|
||||
break;
|
||||
has_conflicts = ps_has_conflicts (ps, c, c)
|
||||
|| (ps->history > 0
|
||||
|
|
|
@ -983,6 +983,12 @@ enum insn_note
|
|||
/* Generated at the start of a duplicated exit test. */
|
||||
NOTE_INSN_LOOP_VTOP,
|
||||
|
||||
/* Mark that a block shouldn't be scheduled. This is currently
|
||||
used in modulo scheduling. Modulo scheduling adds this note
|
||||
to the blocks of the modulo-scheduled loops to disable scheduling
|
||||
them in the later traditional scheduling passes. */
|
||||
NOTE_DISABLE_SCHED_OF_BLOCK,
|
||||
|
||||
/* This kind of note is generated at the end of the function body,
|
||||
just before the return insn or return label. In an optimizing
|
||||
compilation it is deleted by the first jump optimization, after
|
||||
|
|
|
@ -117,6 +117,7 @@ static int *out_edges;
|
|||
static int is_cfg_nonregular (void);
|
||||
static int build_control_flow (struct edge_list *);
|
||||
static void new_edge (int, int);
|
||||
static bool sched_is_disabled_for_current_region_p (void);
|
||||
|
||||
/* A region is the main entity for interblock scheduling: insns
|
||||
are allowed to move between blocks in the same region, along
|
||||
|
@ -2332,6 +2333,37 @@ debug_dependencies (void)
|
|||
fprintf (sched_dump, "\n");
|
||||
}
|
||||
|
||||
/* Returns true if all the basic blocks of the current region have
|
||||
NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */
|
||||
static bool
|
||||
sched_is_disabled_for_current_region_p (void)
|
||||
{
|
||||
rtx first_bb_insn, last_bb_insn, insn;
|
||||
int bb;
|
||||
|
||||
for (bb = 0; bb < current_nr_blocks; bb++)
|
||||
{
|
||||
bool disable_sched = false;
|
||||
/* Searching for NOTE_DISABLE_SCHED_OF_BLOCK note between the
|
||||
start and end of the basic block. */
|
||||
get_block_head_tail (BB_TO_BLOCK (bb), &first_bb_insn,
|
||||
&last_bb_insn);
|
||||
for (insn = last_bb_insn; insn != NULL && insn != first_bb_insn;
|
||||
insn = PREV_INSN (insn))
|
||||
if (GET_CODE (insn) == NOTE
|
||||
&& (NOTE_LINE_NUMBER (insn)
|
||||
== NOTE_DISABLE_SCHED_OF_BLOCK))
|
||||
{
|
||||
disable_sched = true;
|
||||
break;
|
||||
}
|
||||
if (! disable_sched)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Schedule a region. A region is either an inner loop, a loop-free
|
||||
subroutine, or a single basic block. Each bb in the region is
|
||||
scheduled after its flow predecessors. */
|
||||
|
@ -2347,6 +2379,11 @@ schedule_region (int rgn)
|
|||
current_nr_blocks = RGN_NR_BLOCKS (rgn);
|
||||
current_blocks = RGN_BLOCKS (rgn);
|
||||
|
||||
/* Don't schedule region that is marked by
|
||||
NOTE_DISABLE_SCHED_OF_BLOCK. */
|
||||
if (sched_is_disabled_for_current_region_p ())
|
||||
return;
|
||||
|
||||
init_deps_global ();
|
||||
|
||||
/* Initializations for region data dependence analysis. */
|
||||
|
|
Loading…
Reference in New Issue