Remove profitability check

Co-Authored-By: Revital Eres <eres@il.ibm.com>

From-SVN: r127293
This commit is contained in:
Vladimir Yanovsky 2007-08-08 12:21:37 +00:00 committed by Revital Eres
parent 473210a961
commit 76b4f0f724
6 changed files with 97 additions and 201 deletions

View File

@ -1,3 +1,17 @@
2007-08-08 Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
* ddg.c (print_ddg): Add dump information.
* modulo-sched.c (print_node_sched_params): Add parameter and
verbosity.
(calculate_maxii): Remove function.
(undo_generate_reg_moves): Likewise.
(undo_permute_partial_schedule): Likewise.
(kernel_number_of_cycles): Likewise.
(MAXII_FACTOR): New definition to calculate the upper bound of II.
(sms_schedule): Use it. Remove profitability checks.
(sms_schedule_by_order): Fix order of nodes within the cycle.
2007-08-08 Samuel Thibault <samuel.thibault@ens-lyon.org>
* gcc/config/i386/gnu.h (STARTFILE_SPEC): Use gcrt0.o in profile mode, add

View File

@ -568,6 +568,7 @@ print_ddg (FILE *file, ddg_ptr g)
{
ddg_edge_ptr e;
fprintf (file, "Node num: %d\n", g->nodes[i].cuid);
print_rtl_single (file, g->nodes[i].insn);
fprintf (file, "OUT ARCS: ");
for (e = g->nodes[i].out; e; e = e->next_out)

View File

@ -159,7 +159,6 @@ static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int histor
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static int kernel_number_of_cycles (rtx first_insn, rtx last_insn);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
ddg_node_ptr node, int cycle,
sbitmap must_precede,
@ -365,7 +364,7 @@ set_node_sched_params (ddg_ptr g)
}
static void
print_node_sched_params (FILE * file, int num_nodes)
print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
{
int i;
@ -377,7 +376,8 @@ print_node_sched_params (FILE * file, int num_nodes)
rtx reg_move = nsp->first_reg_move;
int j;
fprintf (file, "Node %d:\n", i);
fprintf (file, "Node = %d; INSN = %d\n", i,
(INSN_UID (g->nodes[i].insn)));
fprintf (file, " asap = %d:\n", nsp->asap);
fprintf (file, " time = %d:\n", nsp->time);
fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
@ -390,29 +390,6 @@ print_node_sched_params (FILE * file, int num_nodes)
}
}
/* Calculate an upper bound for II. SMS should not schedule the loop if it
requires more cycles than this bound. Currently set to the sum of the
longest latency edge for each node. Reset based on experiments. */
static int
calculate_maxii (ddg_ptr g)
{
int i;
int maxii = 0;
for (i = 0; i < g->num_nodes; i++)
{
ddg_node_ptr u = &g->nodes[i];
ddg_edge_ptr e;
int max_edge_latency = 0;
for (e = u->out; e; e = e->next_out)
max_edge_latency = MAX (max_edge_latency, e->latency);
maxii += max_edge_latency;
}
return maxii;
}
/*
Breaking intra-loop register anti-dependences:
Each intra-loop register anti-dependence implies a cross-iteration true
@ -533,40 +510,6 @@ generate_reg_moves (partial_schedule_ptr ps, bool rescan)
return reg_move_replaces;
}
/* We call this when we want to undo the SMS schedule for a given loop.
One of the things that we do is to delete the register moves generated
for the sake of SMS; this function deletes the register move instructions
recorded in the undo buffer. */
static void
undo_generate_reg_moves (partial_schedule_ptr ps,
struct undo_replace_buff_elem *reg_move_replaces)
{
int i,j;
for (i = 0; i < ps->g->num_nodes; i++)
{
ddg_node_ptr u = &ps->g->nodes[i];
rtx prev;
rtx crr = SCHED_FIRST_REG_MOVE (u);
for (j = 0; j < SCHED_NREG_MOVES (u); j++)
{
prev = PREV_INSN (crr);
delete_insn (crr);
crr = prev;
}
SCHED_FIRST_REG_MOVE (u) = NULL_RTX;
}
while (reg_move_replaces)
{
struct undo_replace_buff_elem *rep = reg_move_replaces;
reg_move_replaces = reg_move_replaces->next;
replace_rtx (rep->insn, rep->new_reg, rep->orig_reg);
}
}
/* Free memory allocated for the undo buffer. */
static void
free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
@ -638,28 +581,6 @@ permute_partial_schedule (partial_schedule_ptr ps, rtx last)
PREV_INSN (last));
}
/* As part of undoing SMS we return to the original ordering of the
instructions inside the loop kernel. Given the partial schedule PS, this
function returns the ordering of the instruction according to their CUID
in the DDG (PS->G), which is the original order of the instruction before
performing SMS. */
static void
undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last)
{
int i;
for (i = 0 ; i < ps->g->num_nodes; i++)
if (last == ps->g->nodes[i].insn
|| last == ps->g->nodes[i].first_note)
break;
else if (PREV_INSN (last) != ps->g->nodes[i].insn)
reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn,
PREV_INSN (last));
}
/* Used to generate the prologue & epilogue. Duplicate the subset of
nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
of both), together with a prefix/suffix of their reg_moves. */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int to_stage, int for_prolog)
@ -869,6 +790,9 @@ canon_loop (struct loop *loop)
version may be entered. Just a guess. */
#define PROB_SMS_ENOUGH_ITERATIONS 80
/* Used to calculate the upper bound of ii. */
#define MAXII_FACTOR 2
/* Main entry point, perform SMS scheduling on the loops of the function
that consist of single basic blocks. */
static void
@ -1097,7 +1021,7 @@ sms_schedule (void)
mii = 1; /* Need to pass some estimate of mii. */
rec_mii = sms_order_nodes (g, mii, node_order);
mii = MAX (res_MII (g), rec_mii);
maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
maxii = MAXII_FACTOR * mii;
if (dump_file)
fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
@ -1131,8 +1055,6 @@ sms_schedule (void)
}
else
{
int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
int new_cycles;
struct undo_replace_buff_elem *reg_move_replaces;
if (dump_file)
@ -1154,68 +1076,46 @@ sms_schedule (void)
normalize_sched_times (ps);
rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
set_columns_for_ps (ps);
canon_loop (loop);
/* Generate the kernel just to be able to measure its cycles. */
/* case the BCT count is not known , Do loop-versioning */
if (count_reg && ! count_init)
{
rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
GEN_INT(stage_count));
unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
* REG_BR_PROB_BASE) / 100;
loop_version (loop, comp_rtx, &condition_bb,
prob, prob, REG_BR_PROB_BASE - prob,
true);
}
/* Set new iteration count of loop kernel. */
if (count_reg && count_init)
SET_SRC (single_set (count_init)) = GEN_INT (loop_count
- stage_count + 1);
/* Now apply the scheduled kernel to the RTL of the loop. */
permute_partial_schedule (ps, g->closing_branch->first_note);
reg_move_replaces = generate_reg_moves (ps, false);
/* Get the number of cycles the new kernel expect to execute in. */
new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
/* Mark this loop as software pipelined so the later
scheduling passes doesn't touch it. */
if (! flag_resched_modulo_sched)
g->bb->flags |= BB_DISABLE_SCHEDULE;
/* The life-info is not valid any more. */
df_set_bb_dirty (g->bb);
/* Get back to the original loop so we can do loop versioning. */
undo_permute_partial_schedule (ps, g->closing_branch->first_note);
if (reg_move_replaces)
undo_generate_reg_moves (ps, reg_move_replaces);
if ( new_cycles >= orig_cycles)
{
/* SMS is not profitable so undo the permutation and reg move generation
and return the kernel to its original state. */
if (dump_file)
fprintf (dump_file, "Undoing SMS because it is not profitable.\n");
}
reg_move_replaces = generate_reg_moves (ps, true);
if (dump_file)
print_node_sched_params (dump_file, g->num_nodes, g);
/* Generate prolog and epilog. */
if (count_reg && !count_init)
generate_prolog_epilog (ps, loop, count_reg);
else
{
canon_loop (loop);
/* case the BCT count is not known , Do loop-versioning */
if (count_reg && ! count_init)
{
rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
GEN_INT(stage_count));
unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
* REG_BR_PROB_BASE) / 100;
loop_version (loop, comp_rtx, &condition_bb,
prob, prob, REG_BR_PROB_BASE - prob,
true);
}
/* Set new iteration count of loop kernel. */
if (count_reg && count_init)
SET_SRC (single_set (count_init)) = GEN_INT (loop_count
- stage_count + 1);
/* Now apply the scheduled kernel to the RTL of the loop. */
permute_partial_schedule (ps, g->closing_branch->first_note);
/* Mark this loop as software pipelined so the later
scheduling passes doesn't touch it. */
if (! flag_resched_modulo_sched)
g->bb->flags |= BB_DISABLE_SCHEDULE;
/* The life-info is not valid any more. */
df_set_bb_dirty (g->bb);
reg_move_replaces = generate_reg_moves (ps, true);
if (dump_file)
print_node_sched_params (dump_file, g->num_nodes);
/* Generate prolog and epilog. */
if (count_reg && !count_init)
generate_prolog_epilog (ps, loop, count_reg);
else
generate_prolog_epilog (ps, loop, NULL_RTX);
}
generate_prolog_epilog (ps, loop, NULL_RTX);
free_undo_replace_buff (reg_move_replaces);
}
@ -1529,17 +1429,21 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
of nodes within the cycle. */
sbitmap_zero (must_precede);
sbitmap_zero (must_follow);
for (e = u_node->in; e != 0; e = e->next_in)
/* TODO: We can add an insn to the must_precede or must_follow
bitmaps only if it has tight dependence to U and they
both scheduled in the same row. The current check is less
conservative and content with the fact that both U and the
insn are scheduled in the same row. */
for (e = u_node->in; e != 0; e = e->next_in)
if (TEST_BIT (sched_nodes, e->src->cuid)
&& e->latency == (ii * e->distance)
&& start == SCHED_TIME (e->src))
SET_BIT (must_precede, e->src->cuid);
&& (SMODULO (SCHED_TIME (e->src), ii) == SMODULO (start, ii)))
SET_BIT (must_precede, e->src->cuid);
for (e = u_node->out; e != 0; e = e->next_out)
for (e = u_node->out; e != 0; e = e->next_out)
if (TEST_BIT (sched_nodes, e->dest->cuid)
&& e->latency == (ii * e->distance)
&& end == SCHED_TIME (e->dest))
SET_BIT (must_follow, e->dest->cuid);
&& (SMODULO (SCHED_TIME (e->dest), ii) ==
SMODULO ((end - step), ii)))
SET_BIT (must_follow, e->dest->cuid);
success = 0;
if ((step > 0 && start < end) || (step < 0 && start > end))
@ -2259,57 +2163,7 @@ advance_one_cycle (void)
targetm.sched.dfa_post_cycle_insn ());
}
/* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds
the number of cycles according to DFA that the kernel fits in,
we use this to check if we done well with SMS after we add
register moves. In some cases register moves overhead makes
it even worse than the original loop. We want SMS to be performed
when it gives less cycles after register moves are added. */
static int
kernel_number_of_cycles (rtx first_insn, rtx last_insn)
{
int cycles = 0;
rtx insn;
int can_issue_more = issue_rate;
state_reset (curr_state);
for (insn = first_insn;
insn != NULL_RTX && insn != last_insn;
insn = NEXT_INSN (insn))
{
if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
continue;
/* Check if there is room for the current insn. */
if (!can_issue_more || state_dead_lock_p (curr_state))
{
cycles ++;
advance_one_cycle ();
can_issue_more = issue_rate;
}
/* Update the DFA state and return with failure if the DFA found
recource conflicts. */
if (state_transition (curr_state, insn) >= 0)
{
cycles ++;
advance_one_cycle ();
can_issue_more = issue_rate;
}
if (targetm.sched.variable_issue)
can_issue_more =
targetm.sched.variable_issue (sched_dump, sched_verbose,
insn, can_issue_more);
/* A naked CLOBBER or USE generates no instruction, so don't
let them consume issue slots. */
else if (GET_CODE (PATTERN (insn)) != USE
&& GET_CODE (PATTERN (insn)) != CLOBBER)
can_issue_more--;
}
return cycles;
}
/* Checks if PS has resource conflicts according to DFA, starting from
FROM cycle to TO cycle; returns true if there are conflicts and false

View File

@ -1,3 +1,9 @@
2007-08-08 Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
* gfortran.dg/sms-1.f90: Add comment.
* gfortran.dg/sms-2.f90: New.
2007-08-07 Ian Lance Taylor <iant@google.com>
* gcc.dg/instrument-1.c: New test.

View File

@ -1,5 +1,7 @@
! { dg-do run }
! { dg-options "-O2 -fmodulo-sched" }
! { dg-options "-O2 -fmodulo-sched" }
! This testcase related to INC instruction which is
! currently not supported in SMS.
program main
integer (kind = 8) :: i, l8, u8, step8
integer (kind = 4) :: l4, step4

View File

@ -0,0 +1,19 @@
! { dg-do run }
! { dg-options "-O2 -fmodulo-sched" }
! This testcase related to wrong order within a cycle fix.
!
program foo
real, dimension (5, 5, 5, 5) :: a
a (:, :, :, :) = 4
a (:, 2, :, 4) = 10
a (:, 2, :, 1) = 0
forall (i = 1:5, i == 3)
a(i, i, i, i) = -5
end forall
if (sum (a) .ne. 2541.0) call abort ()
end