sparc.c (ultra_code_from_mask, [...]): New functions to describe UltraSPARC pipeline exactly to Haifa.

* config/sparc/sparc.c (ultra_code_from_mask,
	ultra_cmove_results_ready_p, ultra_fpmode_conflict_exists,
	ultra_find_type, ultra_schedule_insn, ultra_flush_pipeline,
	ultrasparc_sched_init, ultrasparc_variable_issue,
	ultra_rescan_pipeline_state, ultrasparc_sched_reorder): New
	functions to describe UltraSPARC pipeline exactly to Haifa.
	(ultrasparc_adjust_cost): Indicate IMUL type insns have zero cost,
	as there is nothing the scheduler can do about it.  Indicate that
	REG_DEP_OUTPUT's collide.  Fixup formatting.
	* config/sparc/sparc.h (RTX_COSTS): Fixup integer multiply and
	divide costs on Ultra for DImode.
	(MD_SCHED_INIT, MD_SCHED_REORDER, MD_SCHED_VARIABLE_ISSUE):
	Define.
	* config/sparc/sparc.md (ieu_unnamed function unit): Rename to
	ieuN and add call_no_delay_slot to type list.
	(cti function unit): New unit for branches on UltraSPARC.
	(subx/addx insns): Set type to misc.
	(sidi zero/sign extension insns on arch64): Set type to shift.
	(sign_extendhidi2_insn): Set type to sload.

From-SVN: r21767
David S. Miller 1998-08-16 18:50:32 +00:00 committed by David S. Miller
parent a6c2a102ae
commit 3bc8b61efc
4 changed files with 873 additions and 103 deletions


@ -1,3 +1,25 @@
Sun Aug 16 17:37:06 1998  David S. Miller  <davem@pierdol.cobaltmicro.com>

	* config/sparc/sparc.c (ultra_code_from_mask,
	ultra_cmove_results_ready_p, ultra_fpmode_conflict_exists,
	ultra_find_type, ultra_schedule_insn, ultra_flush_pipeline,
	ultrasparc_sched_init, ultrasparc_variable_issue,
	ultra_rescan_pipeline_state, ultrasparc_sched_reorder): New
	functions to describe UltraSPARC pipeline exactly to Haifa.
	(ultrasparc_adjust_cost): Indicate IMUL type insns have zero cost,
	as there is nothing the scheduler can do about it.  Indicate that
	REG_DEP_OUTPUT's collide.  Fixup formatting.
	* config/sparc/sparc.h (RTX_COSTS): Fixup integer multiply and
	divide costs on Ultra for DImode.
	(MD_SCHED_INIT, MD_SCHED_REORDER, MD_SCHED_VARIABLE_ISSUE):
	Define.
	* config/sparc/sparc.md (ieu_unnamed function unit): Rename to
	ieuN and add call_no_delay_slot to type list.
	(cti function unit): New unit for branches on UltraSPARC.
	(subx/addx insns): Set type to misc.
	(sidi zero/sign extension insns on arch64): Set type to shift.
	(sign_extendhidi2_insn): Set type to sload.

Sun Aug 16 13:52:00 1998  David Edelsohn  <edelsohn@mhpcc.edu>

	* rs6000.c (rs6000_stack_info): Use if == 0 for sizes.


@ -6102,8 +6102,8 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
cycles later. */
/* if a load, then the dependence must be on the memory address;
add an extra "cycle". Note that the cost could be two cycles
if the reg was written late in an instruction group; we can not tell
here. */
if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
return cost + 3;
@ -6115,7 +6115,7 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
rtx dep_pat = PATTERN (dep_insn);
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
return cost; /* This should not happen! */
/* The dependency between the two instructions was on the data that
is being stored. Assume that this implies that the address of the
@ -6147,67 +6147,765 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
return cost;
}
/* This describes the state of the UltraSPARC pipeline during
instruction scheduling. */
#define TMASK(__x) (1U << ((int)(__x)))
#define UMASK(__x) (1U << ((int)(__x)))
enum ultra_code { NONE=0, /* no insn at all */
IEU0, /* shifts and conditional moves */
IEU1, /* condition code setting insns, calls+jumps */
IEUN, /* all other single cycle ieu insns */
LSU, /* loads and stores */
CTI, /* branches */
FPM, /* FPU pipeline 1, multiplies and divides */
FPA, /* FPU pipeline 2, all other operations */
SINGLE, /* single issue instructions */
NUM_ULTRA_CODES };
static char *ultra_code_names[NUM_ULTRA_CODES] = {
"NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
"FPM", "FPA", "SINGLE" };
struct ultrasparc_pipeline_state {
/* The insns in this group. */
rtx group[4];
/* The code for each insn. */
enum ultra_code codes[4];
/* Which insns in this group have been committed by the
scheduler. This is how we determine how many more
can issue this cycle. */
char commit[4];
/* How many insns in this group. */
char group_size;
/* Mask of free slots still in this group. */
char free_slot_mask;
/* The slotter uses the following to determine what other
insn types can still make their way into this group. */
char contents [NUM_ULTRA_CODES];
char num_ieu_insns;
};
#define ULTRA_NUM_HIST 8
static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
static int ultra_cur_hist;
static int ultra_cycles_elapsed;
#define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
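/* A minimal self-contained sketch (illustrative, not part of this
   commit): ultra_pipe_hist is used as a ring buffer, ultra_pipe names
   the current cycle's group, and lookbacks like
   (ultra_cur_hist - 1) % ULTRA_NUM_HIST walk earlier cycles.  Note
   that C's % can yield a negative result for a negative left operand,
   so this sketch wraps by adding the ring size first.  */
#include <stdio.h>
#define NUM_HIST 8
static int cur;                         /* current ring slot */
static void advance (void)              /* like ultra_flush_pipeline */
{
  cur = (cur + 1) % NUM_HIST;
}
static int lookback (int cycles_ago)    /* slot of an earlier cycle */
{
  return (cur - cycles_ago + NUM_HIST) % NUM_HIST;
}
int main (void)
{
  advance ();                           /* cur: 0 -> 1 */
  printf ("%d\n", lookback (1));        /* prints 0, the previous cycle */
  return 0;
}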
/* Given TYPE_MASK compute the ultra_code it has. */
static enum ultra_code
ultra_code_from_mask (type_mask)
int type_mask;
{
if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
return IEU0;
else if (type_mask & (TMASK (TYPE_COMPARE) |
TMASK (TYPE_CALL) |
TMASK (TYPE_UNCOND_BRANCH)))
return IEU1;
else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
return IEUN;
else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
TMASK (TYPE_FPSTORE)))
return LSU;
else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
return FPM;
else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
return FPA;
else if (type_mask & TMASK (TYPE_BRANCH))
return CTI;
return SINGLE;
}
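/* A self-contained usage sketch (illustrative, not part of this
   commit; the enum and bits are stand-ins): when a TYPE_MASK has
   several bits set, the earliest test in the cascade above wins, so a
   shift-plus-ialu mask classifies as IEU0 rather than IEUN.  */
#include <stdio.h>
enum klass { K_IEU0, K_IEUN, K_SINGLE };
enum { T_SHIFT = 1 << 0, T_IALU = 1 << 1 };
static enum klass classify (int mask)
{
  if (mask & T_SHIFT)                   /* IEU0-only resources first */
    return K_IEU0;
  else if (mask & T_IALU)               /* then the generic IEU class */
    return K_IEUN;
  return K_SINGLE;
}
int main (void)
{
  printf ("%d\n", classify (T_SHIFT | T_IALU) == K_IEU0); /* prints 1 */
  return 0;
}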
/* Check INSN (a conditional move) and make sure that its
results are available at this cycle. Return 1 if the
results are in fact ready. */
static int
ultra_cmove_results_ready_p (insn)
rtx insn;
{
struct ultrasparc_pipeline_state *up;
int entry, slot;
/* If this got dispatched in the previous
group, the results are not ready. */
entry = (ultra_cur_hist - 1) % ULTRA_NUM_HIST;
up = &ultra_pipe_hist[entry];
slot = 4;
while (--slot >= 0)
if (up->group[slot] == insn)
return 0;
return 1;
}
/* Walk backwards in pipeline history looking for FPU
operations which use a mode different than FPMODE and
will create a stall if an insn using FPMODE were to be
dispatched this cycle. */
static int
ultra_fpmode_conflict_exists (fpmode)
enum machine_mode fpmode;
{
int hist_ent;
int hist_lim;
hist_ent = (ultra_cur_hist - 1) % ULTRA_NUM_HIST;
if (ultra_cycles_elapsed < 4)
hist_lim = ultra_cycles_elapsed;
else
hist_lim = 4;
while (hist_lim > 0)
{
struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
int slot = 4;
while (--slot >= 0)
{
rtx insn = up->group[slot];
enum machine_mode this_mode;
rtx pat;
if (! insn
|| GET_CODE (insn) != INSN
|| (pat = PATTERN (insn)) == 0
|| GET_CODE (pat) != SET)
continue;
this_mode = GET_MODE (SET_DEST (pat));
if ((this_mode != SFmode
&& this_mode != DFmode)
|| this_mode == fpmode)
continue;
/* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
we will get a stall. */
if (GET_CODE (SET_SRC (pat)) != ABS
&& GET_CODE (SET_SRC (pat)) != NEG
&& ((TMASK (get_attr_type (insn)) &
(TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT))) == 0))
return 1;
}
hist_lim--;
hist_ent = (hist_ent - 1) % ULTRA_NUM_HIST;
}
/* No conflicts, safe to dispatch. */
return 0;
}
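/* A self-contained sketch of the precision-mixing rule checked above
   (illustrative, not part of this commit): an FP op stalls if any op
   of the other precision completed in the lookback window, unless the
   earlier op was one of the exempt FMOV/FABS/FNEG/FDIV/FSQRT forms,
   which this simplified model ignores.  */
#include <stdio.h>
enum fpmode { NO_MODE, SF, DF };
static enum fpmode window[4];           /* FP result modes, last 4 cycles */
static int conflict (enum fpmode next)
{
  int i;
  for (i = 0; i < 4; i++)
    if (window[i] != NO_MODE && window[i] != next)
      return 1;                         /* mixed precision: stall */
  return 0;
}
int main (void)
{
  window[0] = DF;
  printf ("%d %d\n", conflict (SF), conflict (DF)); /* prints 1 0 */
  return 0;
}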
/* Find an instruction in LIST which has one of the
type attributes enumerated in TYPE_MASK. START
says where to begin the search.
NOTE: This scheme depends upon the fact that we
have fewer than 32 distinct type attributes. */
static rtx *
ultra_find_type (type_mask, list, start)
int type_mask;
rtx *list;
int start;
{
int i;
for (i = start; i >= 0; i--)
{
rtx insn = list[i];
if (recog_memoized (insn) >= 0
&& (TMASK(get_attr_type (insn)) & type_mask))
{
enum machine_mode fpmode;
rtx pat = 0;
int slot;
int check_depend = 0;
int check_fpmode_conflict = 0;
if (GET_CODE (insn) == INSN
&& (pat = PATTERN(insn)) != 0
&& GET_CODE (pat) == SET
&& !(type_mask & (TMASK (TYPE_STORE) |
TMASK (TYPE_FPSTORE))))
{
check_depend = 1;
if (GET_MODE (SET_DEST (pat)) == SFmode
|| GET_MODE (SET_DEST (pat)) == DFmode)
{
fpmode = GET_MODE (SET_DEST (pat));
check_fpmode_conflict = 1;
}
}
slot = 4;
while(--slot >= 0)
{
rtx slot_insn = ultra_pipe.group[slot];
rtx slot_pat;
/* Already issued, bad dependency, or FPU
mode conflict. */
if (slot_insn != 0
&& (slot_pat = PATTERN (slot_insn)) != 0
&& ((insn == slot_insn)
|| (check_depend == 1
&& GET_CODE (slot_insn) == INSN
&& GET_CODE (slot_pat) == SET
&& rtx_equal_p (SET_DEST (slot_pat),
SET_SRC (pat)))
|| (check_fpmode_conflict == 1
&& GET_CODE (slot_insn) == INSN
&& GET_CODE (slot_pat) == SET
&& ((GET_MODE (SET_DEST (slot_pat)) == SFmode
|| GET_MODE (SET_DEST (slot_pat)) == DFmode)
&& GET_MODE (SET_DEST (slot_pat)) != fpmode))))
goto next;
}
/* Check for peculiar result availability and dispatch
interference situations. */
if (pat != 0
&& ultra_cycles_elapsed > 0)
{
rtx link;
for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
{
rtx link_insn = XEXP (link, 0);
if (GET_CODE (link_insn) == INSN
&& recog_memoized (link_insn) >= 0
&& (TMASK (get_attr_type (link_insn)) &
(TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
&& ! ultra_cmove_results_ready_p (link_insn))
goto next;
}
if (check_fpmode_conflict
&& ultra_fpmode_conflict_exists (fpmode))
goto next;
}
return &list[i];
}
next:
;
}
return 0;
}
/* Place insn pointed to by IP into the pipeline.
Make element THIS of READY be that insn if it
is not already. TYPE indicates the pipeline class
this insn falls into. */
static void
ultra_schedule_insn (ip, ready, this, type)
rtx *ip;
rtx *ready;
int this;
enum ultra_code type;
{
int pipe_slot;
char mask = ultra_pipe.free_slot_mask;
/* Obtain free slot. */
for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
if ((mask & (1 << pipe_slot)) != 0)
break;
if (pipe_slot == 4)
abort ();
/* In it goes, and it hasn't been committed yet. */
ultra_pipe.group[pipe_slot] = *ip;
ultra_pipe.codes[pipe_slot] = type;
ultra_pipe.contents[type] = 1;
if (UMASK (type) &
(UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
ultra_pipe.num_ieu_insns += 1;
ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
ultra_pipe.group_size += 1;
ultra_pipe.commit[pipe_slot] = 0;
/* Update ready list. */
if (ip != &ready[this])
{
rtx temp = *ip;
*ip = ready[this];
ready[this] = temp;
}
}
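/* A self-contained sketch of the free_slot_mask bookkeeping above
   (illustrative, not part of this commit): one bit per group slot,
   allocation takes the lowest set bit and clears it.  */
#include <stdio.h>
static int alloc_slot (char *mask)
{
  int slot;
  for (slot = 0; slot < 4; slot++)
    if (*mask & (1 << slot))
      {
        *mask &= ~(1 << slot);          /* slot is now occupied */
        return slot;
      }
  return -1;                            /* group is full */
}
int main (void)
{
  char mask = 0xf;                      /* all four slots free */
  int first = alloc_slot (&mask);
  int second = alloc_slot (&mask);
  printf ("%d %d\n", first, second);    /* prints 0 1 */
  return 0;
}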
/* Advance to the next pipeline group. */
static void
ultra_flush_pipeline ()
{
ultra_cur_hist = (ultra_cur_hist + 1) % ULTRA_NUM_HIST;
ultra_cycles_elapsed += 1;
bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
ultra_pipe.free_slot_mask = 0xf;
}
static int ultra_reorder_called_this_block;
/* Init our data structures for this current block. */
void
ultrasparc_sched_init (dump, sched_verbose)
FILE *dump;
int sched_verbose;
{
bzero ((char *) &ultra_pipe_hist, sizeof ultra_pipe_hist);
ultra_pipe.free_slot_mask = 0xf;
ultra_cur_hist = 0;
ultra_cycles_elapsed = 0;
ultra_reorder_called_this_block = 0;
}
/* INSN has been scheduled, update pipeline commit state
and return how many instructions are still to be
scheduled in this group. */
int
ultrasparc_variable_issue (insn)
rtx insn;
{
struct ultrasparc_pipeline_state *up = &ultra_pipe;
int i, left_to_fire;
left_to_fire = 0;
for (i = 0; i < 4; i++)
{
if (up->group[i] == 0)
continue;
if (up->group[i] == insn)
{
up->commit[i] = 1;
}
else if (! up->commit[i])
left_to_fire++;
}
return left_to_fire;
}
/* In actual_hazard_this_instance, we may have yanked some
instructions from the ready list due to conflict cost
adjustments. If so, and such an insn was in our pipeline
group, remove it and update state. */
static void
ultra_rescan_pipeline_state (ready, n_ready)
rtx *ready;
int n_ready;
{
struct ultrasparc_pipeline_state *up = &ultra_pipe;
int i;
for (i = 0; i < 4; i++)
{
rtx insn = up->group[i];
int j;
if (! insn)
continue;
/* If it has been committed, then it was removed from
the ready list because it was actually scheduled,
and that is not the case we are searching for here. */
if (up->commit[i] != 0)
continue;
for (j = n_ready - 1; j >= 0; j--)
if (ready[j] == insn)
break;
/* If we didn't find it, toss it. */
if (j < 0)
{
enum ultra_code ucode = up->codes[i];
up->group[i] = 0;
up->codes[i] = NONE;
up->contents[ucode] = 0;
if (UMASK (ucode) &
(UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
up->num_ieu_insns -= 1;
up->free_slot_mask |= (1 << i);
up->group_size -= 1;
up->commit[i] = 0;
}
}
}
void
ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
FILE *dump;
int sched_verbose;
rtx *ready;
int n_ready;
{
struct ultrasparc_pipeline_state *up = &ultra_pipe;
int i, this_insn;
/* We get called once unnecessarily per block of insns
scheduled. */
if (ultra_reorder_called_this_block == 0)
{
ultra_reorder_called_this_block = 1;
return;
}
if (sched_verbose)
{
int n;
fprintf (dump, "\n;;\tUltraSPARC Looking at [");
for (n = n_ready - 1; n >= 0; n--)
{
rtx insn = ready[n];
enum ultra_code ucode;
if (recog_memoized (insn) < 0)
continue;
ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
if (n != 0)
fprintf (dump, "%s(%d) ",
ultra_code_names[ucode],
INSN_UID (insn));
else
fprintf (dump, "%s(%d)",
ultra_code_names[ucode],
INSN_UID (insn));
}
fprintf (dump, "]\n");
}
this_insn = n_ready - 1;
/* Skip over junk we don't understand. */
while ((this_insn >= 0)
&& recog_memoized (ready[this_insn]) < 0)
this_insn--;
while (this_insn >= 0) {
int old_group_size = up->group_size;
if (up->group_size != 0)
{
int num_committed;
num_committed = (up->commit[0] + up->commit[1] +
up->commit[2] + up->commit[3]);
/* If nothing has been committed from our group, or all of
them have been, clear out the (current cycle's) pipeline
state and start afresh. */
if (num_committed == 0
|| num_committed == up->group_size)
{
bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
ultra_pipe.free_slot_mask = 0xf;
old_group_size = 0;
}
else
{
/* OK, some ready list insns got requeued and thus removed
from the ready list. Account for this fact. */
ultra_rescan_pipeline_state (ready, n_ready);
/* Something "changed", make this look like a newly
formed group so the code at the end of the loop
knows that progress was in fact made. */
if (up->group_size != old_group_size)
old_group_size = 0;
}
}
if (up->group_size == 0)
{
/* If the pipeline is (still) empty and we have any single
group insns, get them out now as this is a good time. */
rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
ready, this_insn);
if (ip)
{
ultra_schedule_insn (ip, ready, this_insn, SINGLE);
break;
}
/* If we are not in the process of emptying out the pipe, try to
obtain an instruction which must be the first in its group. */
ip = ultra_find_type ((TMASK (TYPE_CALL) |
TMASK (TYPE_CALL_NO_DELAY_SLOT) |
TMASK (TYPE_UNCOND_BRANCH)),
ready, this_insn);
if (ip)
{
ultra_schedule_insn (ip, ready, this_insn, IEU1);
this_insn--;
}
else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
TMASK (TYPE_FPDIVD) |
TMASK (TYPE_FPSQRT)),
ready, this_insn)) != 0)
{
ultra_schedule_insn (ip, ready, this_insn, FPM);
this_insn--;
}
}
/* Try to fill the integer pipeline. First, look for an IEU0 specific
operation. We can't do more IEU operations if the first 3 slots are
all full or we have dispatched two IEU insns already. */
if ((up->free_slot_mask & 0x7) != 0
&& up->num_ieu_insns < 2
&& up->contents[IEU0] == 0
&& up->contents[IEUN] == 0)
{
rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
if (ip)
{
ultra_schedule_insn (ip, ready, this_insn, IEU0);
this_insn--;
}
}
/* If we can, try to find an IEU1 specific or an unnamed
IEU instruction. */
if ((up->free_slot_mask & 0x7) != 0
&& up->num_ieu_insns < 2)
{
rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
(up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
ready, this_insn);
if (ip)
{
rtx insn = *ip;
ultra_schedule_insn (ip, ready, this_insn,
(!up->contents[IEU1]
&& get_attr_type (insn) == TYPE_COMPARE)
? IEU1 : IEUN);
this_insn--;
}
}
/* If only one IEU insn has been found, try to find another unnamed
IEU operation or an IEU1 specific one. */
if ((up->free_slot_mask & 0x7) != 0
&& up->num_ieu_insns < 2)
{
rtx *ip;
int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
if (!up->contents[IEU1])
tmask |= TMASK (TYPE_COMPARE);
ip = ultra_find_type (tmask, ready, this_insn);
if (ip)
{
rtx insn = *ip;
ultra_schedule_insn (ip, ready, this_insn,
(!up->contents[IEU1]
&& get_attr_type (insn) == TYPE_COMPARE)
? IEU1 : IEUN);
this_insn--;
}
}
/* Try for a load or store, but such an insn can only be issued
if it is within one of the first 3 slots. */
if ((up->free_slot_mask & 0x7) != 0
&& up->contents[LSU] == 0)
{
rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
TMASK (TYPE_FPSTORE)), ready, this_insn);
if (ip)
{
ultra_schedule_insn (ip, ready, this_insn, LSU);
this_insn--;
}
}
/* Now find FPU operations, first FPM class. But not divisions or
square-roots because those will break the group up. Unlike all
the previous types, these can go in any slot. */
if (up->free_slot_mask != 0
&& up->contents[FPM] == 0)
{
rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
if (ip)
{
ultra_schedule_insn (ip, ready, this_insn, FPM);
this_insn--;
}
}
/* Continue on with FPA class if we have not filled the group already. */
if (up->free_slot_mask != 0
&& up->contents[FPA] == 0)
{
rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
ready, this_insn);
if (ip)
{
ultra_schedule_insn (ip, ready, this_insn, FPA);
this_insn--;
}
}
/* Finally, maybe stick a branch in here. */
if (up->free_slot_mask != 0
&& up->contents[CTI] == 0)
{
rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
/* Try to slip in a branch only if it is one of the
next 2 in the ready list. */
if (ip && ((&ready[this_insn] - ip) < 2))
{
ultra_schedule_insn (ip, ready, this_insn, CTI);
this_insn--;
}
}
up->group_size = 0;
for (i = 0; i < 4; i++)
if ((up->free_slot_mask & (1 << i)) == 0)
up->group_size++;
/* See if we made any progress... */
if (old_group_size != up->group_size)
break;
/* Clean out the (current cycle's) pipeline state
and try once more. */
bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
ultra_pipe.free_slot_mask = 0xf;
}
if (sched_verbose)
{
int n, gsize;
fprintf (dump, ";;\tUltraSPARC Launched [");
gsize = up->group_size;
for (n = 0; n < 4; n++)
{
rtx insn = up->group[n];
if (! insn)
continue;
gsize -= 1;
if (gsize != 0)
fprintf (dump, "%s(%d) ",
ultra_code_names[up->codes[n]],
INSN_UID (insn));
else
fprintf (dump, "%s(%d)",
ultra_code_names[up->codes[n]],
INSN_UID (insn));
}
fprintf (dump, "]\n");
}
}
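/* A self-contained sketch of the per-cycle constraints the loop above
   enforces (illustrative, not part of this commit): IEU and memory
   insns must land in one of the first three slots, at most two IEU
   insns and one load/store per group, while FP and branch insns may
   take any free slot.  */
#include <stdio.h>
#define FIRST3(mask) ((mask) & 0x7)
static int can_take_ieu (int free_mask, int num_ieu)
{
  return FIRST3 (free_mask) != 0 && num_ieu < 2;
}
static int can_take_lsu (int free_mask, int have_lsu)
{
  return FIRST3 (free_mask) != 0 && !have_lsu;
}
int main (void)
{
  /* Two IEU insns already dispatched: a third is refused even though
     slot bits remain free.  */
  printf ("%d\n", can_take_ieu (0xc, 2)); /* prints 0 */
  printf ("%d\n", can_take_lsu (0x4, 0)); /* prints 1 */
  return 0;
}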
int
ultrasparc_adjust_cost (insn, link, dep_insn, previous, cost)
rtx insn;
rtx link;
rtx dep_insn;
rtx previous;
int cost;
{
enum attr_type insn_type, dep_type;
rtx pat = PATTERN(insn);
rtx dep_pat = PATTERN (dep_insn);
if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
return cost;
insn_type = get_attr_type (insn);
dep_type = get_attr_type (dep_insn);
/* Nothing issues in parallel with integer multiplies, so
mark as zero cost since the scheduler can not do anything
about it. */
if (insn_type == TYPE_IMUL)
return 0;
#define SLOW_FP(dep_type) \
(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
switch (REG_NOTE_KIND (link))
{
case 0:
/* Data dependency; DEP_INSN writes a register that INSN reads some
cycles later. */
if (dep_type == TYPE_CMOVE)
{
/* Instructions that read the result of conditional moves cannot
be in the same group or the following group. */
return cost + 1;
}
switch (insn_type)
{
/* UltraSPARC can dual issue a store and an instruction setting
the value stored, except for divide and square root. */
case TYPE_FPSTORE:
if (! SLOW_FP (dep_type))
return 0;
return cost;
case TYPE_STORE:
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
return cost;
if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
/* The dependency between the two instructions is on the data
that is being stored. Assume that the address of the store
is not also dependent. */
return 0;
return cost;
case TYPE_LOAD:
case TYPE_SLOAD:
case TYPE_FPLOAD:
/* A load does not return data until at least 11 cycles after
a store to the same location. 3 cycles are accounted for
in the load latency; add the other 8 here. */
if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
{
/* If the addresses are not equal this may be a false
dependency because pointer aliasing could not be
determined. Add only 2 cycles in that case. 2 is
an arbitrary compromise between 8, which would cause
the scheduler to generate worse code elsewhere to
compensate for a dependency which might not really
exist, and 0. */
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
|| GET_CODE (SET_SRC (pat)) != MEM
|| GET_CODE (SET_DEST (dep_pat)) != MEM
@ -6215,72 +6913,73 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
XEXP (SET_DEST (dep_pat), 0)))
return cost + 2;
return cost + 8;
}
return cost;
case TYPE_BRANCH:
/* Compare to branch latency is 0. There is no benefit from
separating compare and branch. */
if (dep_type == TYPE_COMPARE)
return 0;
/* Floating point compare to branch latency is less than
compare to conditional move. */
if (dep_type == TYPE_FPCMP)
return cost - 1;
return cost;
case TYPE_FPCMOVE:
/* FMOVR class instructions can not issue in the same cycle
or the cycle after an instruction which writes any
integer register. Model this as cost 2 for dependent
instructions. */
if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
|| dep_type == TYPE_BINARY)
&& cost < 2)
return 2;
/* Otherwise check as for integer conditional moves. */
case TYPE_CMOVE:
/* Conditional moves involving integer registers wait until
3 cycles after loads return data. The interlock applies
to all loads, not just dependent loads, but that is hard
to model. */
if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
return cost + 3;
return cost;
default:
break;
}
break;
case REG_DEP_ANTI:
/* Divide and square root lock destination registers for full latency. */
if (! SLOW_FP (dep_type))
return 0;
break;
case REG_DEP_OUTPUT:
/* IEU and FPU instructions that have the same destination
register cannot be grouped together. */
return cost + 1;
default:
break;
}
/* Other costs not accounted for:
- Single precision floating point loads lock the other half of
the even/odd register pair.
- Several hazards associated with ldd/std are ignored because these
instructions are rarely generated for V9.
- The floating point pipeline can not have both a single and double
precision operation active at the same time. Format conversions
and graphics instructions are given honorary double precision status.
- call and jmpl are always the first instruction in a group. */
return cost;
}
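/* A self-contained sketch of the dependence-kind dispatch above
   (illustrative, not part of this commit): a true dependence keeps
   the tuned latency, an anti dependence is free unless the producer
   is a slow FP op that locks its registers, and an output dependence
   costs one extra cycle since the two writers cannot share a group.  */
#include <stdio.h>
enum dep_kind { DEP_TRUE, DEP_ANTI, DEP_OUTPUT };
static int adjust (enum dep_kind kind, int slow_fp, int cost)
{
  switch (kind)
    {
    case DEP_ANTI:
      return slow_fp ? cost : 0;        /* divide/sqrt lock registers */
    case DEP_OUTPUT:
      return cost + 1;                  /* same destination: no grouping */
    default:
      return cost;                      /* true dependence: keep latency */
    }
}
int main (void)
{
  printf ("%d %d\n", adjust (DEP_ANTI, 0, 5), adjust (DEP_OUTPUT, 0, 1));
  /* prints 0 2 */
  return 0;
}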
int
sparc_issue_rate ()


@ -2704,11 +2704,17 @@ extern struct rtx_def *legitimize_pic_address ();
#define RTX_COSTS(X,CODE,OUTER_CODE) \
case MULT: \
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
return (GET_MODE (X) == DImode ? \
COSTS_N_INSNS (34) : COSTS_N_INSNS (19)); \
return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
case DIV: \
case UDIV: \
case MOD: \
case UMOD: \
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
return (GET_MODE (X) == DImode ? \
COSTS_N_INSNS (68) : COSTS_N_INSNS (37)); \
return COSTS_N_INSNS (25); \
/* Make FLOAT and FIX more expensive than CONST_DOUBLE,\
so that cse will favor the latter. */ \
@ -2723,9 +2729,28 @@ extern struct rtx_def *legitimize_pic_address ();
if (sparc_cpu == PROCESSOR_SUPERSPARC) \
(COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST); \
else if (sparc_cpu == PROCESSOR_ULTRASPARC) \
(COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, \
last_scheduled_insn, COST);\
else
extern void ultrasparc_sched_reorder ();
extern void ultrasparc_sched_init ();
extern int ultrasparc_variable_issue ();
#define MD_SCHED_INIT(DUMP, SCHED_VERBOSE) \
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
ultrasparc_sched_init (DUMP, SCHED_VERBOSE)
#define MD_SCHED_REORDER(DUMP, SCHED_VERBOSE, READY, N_READY) \
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
ultrasparc_sched_reorder (DUMP, SCHED_VERBOSE, READY, N_READY)
#define MD_SCHED_VARIABLE_ISSUE(DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE) \
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
(CAN_ISSUE_MORE) = ultrasparc_variable_issue (INSN); \
else \
(CAN_ISSUE_MORE)--
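/* An illustrative, self-contained model of the hook protocol (an
   assumption about how the Haifa scheduler of this era drives these
   macros; stand-in types replace FILE and rtx): MD_SCHED_INIT runs
   once per block, MD_SCHED_REORDER is applied to the ready list each
   cycle before issuing, and MD_SCHED_VARIABLE_ISSUE reports after
   each issued insn how many more may fire in the current group.  */
#include <stdio.h>
typedef int insn_t;
static void sched_init (void) { }                /* MD_SCHED_INIT */
static void sched_reorder (insn_t *ready, int n) /* MD_SCHED_REORDER */
{
  (void) ready; (void) n;                        /* slot insns into a group */
}
static int variable_issue (insn_t insn)          /* MD_SCHED_VARIABLE_ISSUE */
{
  (void) insn;
  return 0;                                      /* 0: group full, next cycle */
}
int main (void)
{
  insn_t ready[2] = { 2, 1 };
  int n_ready = 2;
  sched_init ();
  while (n_ready > 0)
    {
      sched_reorder (ready, n_ready);
      if (variable_issue (ready[--n_ready]) == 0)
        continue;                                /* advance to the next cycle */
    }
  printf ("block scheduled\n");
  return 0;
}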
/* Conditional branches with empty delay slots have a length of two. */
#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
if (GET_CODE (INSN) == CALL_INSN \


@ -398,9 +398,9 @@
(eq_attr "type" "store,fpstore"))
1 1)
(define_function_unit "ieu_unnamed" 2 0
(define_function_unit "ieuN" 2 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "ialu,binary,move,unary,shift,cmove,compare,call"))
(eq_attr "type" "ialu,binary,move,unary,shift,compare,call,call_no_delay_slot,uncond_branch"))
1 1)
(define_function_unit "ieu0" 1 0
@ -415,7 +415,12 @@
(define_function_unit "ieu1" 1 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "compare,call,uncond_branch"))
(eq_attr "type" "compare,call,call_no_delay_slot,uncond_branch"))
1 1)
(define_function_unit "cti" 1 0
(and (eq_attr "cpu" "ultrasparc")
(eq_attr "type" "branch"))
1 1)
;; Timings; throughput/latency
@ -1416,7 +1421,7 @@
(match_operand:SI 1 "arith_operand" "rI")))]
"! TARGET_LIVE_G0"
"subx\\t%%g0, %1, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*neg_sltu_plus_x"
@ -1425,7 +1430,7 @@
(match_operand:SI 1 "arith_operand" "rI"))))]
"! TARGET_LIVE_G0"
"subx\\t%%g0, %1, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*sgeu_insn"
@ -1454,7 +1459,7 @@
(match_operand:SI 1 "arith_operand" "rI")))]
"! TARGET_LIVE_G0"
"addx\\t%%g0, %1, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*sltu_plus_x_plus_y"
@ -1464,7 +1469,7 @@
(match_operand:SI 2 "arith_operand" "rI"))))]
""
"addx\\t%1, %2, %0"
[(set_attr "type" "binary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*x_minus_sltu"
@ -1473,7 +1478,7 @@
(ltu:SI (reg:CC 100) (const_int 0))))]
""
"subx\\t%1, 0, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
;; ??? Combine should canonicalize these next two to the same pattern.
@ -1484,7 +1489,7 @@
(ltu:SI (reg:CC 100) (const_int 0))))]
""
"subx\\t%r1, %2, %0"
[(set_attr "type" "binary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*x_minus_sltu_plus_y"
@ -1494,7 +1499,7 @@
(match_operand:SI 2 "arith_operand" "rI"))))]
""
"subx\\t%r1, %2, %0"
[(set_attr "type" "binary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*sgeu_plus_x"
@ -1503,7 +1508,7 @@
(match_operand:SI 1 "register_operand" "r")))]
""
"subx\\t%1, -1, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*x_minus_sgeu"
@ -1512,7 +1517,7 @@
(geu:SI (reg:CC 100) (const_int 0))))]
""
"addx\\t%1, -1, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_split
@ -2167,7 +2172,8 @@
(unspec:SI [(match_operand:SI 2 "immediate_operand" "in")] 0)))]
"flag_pic"
"or\\t%1, %%lo(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "movsi_high_pic"
[(set (match_operand:SI 0 "register_operand" "=r")
@ -2212,7 +2218,8 @@
(match_operand:SI 3 "" "")] 5)))]
"flag_pic"
"or\\t%1, %%lo(%a3-(%a2-.)), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_expand "movdi"
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "")
@ -2394,7 +2401,8 @@
(unspec:DI [(match_operand:DI 2 "immediate_operand" "in")] 0)))]
"TARGET_ARCH64 && flag_pic"
"or\\t%1, %%lo(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "*pic_sethi_di"
[(set (match_operand:DI 0 "register_operand" "=r")
@ -2426,7 +2434,8 @@
(match_operand:DI 2 "symbolic_operand" "")))]
"TARGET_CM_MEDLOW"
"or\\t%1, %%lo(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "seth44"
[(set (match_operand:DI 0 "register_operand" "=r")
@ -2451,7 +2460,8 @@
(match_operand:DI 2 "symbolic_operand" "")))]
"TARGET_CM_MEDMID"
"or\\t%1, %%l44(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "sethh"
[(set (match_operand:DI 0 "register_operand" "=r")
@ -2475,7 +2485,8 @@
(unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] 18)))]
"TARGET_CM_MEDANY"
"or\\t%1, %%hm(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "setlo"
[(set (match_operand:DI 0 "register_operand" "=r")
@ -2483,7 +2494,8 @@
(match_operand:DI 2 "symbolic_operand" "")))]
"TARGET_CM_MEDANY"
"or\\t%1, %%lo(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "embmedany_sethi"
[(set (match_operand:DI 0 "register_operand" "=r")
@ -2499,7 +2511,8 @@
(match_operand:DI 2 "data_segment_operand" "")))]
"TARGET_CM_EMBMEDANY"
"add\\t%1, %%lo(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "embmedany_brsum"
[(set (match_operand:DI 0 "register_operand" "=r")
@ -2530,7 +2543,8 @@
(unspec:DI [(match_operand:DI 2 "text_segment_operand" "")] 15)))]
"TARGET_CM_EMBMEDANY"
"or\\t%1, %%ulo(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_insn "embmedany_textlo"
[(set (match_operand:DI 0 "register_operand" "=r")
@ -2538,7 +2552,8 @@
(match_operand:DI 2 "text_segment_operand" "")))]
"TARGET_CM_EMBMEDANY"
"or\\t%1, %%lo(%a2), %0"
[(set_attr "length" "1")])
[(set_attr "type" "ialu")
(set_attr "length" "1")])
;; Now some patterns to help reload out a bit.
(define_expand "reload_indi"
@ -3797,7 +3812,7 @@
"@
srl\\t%1, 0, %0
lduw\\t%1, %0"
[(set_attr "type" "unary,load")
[(set_attr "type" "shift,load")
(set_attr "length" "1")])
(define_insn "*zero_extendsidi2_insn_sp32"
@ -4090,7 +4105,7 @@
(sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
"TARGET_ARCH64"
"ldsh\\t%1, %0"
[(set_attr "type" "load")
[(set_attr "type" "sload")
(set_attr "length" "1")])
(define_expand "extendsidi2"
@ -4106,7 +4121,7 @@
"@
sra\\t%1, 0, %0
ldsw\\t%1, %0"
[(set_attr "type" "unary,sload")
[(set_attr "type" "shift,sload")
(set_attr "length" "1")])
;; Special pattern for optimizing bit-field compares. This is needed
@ -4469,7 +4484,7 @@
(ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
"TARGET_ARCH64"
"addx\\t%r1, %2, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "subx"
@ -4479,7 +4494,7 @@
(ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
""
"subx\\t%r1, %2, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*subx_extend_sp64"
@ -4489,7 +4504,7 @@
(ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
"TARGET_ARCH64"
"subx\\t%r1, %2, %0"
[(set_attr "type" "unary")
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "*subx_extend"
@ -4547,7 +4562,9 @@
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
"add\\t%1, %2, %0")
"add\\t%1, %2, %0"
[(set_attr "type" "binary")
(set_attr "length" "1")])
(define_insn "addsi3"
[(set (match_operand:SI 0 "register_operand" "=r,d")
@ -7016,7 +7033,9 @@
(define_insn "nop"
[(const_int 0)]
""
"nop")
"nop"
[(set_attr "type" "ialu")
(set_attr "length" "1")])
(define_expand "indirect_jump"
[(set (pc) (match_operand 0 "address_operand" "p"))]
@ -7096,7 +7115,8 @@
[(unspec_volatile [(const_int 0)] 1)]
""
"* return TARGET_V9 ? \"flushw\" : \"ta\\t3\";"
[(set_attr "type" "misc")])
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn "goto_handler_and_restore"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "=r")] 2)]
@ -7162,7 +7182,8 @@
[(unspec_volatile [(match_operand 0 "memory_operand" "m")] 4)]
""
"* return TARGET_V9 ? \"flush\\t%f0\" : \"iflush\\t%f0\";"
[(set_attr "type" "misc")])
[(set_attr "type" "misc")
(set_attr "length" "1")])
;; find first set.
@ -7527,7 +7548,8 @@
[(trap_if (const_int 1) (const_int 5))]
""
"ta\\t5"
[(set_attr "type" "misc")])
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_expand "conditional_trap"
[(trap_if (match_operator 0 "noov_compare_op"
@ -7543,12 +7565,14 @@
(match_operand:SI 1 "arith_operand" "rM"))]
""
"t%C0\\t%1"
[(set_attr "type" "misc")])
[(set_attr "type" "misc")
(set_attr "length" "1")])
(define_insn ""
[(trap_if (match_operator 0 "noov_compare_op" [(reg:CCX 100) (const_int 0)])
(match_operand:SI 1 "arith_operand" "rM"))]
"TARGET_V9"
"t%C0\\t%%xcc, %1"
[(set_attr "type" "misc")])
[(set_attr "type" "misc")
(set_attr "length" "1")])