sparc.c (sparc_override_options): Make v8plus and ultrasparc set MASK_V8PLUS.

Fri Jan 30 22:30:39 1998  John Carr  <jfc@mit.edu>
        * sparc.c (sparc_override_options): Make v8plus and ultrasparc set
        MASK_V8PLUS.
        (output_function_epilogue): Omit epilogue if nothing drops through.
        (output_move_double): Suppress int ldd usage on ultrasparc and v9.
        (registers_ok_for_ldd_peep): Likewise.
        (print_operand): Suppress b,a on ultrasparc.  Let Y accept a constant.
        (ultrasparc_adjust_cost): New function.
        (sparc_issue_rate): New function.
        * sparc.h (MASK_VIS, TARGET_VIS): New
        (MASK_V8PLUS, TARGET_V8PLUS): New.
        (TARGET_HARD_MUL32, TARGET_HARD_MUL): New.
        (TARGET_SWITCHES): Add vis and v8plus.
        (REG_CLASS_FROM_LETTER): Accept d and b for VIS.
        (REGISTER_MOVE_COST): FP<->INT move cost 12 for ultrasparc.
        (RTX_COSTS): Use TARGET_HARD_MUL
        (ADJUST_COST): Call ultrasparc_adjust_cost.
        (ISSUE_RATE): New.
        * sparc.md (attr type): Add sload, fpmove, fpcmove.  Adjust users
        of load & fp appropriately.
        (supersparc function units): Adjust for Haifa.
        (ultrasparc function units): Likewise.
        (get_pc_via_rdpc): All v9, not just arch64.
        (movdi_v8plus, movdi_v8plus+1): New.
        (adddi3_sp32+1): New.
        (subdi3_sp32+1): New.
        (movsi_insn, movsf_const_insn, movdf_const_insn): Know VIS.
        (addsi3, subsi3, anddi3_sp32, andsi3, and_not_di_sp32): Likewise.
        (and_not_si, iordi3_sp32, iorsi3, or_not_di_sp32, or_not_si): Likewise.
        (xorsi3_sp32, xorsi3, xor_not_di_sp32, xor_not_si): Likewise.
        (one_cmpldi2_sp32, one_cmplsi2): Likewise.
        (ldd peepholes): Suppress for v9.
        (return_adddi): Kill redundant test.  Arg1 may be arith_operand.
        (return_subsi): Remove.

From-SVN: r17560
This commit is contained in:
John Carr 1998-01-30 23:34:15 +00:00 committed by Richard Henderson
parent 4b526a9a94
commit bfd6bc60f5
4 changed files with 703 additions and 239 deletions

View File

@ -1,3 +1,40 @@
Fri Jan 30 22:30:39 1998 John Carr <jfc@mit.edu>
* sparc.c (sparc_override_options): Make v8plus and ultrasparc set
MASK_V8PLUS.
(output_function_epilogue): Omit epilogue if nothing drops through.
(output_move_double): Suppress int ldd usage on ultrasparc and v9.
(registers_ok_for_ldd_peep): Likewise.
(print_operand): Suppress b,a on ultrasparc. Let Y accept a constant.
(ultrasparc_adjust_cost): New function.
(sparc_issue_rate): New function.
* sparc.h (MASK_VIS, TARGET_VIS): New
(MASK_V8PLUS, TARGET_V8PLUS): New.
(TARGET_HARD_MUL32, TARGET_HARD_MUL): New.
(TARGET_SWITCHES): Add vis and v8plus.
(REG_CLASS_FROM_LETTER): Accept d and b for VIS.
(REGISTER_MOVE_COST): FP<->INT move cost 12 for ultrasparc.
(RTX_COSTS): Use TARGET_HARD_MUL
(ADJUST_COST): Call ultrasparc_adjust_cost.
(ISSUE_RATE): New.
* sparc.md (attr type): Add sload, fpmove, fpcmove. Adjust users
of load & fp appropriately.
(supersparc function units): Adjust for Haifa.
(ultrasparc function units): Likewise.
(get_pc_via_rdpc): All v9, not just arch64.
(movdi_v8plus, movdi_v8plus+1): New.
(adddi3_sp32+1): New.
(subdi3_sp32+1): New.
(movsi_insn, movsf_const_insn, movdf_const_insn): Know VIS.
(addsi3, subsi3, anddi3_sp32, andsi3, and_not_di_sp32): Likewise.
(and_not_si, iordi3_sp32, iorsi3, or_not_di_sp32, or_not_si): Likewise.
(xorsi3_sp32, xorsi3, xor_not_di_sp32, xor_not_si): Likewise.
(one_cmpldi2_sp32, one_cmplsi2): Likewise.
(ldd peepholes): Suppress for v9.
(return_adddi): Kill redundant test. Arg1 may be arith_operand.
(return_subsi): Remove.
Fri Jan 30 18:30:03 1998 John F Carr <jfc@mit.edu>
* mips.c (save_restore_insns): Set RTX_UNCHANGING_P in register

View File

@ -209,10 +209,10 @@ sparc_override_options ()
/* TEMIC sparclet */
{ "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
/* "v8plus" is what Sun calls Solaris2.5 running on UltraSPARC's. */
{ "v8plus", PROCESSOR_V8PLUS, MASK_ISA, MASK_V9 },
{ "v8plus", PROCESSOR_V8PLUS, MASK_ISA, MASK_V8PLUS },
{ "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
/* TI ultrasparc */
{ "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
{ "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V8PLUS },
{ 0 }
};
struct cpu_table *cpu;
@ -379,6 +379,7 @@ v9_regcmp_p (code)
return (code == EQ || code == NE || code == GE || code == LT
|| code == LE || code == GT);
}
/* Operand constraints. */
@ -1257,7 +1258,7 @@ eligible_for_epilogue_delay (trial, slot)
src = SET_SRC (pat);
/* This matches "*return_[qhs]". */
/* This matches "*return_[qhs]i". */
if (arith_operand (src, GET_MODE (src)))
return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
@ -2009,13 +2010,26 @@ output_move_double (operands)
/* In v9, ldd can be used for word aligned addresses, so technically
some of this logic is unneeded. We still avoid ldd if the address
is obviously unaligned though. */
is obviously unaligned though.
if (mem_aligned_8 (mem)
Integer ldd/std are deprecated in V9 and are slow on UltraSPARC.
Use them only if the access is volatile or not offsettable. */
if ((mem_aligned_8 (mem)
&& (REGNO (reg) >= 32
|| MEM_VOLATILE_P (mem)
|| ! ((optype0 == OFFSOP || optype1 == OFFSOP)
&& (sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_V9))))
/* If this is a floating point register higher than %f31,
then we *must* use an aligned load, since `ld' will not accept
the register number. */
|| (TARGET_V9 && REGNO (reg) >= 64))
|| (TARGET_V9 && REGNO (reg) >= 64)
/* Even if two instructions would otherwise be better than ldd/std,
if this insn was put in a delay slot because reorg thought it
was only one machine instruction, make sure it is only one
instruction. */
|| dbr_sequence_length () != 0)
{
if (FP_REG_P (reg) || ! TARGET_ARCH64)
return (mem == op1 ? "ldd %1,%0" : "std %1,%0");
@ -3504,6 +3518,16 @@ output_function_epilogue (file, size, leaf_function)
}
#endif
else if (current_function_epilogue_delay_list == 0)
{
/* If code does not drop into the epilogue, do nothing. */
rtx insn = get_last_insn ();
if (GET_CODE (insn) == NOTE)
insn = prev_nonnote_insn (insn);
if (insn && GET_CODE (insn) == BARRIER)
return;
}
/* Restore any call saved registers. */
if (num_gfregs)
{
@ -4631,8 +4655,7 @@ order_regs_for_local_alloc ()
/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
This makes them candidates for using ldd and std insns.
Note reg1 and reg2 *must* be hard registers. To be sure we will
abort if we are passed pseudo registers. */
Note reg1 and reg2 *must* be hard registers. */
int
registers_ok_for_ldd_peep (reg1, reg2)
@ -4645,6 +4668,10 @@ registers_ok_for_ldd_peep (reg1, reg2)
if (REGNO (reg1) % 2 != 0)
return 0;
/* Integer ldd is deprecated in SPARC V9 */
if (TARGET_V9 && REGNO (reg1) < 32)
return 0;
return (REGNO (reg1) == REGNO (reg2) - 1);
}
@ -4762,13 +4789,17 @@ print_operand (file, x, code)
are optimizing. This is always used with '(' below. */
/* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
this is a dbx bug. So, we only do this when optimizing. */
if (dbr_sequence_length () == 0 && optimize)
/* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
Always emit a nop in case the next instruction is a branch. */
if (dbr_sequence_length () == 0
&& (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
fputs (",a", file);
return;
case '(':
/* Output a 'nop' if there's nothing for the delay slot and we are
not optimizing. This is always used with '*' above. */
if (dbr_sequence_length () == 0 && ! optimize)
if (dbr_sequence_length () == 0
&& ! (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
fputs ("\n\tnop", file);
return;
case '_':
@ -4783,7 +4814,9 @@ print_operand (file, x, code)
return;
case 'Y':
/* Adjust the operand to take into account a RESTORE operation. */
if (GET_CODE (x) != REG)
if (GET_CODE (x) == CONST_INT)
break;
else if (GET_CODE (x) != REG)
output_operand_lossage ("Invalid %%Y operand");
else if (REGNO (x) < 8)
fputs (reg_names[REGNO (x)], file);
@ -6022,3 +6055,150 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
return cost;
}
/* Adjust the scheduling cost COST of the dependency LINK, by which INSN
   depends on DEP_INSN, for the UltraSPARC.  Return the adjusted cost.

   COST is returned unchanged if either insn is not recognized, or if
   none of the special cases below applies.  */

int
ultrasparc_adjust_cost (insn, link, dep_insn, cost)
     rtx insn;
     rtx link;
     rtx dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN (insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* The type attribute is only meaningful for recognized insns.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

/* Nonzero if DEP_TYPE is a floating point square root or divide.  */
#define SLOW_FP(dep_type) \
((dep_type) == TYPE_FPSQRT || (dep_type) == TYPE_FPDIVS \
 || (dep_type) == TYPE_FPDIVD)

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	  /* UltraSPARC can dual issue a store and an instruction setting
	     the value stored, except for divide and square root.  */
	case TYPE_FPSTORE:
	  if (! SLOW_FP (dep_type))
	    return 0;
	  break;

	case TYPE_STORE:
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  /* The dependency between the two instructions is on the data
	     that is being stored.  Assume that the address of the store
	     is not also dependent.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return 0;
	  return cost;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* A load does not return data until at least 11 cycles after
	     a store to the same location.  3 cycles are accounted for
	     in the load latency; add the other 8 here.  */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      /* If the addresses are not equal this may be a false
		 dependency because pointer aliasing could not be
		 determined.  Add only 2 cycles in that case.  2 is
		 an arbitrary compromise between 8, which would cause
		 the scheduler to generate worse code elsewhere to
		 compensate for a dependency which might not really
		 exist, and 0.

		 INSN is the load, so its memory reference is the source
		 of its SET; DEP_INSN is the store, so its memory
		 reference is the destination of its SET.  */
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
				    XEXP (SET_DEST (dep_pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;

	case TYPE_FPCMOVE:
	  /* FMOVR class instructions can not issue in the same cycle
	     or the cycle after an instruction which writes any
	     integer register.  Model this as cost 2 for dependent
	     instructions.  */
	  if (GET_CODE (pat) == SET
	      && (GET_MODE (SET_DEST (pat)) == SFmode
		  || GET_MODE (SET_DEST (pat)) == DFmode)
	      && cost < 2)
	    return 2;
	  /* Otherwise check as for integer conditional moves.  */
	  /* Fall through.  */

	case TYPE_CMOVE:
	  /* Conditional moves involving integer registers wait until
	     3 cycles after loads return data.  The interlock applies
	     to all loads, not just dependent loads, but that is hard
	     to model.  */
	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
	    return cost + 3;
	  break;

	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Divide and square root lock destination registers for full
	 latency.  */
      if (! SLOW_FP (dep_type))
	return 0;
      break;

    default:
      break;
    }

  /* Other costs not accounted for:
     - Multiply should be modeled as having no latency because there is
       nothing the scheduler can do about it.
     - Single precision floating point loads lock the other half of
       the even/odd register pair.
     - Several hazards associated with ldd/std are ignored because these
       instructions are rarely generated for V9.
     - A shift following an integer instruction which does not set the
       condition codes can not issue in the same cycle.
     - The floating point pipeline can not have both a single and double
       precision operation active at the same time.  Format conversions
       and graphics instructions are given honorary double precision status.
     - call and jmpl are always the first instruction in a group.  */

  return cost;
}
/* Return the issue rate for the processor selected by sparc_cpu.
   This is the value of the ISSUE_RATE macro.  */

int
sparc_issue_rate ()
{
  int rate;

  switch (sparc_cpu)
    {
    case PROCESSOR_ULTRASPARC:
      rate = 4;
      break;

    case PROCESSOR_SUPERSPARC:
      rate = 3;
      break;

    case PROCESSOR_V8PLUS:
    case PROCESSOR_V9:
      /* Assume these generic V9 types are capable of at least dual-issue.  */
      rate = 2;
      break;

    default:
      /* Every other processor is treated as single-issue.  */
      rate = 1;
      break;
    }

  return rate;
}

View File

@ -449,6 +449,27 @@ extern int target_flags;
#define MASK_FPU_SET 0x400000
#define TARGET_FPU_SET (target_flags & MASK_FPU_SET)
/* Use the UltraSPARC Visual Instruction Set extensions.
   TARGET_VIS is nonzero when the MASK_VIS bit is set in target_flags. */
#define MASK_VIS 0x1000000
#define TARGET_VIS (target_flags & MASK_VIS)
/* Compile for Solaris V8+. 64 bit instructions are available but the
high 32 bits of all registers except the globals and current outs may
be cleared at any time.
TARGET_V8PLUS is nonzero when the MASK_V8PLUS bit is set in target_flags. */
#define MASK_V8PLUS 0x2000000
#define TARGET_V8PLUS (target_flags & MASK_V8PLUS)
/* Nonzero if any of V8, SPARClite, SPARClet or the deprecated V8
instructions is selected and V8+ is not. Presumably this gates the
32 bit hardware multiply patterns; see sparc.md to confirm. */
#define TARGET_HARD_MUL32 \
((TARGET_V8 || TARGET_SPARCLITE \
|| TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS) \
&& ! TARGET_V8PLUS)
/* Nonzero if any of V8, SPARClite, SPARClet, the deprecated V8
instructions or V8+ is selected. Unlike TARGET_HARD_MUL32, V8+ counts
here. */
#define TARGET_HARD_MUL \
(TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET \
|| TARGET_DEPRECATED_V8_INSNS || TARGET_V8PLUS)
/* Macro to define tables used to set the flags.
This is a list in braces of pairs in braces,
each pair being { "NAME", VALUE }
@ -474,12 +495,14 @@ extern int target_flags;
{"no-app-regs", -MASK_APP_REGS}, \
{"hard-quad-float", MASK_HARD_QUAD}, \
{"soft-quad-float", -MASK_HARD_QUAD}, \
{"vis", MASK_VIS}, \
/* ??? These are deprecated, coerced to -mcpu=. Delete in 2.9. */ \
{"cypress", 0}, \
{"sparclite", 0}, \
{"f930", 0}, \
{"f934", 0}, \
{"v8", 0}, \
{"v8plus", 0}, \
{"supersparc", 0}, \
/* End of deprecated options. */ \
/* -mptrNN exists for *experimental* purposes. */ \
@ -1242,17 +1265,20 @@ extern char leaf_reg_remap[];
/* Get reg_class from a letter such as appears in the machine description.
In the not-v9 case, coerce v9's 'e' class to 'f', so we can use 'e' in the
.md file for v8 and v9. */
.md file for v8 and v9.
Use 'd' and 'b' for single precision VIS operations if TARGET_VIS. */
#define REG_CLASS_FROM_LETTER(C) \
(TARGET_V9 \
? ((C) == 'f' ? FP_REGS \
: (C) == 'e' ? EXTRA_FP_REGS \
: (C) == 'c' ? FPCC_REGS \
: NO_REGS) \
: ((C) == 'f' ? FP_REGS \
: (C) == 'e' ? FP_REGS \
: (C) == 'c' ? FPCC_REGS \
#define REG_CLASS_FROM_LETTER(C) \
(TARGET_V9 \
? ((C) == 'f' ? FP_REGS \
: (C) == 'e' ? EXTRA_FP_REGS \
: (C) == 'c' ? FPCC_REGS \
: ((C) == 'd' && TARGET_VIS) ? FP_REGS \
: ((C) == 'b' && TARGET_VIS) ? FP_REGS \
: NO_REGS) \
: ((C) == 'f' ? FP_REGS \
: (C) == 'e' ? FP_REGS \
: (C) == 'c' ? FPCC_REGS \
: NO_REGS))
/* The letters I, J, K, L and M in a register constraint string
@ -2683,11 +2709,13 @@ extern struct rtx_def *legitimize_pic_address ();
#define ADDRESS_COST(RTX) 1
/* Compute extra cost of moving data between one register class
and another.
??? v9: We ignore FPCC_REGS on the assumption they'll never be seen. */
#define REGISTER_MOVE_COST(CLASS1, CLASS2) \
(((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS) \
|| ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2))) ? 6 : 2)
and another. */
#define REGISTER_MOVE_COST(CLASS1, CLASS2) \
(((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS) \
|| ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2)) \
|| (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \
? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) \
: 2)
/* Provide the costs of a rtl expression. This is in the body of a
switch on CODE. The purpose for the cost of MULT is to encourage
@ -2698,8 +2726,7 @@ extern struct rtx_def *legitimize_pic_address ();
#define RTX_COSTS(X,CODE,OUTER_CODE) \
case MULT: \
return (TARGET_V8 || TARGET_SPARCLITE) \
? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
case DIV: \
case UDIV: \
case MOD: \
@ -2711,16 +2738,24 @@ extern struct rtx_def *legitimize_pic_address ();
case FIX: \
return 19;
#define ISSUE_RATE sparc_issue_rate()
/* Adjust the cost of dependencies. */
#define ADJUST_COST(INSN,LINK,DEP,COST) \
if (sparc_cpu == PROCESSOR_SUPERSPARC) \
(COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST)
#define ADJUST_COST(INSN,LINK,DEP,COST) \
do { \
if (sparc_cpu == PROCESSOR_SUPERSPARC) \
(COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST); \
else if (sparc_cpu == PROCESSOR_ULTRASPARC) \
(COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST); \
} while (0)
/* Conditional branches with empty delay slots have a length of two. */
#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
do { \
if (GET_CODE (INSN) == CALL_INSN \
|| (GET_CODE (INSN) == JUMP_INSN && ! simplejump_p (insn))) \
LENGTH += 1;
LENGTH += 1; \
} while (0)
/* Control the assembler format that we output. */

File diff suppressed because it is too large Load Diff