sparc.c (sparc_override_options): Make v8plus and ultrasparc set MASK_V8PLUS.

Fri Jan 30 22:30:39 1998 John Carr <jfc@mit.edu> * sparc.c (sparc_override_options): Make v8plus and ultrasparc set MASK_V8PLUS. (output_function_epilogue): Omit epilogue if nothing drops through. (output_move_double): Suppress int ldd usage on ultrasparc and v9. (registers_ok_for_ldd_peep): Likewise. (print_operand): Suppress b,a on ultrasparc. Let Y accept a constant. (ultrasparc_adjust_cost): New function. (sparc_issue_rate): New function. * sparc.h (MASK_VIS, TARGET_VIS): New. (MASK_V8PLUS, TARGET_V8PLUS): New. (TARGET_HARD_MUL32, TARGET_HARD_MUL): New. (TARGET_SWITCHES): Add vis and v8plus. (REG_CLASS_FROM_LETTER): Accept d and b for VIS. (REGISTER_MOVE_COST): FP<->INT move cost 12 for ultrasparc. (RTX_COSTS): Use TARGET_HARD_MUL. (ADJUST_COST): Call ultrasparc_adjust_cost. (ISSUE_RATE): New. * sparc.md (attr type): Add sload, fpmove, fpcmove. Adjust users of load & fp appropriately. (supersparc function units): Adjust for Haifa. (ultrasparc function units): Likewise. (get_pc_via_rdpc): All v9, not just arch64. (movdi_v8plus, movdi_v8plus+1): New. (adddi3_sp32+1): New. (subdi3_sp32+1): New. (movsi_insn, movsf_const_insn, movdf_const_insn): Know VIS. (addsi3, subsi3, anddi3_sp32, andsi3, and_not_di_sp32): Likewise. (and_not_si, iordi3_sp32, iorsi3, or_not_di_sp32, or_not_si): Likewise. (xorsi3_sp32, xorsi3, xor_not_di_sp32, xor_not_si): Likewise. (one_cmpldi2_sp32, one_cmplsi2): Likewise. (ldd peepholes): Suppress for v9. (return_adddi): Kill redundant test. Arg1 may be arith_operand. (return_subsi): Remove. From-SVN: r17560
1998-01-30 23:34:15 +00:00 · 1998-01-30 23:34:15 +00:00 · bfd6bc60f5
parent 4b526a9a94
commit bfd6bc60f5
4 changed files with 703 additions and 239 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,40 @@
+Fri Jan 30 22:30:39 1998  John Carr  <jfc@mit.edu>
+
+	* sparc.c (sparc_override_options): Make v8plus and ultrasparc set
+	MASK_V8PLUS.
+	(output_function_epilogue): Omit epilogue if nothing drops through.
+	(output_move_double): Suppress int ldd usage on ultrasparc and v9.
+	(registers_ok_for_ldd_peep): Likewise.
+	(print_operand): Suppress b,a on ultrasparc.  Let Y accept a constant.
+	(ultrasparc_adjust_cost): New function.
+	(sparc_issue_rate): New function.
+	* sparc.h (MASK_VIS, TARGET_VIS): New.
+	(MASK_V8PLUS, TARGET_V8PLUS): New.
+	(TARGET_HARD_MUL32, TARGET_HARD_MUL): New.
+	(TARGET_SWITCHES): Add vis and v8plus.
+	(REG_CLASS_FROM_LETTER): Accept d and b for VIS.
+	(REGISTER_MOVE_COST): FP<->INT move cost 12 for ultrasparc.
+	(RTX_COSTS): Use TARGET_HARD_MUL.
+	(ADJUST_COST): Call ultrasparc_adjust_cost.
+	(ISSUE_RATE): New.
+	* sparc.md (attr type): Add sload, fpmove, fpcmove.  Adjust users
+	of load & fp appropriately.
+	(supersparc function units): Adjust for Haifa.
+	(ultrasparc function units): Likewise.
+	(get_pc_via_rdpc): All v9, not just arch64.
+	(movdi_v8plus, movdi_v8plus+1): New.
+	(adddi3_sp32+1): New.
+	(subdi3_sp32+1): New.
+	(movsi_insn, movsf_const_insn, movdf_const_insn): Know VIS.
+	(addsi3, subsi3, anddi3_sp32, andsi3, and_not_di_sp32): Likewise.
+	(and_not_si, iordi3_sp32, iorsi3, or_not_di_sp32, or_not_si): Likewise.
+	(xorsi3_sp32, xorsi3, xor_not_di_sp32, xor_not_si): Likewise.
+	(one_cmpldi2_sp32, one_cmplsi2): Likewise.
+	(ldd peepholes): Suppress for v9.
+	(return_adddi): Kill redundant test.  Arg1 may be arith_operand.
+	(return_subsi): Remove.
+
+
 Fri Jan 30 18:30:03 1998  John F Carr  <jfc@mit.edu>

 	* mips.c (save_restore_insns): Set RTX_UNCHANGING_P in register
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@ -209,10 +209,10 @@ sparc_override_options ()
    /* TEMIC sparclet */
    { "tsc701",     PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
    /* "v8plus" is what Sun calls Solaris2.5 running on UltraSPARC's.  */
-    { "v8plus",     PROCESSOR_V8PLUS, MASK_ISA, MASK_V9 },
+    { "v8plus",     PROCESSOR_V8PLUS, MASK_ISA, MASK_V8PLUS },
    { "v9",         PROCESSOR_V9, MASK_ISA, MASK_V9 },
    /* TI ultrasparc */
-    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
+    { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V8PLUS },
    { 0 }
  };
  struct cpu_table *cpu;
@ -379,6 +379,7 @@ v9_regcmp_p (code)
  return (code == EQ || code == NE || code == GE || code == LT
 	  || code == LE || code == GT);
 }
+

 /* Operand constraints.  */

@ -1257,7 +1258,7 @@ eligible_for_epilogue_delay (trial, slot)

  src = SET_SRC (pat);

-  /* This matches "*return_[qhs]".  */
+  /* This matches "*return_[qhs]i".  */
  if (arith_operand (src, GET_MODE (src)))
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    
@ -2009,13 +2010,26 @@ output_move_double (operands)

      /* In v9, ldd can be used for word aligned addresses, so technically
 	 some of this logic is unneeded.  We still avoid ldd if the address
-	 is obviously unaligned though.  */
+	 is obviously unaligned though.

-      if (mem_aligned_8 (mem)
+	 Integer ldd/std are deprecated in V9 and are slow on UltraSPARC.
+	 Use them only if the access is volatile or not offsettable.  */
+
+      if ((mem_aligned_8 (mem)
+	   && (REGNO (reg) >= 32
+	       || MEM_VOLATILE_P (mem)
+	       || ! ((optype0 == OFFSOP || optype1 == OFFSOP)
+		     && (sparc_cpu == PROCESSOR_ULTRASPARC
+			 || sparc_cpu == PROCESSOR_V9))))
 	  /* If this is a floating point register higher than %f31,
 	     then we *must* use an aligned load, since `ld' will not accept
 	     the register number.  */
-	  || (TARGET_V9 && REGNO (reg) >= 64))
+	  || (TARGET_V9 && REGNO (reg) >= 64)
+	  /* Even if two instructions would otherwise be better than ldd/std,
+	     if this insn was put in a delay slot because reorg thought it
+	     was only one machine instruction, make sure it is only one
+	     instruction.  */
+	  || dbr_sequence_length () != 0)
 	{
 	  if (FP_REG_P (reg) || ! TARGET_ARCH64)
 	    return (mem == op1 ? "ldd %1,%0" : "std %1,%0");
@ -3504,6 +3518,16 @@ output_function_epilogue (file, size, leaf_function)
    }
 #endif

+  else if (current_function_epilogue_delay_list == 0)
+    {                                                
+      /* If code does not drop into the epilogue, do nothing.  */
+      rtx insn = get_last_insn ();                               
+      if (GET_CODE (insn) == NOTE)                               
+      insn = prev_nonnote_insn (insn);                           
+      if (insn && GET_CODE (insn) == BARRIER)                    
+      return;                                                    
+    }
+
  /* Restore any call saved registers.  */
  if (num_gfregs)
    {
@ -4631,8 +4655,7 @@ order_regs_for_local_alloc ()
 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
   This makes them candidates for using ldd and std insns. 

-   Note reg1 and reg2 *must* be hard registers.  To be sure we will
-   abort if we are passed pseudo registers.  */
+   Note reg1 and reg2 *must* be hard registers.  */

 int
 registers_ok_for_ldd_peep (reg1, reg2)
@ -4645,6 +4668,10 @@ registers_ok_for_ldd_peep (reg1, reg2)
  if (REGNO (reg1) % 2 != 0)
    return 0;

+  /* Integer ldd is deprecated in SPARC V9 */ 
+  if (TARGET_V9 && REGNO (reg1) < 32)                  
+    return 0;                             
+
  return (REGNO (reg1) == REGNO (reg2) - 1);
 }

@ -4762,13 +4789,17 @@ print_operand (file, x, code)
 	 are optimizing.  This is always used with '(' below.  */
      /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
 	 this is a dbx bug.  So, we only do this when optimizing.  */
-      if (dbr_sequence_length () == 0 && optimize)
+      /* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
+	 Always emit a nop in case the next instruction is a branch.  */
+      if (dbr_sequence_length () == 0
+	  && (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
 	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
 	 not optimizing.  This is always used with '*' above.  */
-      if (dbr_sequence_length () == 0 && ! optimize)
+      if (dbr_sequence_length () == 0
+	  && ! (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
 	fputs ("\n\tnop", file);
      return;
    case '_':
@ -4783,7 +4814,9 @@ print_operand (file, x, code)
      return;
    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
-      if (GET_CODE (x) != REG)
+      if (GET_CODE (x) == CONST_INT)
+	break;
+      else if (GET_CODE (x) != REG)
 	output_operand_lossage ("Invalid %%Y operand");
      else if (REGNO (x) < 8)
 	fputs (reg_names[REGNO (x)], file);
@ -6022,3 +6055,150 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
 	
  return cost;
 }
+
+/* Adjust COST, the scheduler's latency estimate for the dependence
+   described by LINK between DEP_INSN and the later insn INSN, using
+   UltraSPARC-specific rules.  Return the adjusted cost.  COST is
+   returned unchanged when recog_memoized fails for either insn or when
+   none of the special cases below applies.  */
+
+int
+ultrasparc_adjust_cost (insn, link, dep_insn, cost)
+     rtx insn;
+     rtx link;
+     rtx dep_insn;
+     int cost;
+{
+  enum attr_type insn_type, dep_type;
+  rtx pat = PATTERN (insn);
+  rtx dep_pat = PATTERN (dep_insn);
+
+  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+    return cost;
+
+  insn_type = get_attr_type (insn);
+  dep_type = get_attr_type (dep_insn);
+
+/* Nonzero if DEP_TYPE is a floating point divide or square root.  */
+#define SLOW_FP(dep_type) \
+((dep_type) == TYPE_FPSQRT || (dep_type) == TYPE_FPDIVS \
+ || (dep_type) == TYPE_FPDIVD)
+
+  switch (REG_NOTE_KIND (link))
+    {
+    case 0:
+      /* Data dependency; DEP_INSN writes a register that INSN reads some
+	 cycles later.  */
+
+      switch (insn_type)
+	{
+	  /* UltraSPARC can dual issue a store and an instruction setting
+	     the value stored, except for divide and square root.  */
+	case TYPE_FPSTORE:
+	  if (! SLOW_FP (dep_type))
+	    return 0;
+	  break;
+
+	case TYPE_STORE:
+	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+	    return cost;
+
+	  /* The dependency between the two instructions is on the data
+	     that is being stored.  Assume that the address of the store
+	     is not also dependent.  */
+	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
+	    return 0;
+	  return cost;
+
+	case TYPE_LOAD:
+	case TYPE_SLOAD:
+	case TYPE_FPLOAD:
+	  /* A load does not return data until at least 11 cycles after
+	     a store to the same location.  3 cycles are accounted for
+	     in the load latency; add the other 8 here.  */
+	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
+	    {
+	      /* If the addresses are not equal this may be a false
+		 dependency because pointer aliasing could not be
+		 determined.  Add only 2 cycles in that case.  2 is
+		 an arbitrary compromise between 8, which would cause
+		 the scheduler to generate worse code elsewhere to
+		 compensate for a dependency which might not really
+		 exist, and 0.
+
+		 INSN is the load, so its memory reference is the source
+		 of PAT; DEP_INSN is the store, so its memory reference
+		 is the destination of DEP_PAT.  If either is not directly
+		 a MEM the addresses cannot be compared and the 2 cycle
+		 estimate is used.  */
+	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
+		  || GET_CODE (SET_SRC (pat)) != MEM
+		  || GET_CODE (SET_DEST (dep_pat)) != MEM
+		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
+				    XEXP (SET_DEST (dep_pat), 0)))
+		return cost + 2;
+
+	      return cost + 8;
+	    }
+	  break;
+
+	case TYPE_BRANCH:
+	  /* Compare to branch latency is 0.  There is no benefit from
+	     separating compare and branch.  */
+	  if (dep_type == TYPE_COMPARE)
+	    return 0;
+	  /* Floating point compare to branch latency is less than
+	     compare to conditional move.  */
+	  if (dep_type == TYPE_FPCMP)
+	    return cost - 1;
+	  break;
+
+	case TYPE_FPCMOVE:
+	  /* FMOVR class instructions can not issue in the same cycle
+	     or the cycle after an instruction which writes any
+	     integer register.  Model this as cost 2 for dependent
+	     instructions.  */
+	  if (GET_CODE (pat) == SET
+	      && (GET_MODE (SET_DEST (pat)) == SFmode
+		  || GET_MODE (SET_DEST (pat)) == DFmode)
+	      && cost < 2)
+	    return 2;
+	  /* Otherwise check as for integer conditional moves.  */
+	  /* FALLTHRU */
+
+	case TYPE_CMOVE:
+	  /* Conditional moves involving integer registers wait until
+	     3 cycles after loads return data.  The interlock applies
+	     to all loads, not just dependent loads, but that is hard
+	     to model.  */
+	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
+	    return cost + 3;
+	  break;
+
+	default:
+	  break;
+	}
+      break;
+
+    case REG_DEP_ANTI:
+      /* Divide and square root lock destination registers for full latency. */
+      if (! SLOW_FP (dep_type))
+	return 0;
+      break;
+    }
+
+  /* Other costs not accounted for:
+     - Multiply should be modeled as having no latency because there is
+       nothing the scheduler can do about it.
+     - Single precision floating point loads lock the other half of
+       the even/odd register pair.
+     - Several hazards associated with ldd/std are ignored because these
+       instructions are rarely generated for V9.
+     - A shift following an integer instruction which does not set the
+       condition codes can not issue in the same cycle.
+     - The floating point pipeline can not have both a single and double
+       precision operation active at the same time.  Format conversions
+       and graphics instructions are given honorary double precision status.
+     - call and jmpl are always the first instruction in a group.  */
+
+  return cost;
+}
+
+/* Return the issue rate the scheduler should assume for the processor
+   currently selected in sparc_cpu.  */
+
+int
+sparc_issue_rate ()
+{
+  /* Assume these generic V9 types are capable of at least dual-issue.  */
+  if (sparc_cpu == PROCESSOR_V8PLUS || sparc_cpu == PROCESSOR_V9)
+    return 2;
+
+  if (sparc_cpu == PROCESSOR_SUPERSPARC)
+    return 3;
+
+  if (sparc_cpu == PROCESSOR_ULTRASPARC)
+    return 4;
+
+  /* Every other processor is treated as single-issue.  */
+  return 1;
+}
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@ -449,6 +449,27 @@ extern int target_flags;
 #define MASK_FPU_SET 0x400000
 #define TARGET_FPU_SET (target_flags & MASK_FPU_SET)

+/* Use the UltraSPARC Visual Instruction Set extensions.  */
+#define MASK_VIS 0x1000000          
+#define TARGET_VIS (target_flags & MASK_VIS)
+
+/* Compile for Solaris V8+.  64 bit instructions are available but the
+   high 32 bits of all registers except the globals and current outs may
+   be cleared at any time.  */                 
+#define MASK_V8PLUS 0x2000000                 
+#define TARGET_V8PLUS (target_flags & MASK_V8PLUS)                            
+
+/* See sparc.md */
+#define TARGET_HARD_MUL32				\
+  ((TARGET_V8 || TARGET_SPARCLITE			\
+    || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS)	\
+   && ! TARGET_V8PLUS)
+
+#define TARGET_HARD_MUL					\
+  (TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET	\
+   || TARGET_DEPRECATED_V8_INSNS || TARGET_V8PLUS)                        
+
+
 /* Macro to define tables used to set the flags.
   This is a list in braces of pairs in braces,
   each pair being { "NAME", VALUE }
@ -474,12 +495,14 @@ extern int target_flags;
    {"no-app-regs", -MASK_APP_REGS},	\
    {"hard-quad-float", MASK_HARD_QUAD}, \
    {"soft-quad-float", -MASK_HARD_QUAD}, \
+    {"vis", MASK_VIS},			\
    /* ??? These are deprecated, coerced to -mcpu=.  Delete in 2.9.  */ \
    {"cypress", 0},			\
    {"sparclite", 0},			\
    {"f930", 0},			\
    {"f934", 0},			\
    {"v8", 0},				\
+    {"v8plus", 0},			\
    {"supersparc", 0},			\
    /* End of deprecated options.  */	\
    /* -mptrNN exists for *experimental* purposes.  */ \
@ -1242,17 +1265,20 @@ extern char leaf_reg_remap[];

 /* Get reg_class from a letter such as appears in the machine description.
   In the not-v9 case, coerce v9's 'e' class to 'f', so we can use 'e' in the
-   .md file for v8 and v9.  */
+   .md file for v8 and v9.
+   Use 'd' and 'b' for single precision VIS operations if TARGET_VIS.  */

-#define REG_CLASS_FROM_LETTER(C) \
-(TARGET_V9			\
- ? ((C) == 'f' ? FP_REGS	\
-    : (C) == 'e' ? EXTRA_FP_REGS \
-    : (C) == 'c' ? FPCC_REGS	\
-    : NO_REGS)			\
- : ((C) == 'f' ? FP_REGS	\
-    : (C) == 'e' ? FP_REGS	\
-    : (C) == 'c' ? FPCC_REGS	\
+#define REG_CLASS_FROM_LETTER(C)		\
+(TARGET_V9					\
+ ? ((C) == 'f' ? FP_REGS			\
+    : (C) == 'e' ? EXTRA_FP_REGS 		\
+    : (C) == 'c' ? FPCC_REGS			\
+    : ((C) == 'd' && TARGET_VIS) ? FP_REGS	\
+    : ((C) == 'b' && TARGET_VIS) ? FP_REGS	\
+    : NO_REGS)					\
+ : ((C) == 'f' ? FP_REGS			\
+    : (C) == 'e' ? FP_REGS			\
+    : (C) == 'c' ? FPCC_REGS			\
    : NO_REGS))

 /* The letters I, J, K, L and M in a register constraint string
@ -2683,11 +2709,13 @@ extern struct rtx_def *legitimize_pic_address ();
 #define ADDRESS_COST(RTX)  1

 /* Compute extra cost of moving data between one register class
-   and another.
-   ??? v9: We ignore FPCC_REGS on the assumption they'll never be seen.  */
-#define REGISTER_MOVE_COST(CLASS1, CLASS2) \
-  (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS) \
-    || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2))) ? 6 : 2)
+   and another.  */
+#define REGISTER_MOVE_COST(CLASS1, CLASS2)			\
+  (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS)	\
+    || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2))	\
+    || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS)		\
+   ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6)		\
+   : 2)

 /* Provide the costs of a rtl expression.  This is in the body of a
   switch on CODE.  The purpose for the cost of MULT is to encourage
@ -2698,8 +2726,7 @@ extern struct rtx_def *legitimize_pic_address ();

 #define RTX_COSTS(X,CODE,OUTER_CODE)			\
  case MULT:						\
-    return (TARGET_V8 || TARGET_SPARCLITE)              \
-	? COSTS_N_INSNS (5) : COSTS_N_INSNS (25);	\
+    return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
  case DIV:						\
  case UDIV:						\
  case MOD:						\
@ -2711,16 +2738,24 @@ extern struct rtx_def *legitimize_pic_address ();
  case FIX:						\
    return 19;

+#define ISSUE_RATE  sparc_issue_rate()
+
 /* Adjust the cost of dependencies.  */
-#define ADJUST_COST(INSN,LINK,DEP,COST) \
-  if (sparc_cpu == PROCESSOR_SUPERSPARC) \
-    (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST)
+#define ADJUST_COST(INSN,LINK,DEP,COST)				\
+do {								\
+  if (sparc_cpu == PROCESSOR_SUPERSPARC)			\
+    (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST);	\
+  else if (sparc_cpu == PROCESSOR_ULTRASPARC)			\
+    (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST);	\
+} while (0)

 /* Conditional branches with empty delay slots have a length of two.  */
-#define ADJUST_INSN_LENGTH(INSN, LENGTH)	\
+#define ADJUST_INSN_LENGTH(INSN, LENGTH)				\
+do {									\
  if (GET_CODE (INSN) == CALL_INSN					\
      || (GET_CODE (INSN) == JUMP_INSN && ! simplejump_p (insn)))	\
-    LENGTH += 1;
+    LENGTH += 1;							\
+} while (0)

 /* Control the assembler format that we output.  */

--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md