Tidy up previous delta

From-SVN: r25431
Nick Clifton 1999-02-25 10:20:21 +00:00 committed by Nick Clifton
parent 592e5d21b1
commit 6cc8c0b3b5
6 changed files with 117 additions and 32 deletions

gcc/ChangeLog

@@ -1,3 +1,31 @@
Thu Feb 25 10:17:32 1999 Nick Clifton <nickc@cygnus.com>
* config/arm/arm.c (return_in_memory): Float fields in unions
force a return in memory.
(load_multiple_sequence): Add comment explaining why two LDR
instructions can be better than an LDMIA instruction.
* config/arm/arm.h (TARGET_SHORT_BY_BYTES): Add comment
describing the real meaning of this option.
(FIXED_REGISTERS): Default r10 to not-fixed.
(CALL_USED_REGISTERS): Default r10 to not-call-used.
(SUBTARGET_CONDITIONAL_REGISTER_USAGE): If not defined, define
as empty.
(CONDITIONAL_REGISTER_USAGE): Fix r10 if TARGET_APCS_STACK is
true. Invoke SUBTARGET_CONDITIONAL_REGISTER_USAGE after
performing other checks.
* config/arm/arm.md (zero_extendhisi2): Undo previous change.
(extendhisi2): Undo previous change.
Also add comments describing why TARGET_SHORT_BY_BYTES can be
ignored for armv4(t) architectures.
* config/arm/riscix.h (SUBTARGET_CONDITIONAL_REGISTER_USAGE):
Define to fix r10.
* config/arm/riscix1-1.h
(SUBTARGET_CONDITIONAL_REGISTER_USAGE): Define to fix r10.
Thu Feb 25 12:09:04 1999 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* cse.c (dump_class): Make the function definition static to match

gcc/config/arm/arm.c

@@ -1328,6 +1328,9 @@ arm_return_in_memory (type)
if (TREE_CODE (field) != FIELD_DECL)
continue;
if (FLOAT_TYPE_P (TREE_TYPE (field)))
return 1;
if (RETURN_IN_MEMORY (TREE_TYPE (field)))
return 1;
}
@@ -2700,7 +2703,32 @@ load_multiple_sequence (operands, nops, regs, base, load_offset)
return 4; /* ldmdb */
/* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm if
the offset isn't small enough */
the offset isn't small enough. The reason 2 ldrs are faster is because
these ARMs are able to do more than one cache access in a single cycle.
The ARM9 and StrongARM have Harvard caches, whilst the ARM8 has a double
bandwidth cache. This means that these cores can do both an instruction
fetch and a data fetch in a single cycle, so the trick of calculating the
address into a scratch register (one of the result regs) and then doing a
load multiple actually becomes slower (and no smaller in code size). That
is the transformation
ldr rd1, [rbase + offset]
ldr rd2, [rbase + offset + 4]
to
add rd1, rbase, offset
ldmia rd1, {rd1, rd2}
produces worse code -- '3 cycles + any stalls on rd2' instead of '2 cycles
+ any stalls on rd2'. On ARMs with only one cache access per cycle, the
first sequence could never complete in less than 6 cycles, whereas the ldm
sequence would only take 5 and would make better use of sequential accesses
if not hitting the cache.
We cheat here and test 'arm_ld_sched' which we currently know to only be
true for the ARM8, ARM9 and StrongARM. If this ever changes, then the test
below needs to be reworked. */
if (nops == 2 && arm_ld_sched)
return 0;
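
To make the tradeoff concrete, here is a minimal C sketch (illustrative only, hypothetical names, not part of the patch) of the access pattern load_multiple_sequence is weighing: two word loads from adjacent offsets off one base register. On arm_ld_sched cores the two LDRs are kept; on older cores they may still be merged into an LDM.

/* Illustrative sketch: two word loads from consecutive offsets, roughly
       ldr r0, [rbase, #0]
       ldr r1, [rbase, #4]
   which load_multiple_sequence may or may not rewrite as add + ldmia,
   depending on the core (see the comment above).  */
struct pair { int lo; int hi; };

int
sum_pair (struct pair *p)
{
  return p->lo + p->hi;
}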

gcc/config/arm/arm.h

@@ -337,6 +337,13 @@ function tries to return. */
#define TARGET_APCS_STACK (target_flags & ARM_FLAG_APCS_STACK)
#define TARGET_APCS_FLOAT (target_flags & ARM_FLAG_APCS_FLOAT)
#define TARGET_APCS_REENT (target_flags & ARM_FLAG_APCS_REENT)
/* Note: TARGET_SHORT_BY_BYTES is really a misnomer. What it means is
that short values should not be accessed using word load instructions
as there is a possibility that they may not be word aligned and this
would generate an MMU fault. On processors which do not have a 16 bit
load instruction therefore, short values must be loaded by individual
byte accesses rather than loading a word and then shifting the desired
value into place. */
#define TARGET_SHORT_BY_BYTES (target_flags & ARM_FLAG_SHORT_BYTE)
#define TARGET_SOFT_FLOAT (target_flags & ARM_FLAG_SOFT_FLOAT)
#define TARGET_HARD_FLOAT (! TARGET_SOFT_FLOAT)
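
As an illustration of what TARGET_SHORT_BY_BYTES means for code generation, a small C sketch (hypothetical helper, little-endian assumed, not from the patch) of reading a halfword with two byte accesses instead of one possibly unaligned word load:

/* Sketch only: build a 16 bit value from two byte loads and a shift/OR,
   so a misaligned address cannot cause an MMU fault from a word load.  */
unsigned short
read_short_by_bytes (const unsigned char *p)
{
  return (unsigned short) (p[0] | (p[1] << 8));  /* little-endian order */
}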
@@ -677,7 +684,7 @@ extern char * structure_size_string;
#define FIXED_REGISTERS \
{ \
0,0,0,0,0,0,0,0, \
0,0,1,1,0,1,0,1, \
0,0,0,1,0,1,0,1, \
0,0,0,0,0,0,0,0, \
1,1,1 \
}
@@ -693,11 +700,15 @@ extern char * structure_size_string;
#define CALL_USED_REGISTERS \
{ \
1,1,1,1,0,0,0,0, \
0,0,1,1,1,1,1,1, \
0,0,0,1,1,1,1,1, \
1,1,1,1,0,0,0,0, \
1,1,1 \
}
#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE
#define SUBTARGET_CONDITIONAL_REGISTER_USAGE
#endif
/* If doing stupid life analysis, avoid a bug causing a return value r0 to be
trampled. This effectively reduces the number of available registers by 1.
XXX It is a hack, I know.
@@ -717,11 +728,12 @@ extern char * structure_size_string;
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 0; \
} \
else if (! TARGET_APCS_STACK) \
else if (TARGET_APCS_STACK) \
{ \
fixed_regs[10] = 0; \
call_used_regs[10] = 0; \
fixed_regs[10] = 1; \
call_used_regs[10] = 1; \
} \
SUBTARGET_CONDITIONAL_REGISTER_USAGE \
}
/* Return number of consecutive hard regs needed starting at reg REGNO
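
The SUBTARGET_CONDITIONAL_REGISTER_USAGE arrangement above is the usual empty-default-plus-override idiom, with the riscix headers further down supplying the override. A self-contained sketch of the idiom with hypothetical macro names (not GCC code):

/* Sketch of the idiom: a subtarget header may define the hook before this
   point; otherwise the empty default makes the final invocation a no-op.  */
#ifndef SUBTARGET_EXTRA_REGISTER_FIXUPS
#define SUBTARGET_EXTRA_REGISTER_FIXUPS        /* expands to nothing */
#endif

#define EXAMPLE_CONDITIONAL_REGISTER_USAGE     \
  {                                            \
    /* generic, target-wide fixups here */     \
    SUBTARGET_EXTRA_REGISTER_FIXUPS            \
  }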

gcc/config/arm/arm.md

@@ -2167,20 +2167,20 @@
""
"
{
if (GET_CODE (operands[1]) == MEM)
if (arm_arch4 && GET_CODE (operands[1]) == MEM)
{
if (TARGET_SHORT_BY_BYTES)
{
emit_insn (gen_movhi_bytes (operands[0], operands[1]));
DONE;
}
else if (arm_arch4)
{
emit_insn (gen_rtx_SET (VOIDmode,
operands[0],
gen_rtx_ZERO_EXTEND (SImode, operands[1])));
DONE;
}
/* Note: We do not have to worry about TARGET_SHORT_BY_BYTES
here because the insn below will generate an LDRH instruction
rather than an LDR instruction, so we cannot get an unaligned
word access. */
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_ZERO_EXTEND (SImode, operands[1])));
DONE;
}
if (TARGET_SHORT_BY_BYTES && GET_CODE (operands[1]) == MEM)
{
emit_insn (gen_movhi_bytes (operands[0], operands[1]));
DONE;
}
if (! s_register_operand (operands[1], HImode))
operands[1] = copy_to_mode_reg (HImode, operands[1]);
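
For reference, a tiny C sketch (hypothetical function, not patch content) of the source-level operation this zero_extendhisi2 expander covers; on armv4 and later the memory case becomes a single LDRH, which is why the TARGET_SHORT_BY_BYTES byte-splitting path is unnecessary there:

/* Sketch only: zero-extending load of a halfword.  On armv4+ this is one
   LDRH, which cannot produce the unaligned word access an LDR could.  */
unsigned int
load_zext (const unsigned short *p)
{
  return *p;   /* zero_extendhisi2 */
}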
@@ -2273,20 +2273,22 @@
""
"
{
if (GET_CODE (operands[1]) == MEM)
if (arm_arch4 && GET_CODE (operands[1]) == MEM)
{
if (TARGET_SHORT_BY_BYTES)
{
emit_insn (gen_extendhisi2_mem (operands[0], operands[1]));
DONE;
}
else if (arm_arch4)
{
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_SIGN_EXTEND (SImode, operands[1])));
DONE;
}
}
/* Note: We do not have to worry about TARGET_SHORT_BY_BYTES
here because the insn below will generate an LDRH instruction
rather than an LDR instruction, so we cannot get an unaligned
word access. */
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_SIGN_EXTEND (SImode, operands[1])));
DONE;
}
if (TARGET_SHORT_BY_BYTES && GET_CODE (operands[1]) == MEM)
{
emit_insn (gen_extendhisi2_mem (operands[0], operands[1]));
DONE;
}
if (! s_register_operand (operands[1], HImode))
operands[1] = copy_to_mode_reg (HImode, operands[1]);
operands[1] = gen_lowpart (SImode, operands[1]);
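
And the corresponding sign-extending case handled by extendhisi2, again only a sketch with a hypothetical name:

/* Sketch only: sign-extending load of a halfword.  On armv4+ this becomes
   a signed halfword load (LDRSH); pre-v4 with TARGET_SHORT_BY_BYTES the
   expander routes it through gen_extendhisi2_mem instead.  */
int
load_sext (const short *p)
{
  return *p;   /* extendhisi2 */
}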
@@ -2894,6 +2896,10 @@
}
else if (! arm_arch4)
{
/* Note: We do not have to worry about TARGET_SHORT_BY_BYTES
for v4 and up architectures because LDRH instructions will
be used to access the HI values, and these cannot generate
unaligned word access faults in the MMU. */
if (GET_CODE (operands[1]) == MEM)
{
if (TARGET_SHORT_BY_BYTES)

gcc/config/arm/riscix.h

@@ -120,6 +120,11 @@ Boston, MA 02111-1307, USA. */
/* Override the normal default CPU */
#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm2
/* r10 is reserved by RISCiX */
#define SUBTARGET_CONDITIONAL_REGISTER_USAGE \
fixed_regs[10] = 1; \
call_used_regs[10] = 1;
#include "arm/aout.h"
/* The RISCiX assembler does not understand .set */

gcc/config/arm/riscix1-1.h

@@ -80,6 +80,12 @@ Boston, MA 02111-1307, USA. */
/* Override the normal default CPU */
#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm2
/* r10 is reserved by RISCiX */
#define SUBTARGET_CONDITIONAL_REGISTER_USAGE \
fixed_regs[10] = 1; \
call_used_regs[10] = 1;
#include "arm/aout.h"
#undef CPP_SPEC