Tidy up previous delta

From-SVN: r25431
Nick Clifton 1999-02-25 10:20:21 +00:00 committed by Nick Clifton
parent 592e5d21b1
commit 6cc8c0b3b5
6 changed files with 117 additions and 32 deletions

gcc/ChangeLog

@@ -1,3 +1,31 @@
Thu Feb 25 10:17:32 1999 Nick Clifton <nickc@cygnus.com>
* config/arm/arm.c (return_in_memory): Float fields in unions
force a return in memory.
(load_multiple_sequence): Add comment explaining why two LDR
instructions can be better than an LDMIA instruction.
* config/arm/arm.h (TARGET_SHORT_BY_BYTES): Add comment
describing the real meaning of this option.
(FIXED_REGISTERS): Default r10 to not-fixed.
(CALL_USED_REGISTERS): Default r10 to not-call-used.
(SUBTARGET_CONDITIONAL_REGISTER_USAGE): If not defined, define
as empty.
(CONDITIONAL_REGISTER_USAGE): Fix r10 if TARGET_APCS_STACK is
true. Invoke SUBTARGET_CONDITIONAL_REGISTER_USAGE after
performing other checks.
* config/arm/arm.md (zero_extendhisi2): Undo previous change.
(extendhisi2): Undo previous change.
Also add comments describing why TARGET_SHORT_BY_BYTES can be
ignored for armv4(t) architectures.
* config/arm/riscix.h (SUBTARGET_CONDITIONAL_REGISTER_USAGE):
Define to fix r10.
* config/arm/riscix1-1.h
(SUBTARGET_CONDITIONAL_REGISTER_USAGE): Define to fix r10.
Thu Feb 25 12:09:04 1999 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* cse.c (dump_class): Make the function definition static to match

gcc/config/arm/arm.c

@@ -1328,6 +1328,9 @@ arm_return_in_memory (type)
if (TREE_CODE (field) != FIELD_DECL)
continue;
if (FLOAT_TYPE_P (TREE_TYPE (field)))
return 1;
if (RETURN_IN_MEMORY (TREE_TYPE (field)))
return 1;
}
@@ -2700,7 +2703,32 @@ load_multiple_sequence (operands, nops, regs, base, load_offset)
return 4; /* ldmdb */
/* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm if
the offset isn't small enough */
the offset isn't small enough. The reason 2 ldrs are faster is because
these ARMs are able to do more than one cache access in a single cycle.
The ARM9 and StrongARM have Harvard caches, whilst the ARM8 has a double
bandwidth cache. This means that these cores can do both an instruction
fetch and a data fetch in a single cycle, so the trick of calculating the
address into a scratch register (one of the result regs) and then doing a
load multiple actually becomes slower (and no smaller in code size). That
is the transformation
ldr rd1, [rbase + offset]
ldr rd2, [rbase + offset + 4]
to
add rd1, rbase, offset
ldmia rd1, {rd1, rd2}
produces worse code -- '3 cycles + any stalls on rd2' instead of '2 cycles
+ any stalls on rd2'. On ARMs with only one cache access per cycle, the
first sequence could never complete in less than 6 cycles, whereas the ldm
sequence would only take 5 and would make better use of sequential accesses
if not hitting the cache.
We cheat here and test 'arm_ld_sched' which we currently know to only be
true for the ARM8, ARM9 and StrongARM. If this ever changes, then the test
below needs to be reworked. */
if (nops == 2 && arm_ld_sched)
return 0;
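
To make the tradeoff concrete, here is a minimal C sketch (illustrative only, hypothetical names, not part of the patch) of the access pattern load_multiple_sequence is weighing: two word loads from adjacent offsets off one base register. On arm_ld_sched cores the two LDRs are kept; on older cores they may still be merged into an LDM.

/* Illustrative sketch: two word loads from consecutive offsets, roughly
       ldr r0, [rbase, #0]
       ldr r1, [rbase, #4]
   which load_multiple_sequence may or may not rewrite as add + ldmia,
   depending on the core (see the comment above).  */
struct pair { int lo; int hi; };

int
sum_pair (struct pair *p)
{
  return p->lo + p->hi;
}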

gcc/config/arm/arm.h

@@ -337,6 +337,13 @@ function tries to return. */
#define TARGET_APCS_STACK (target_flags & ARM_FLAG_APCS_STACK)
#define TARGET_APCS_FLOAT (target_flags & ARM_FLAG_APCS_FLOAT)
#define TARGET_APCS_REENT (target_flags & ARM_FLAG_APCS_REENT)
/* Note: TARGET_SHORT_BY_BYTES is really a misnomer. What it means is
that short values should not be accessed using word load instructions
as there is a possibility that they may not be word aligned and this
would generate an MMU fault. On processors which do not have a 16 bit
load instruction therefore, short values must be loaded by individual
byte accesses rather than loading a word and then shifting the desired
value into place. */
#define TARGET_SHORT_BY_BYTES (target_flags & ARM_FLAG_SHORT_BYTE)
#define TARGET_SOFT_FLOAT (target_flags & ARM_FLAG_SOFT_FLOAT)
#define TARGET_HARD_FLOAT (! TARGET_SOFT_FLOAT)
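
As an illustration of what TARGET_SHORT_BY_BYTES means for code generation, a small C sketch (hypothetical helper, little-endian assumed, not from the patch) of reading a halfword with two byte accesses instead of one possibly unaligned word load:

/* Sketch only: build a 16 bit value from two byte loads and a shift/OR,
   so a misaligned address cannot cause an MMU fault from a word load.  */
unsigned short
read_short_by_bytes (const unsigned char *p)
{
  return (unsigned short) (p[0] | (p[1] << 8));  /* little-endian order */
}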
@@ -677,7 +684,7 @@ extern char * structure_size_string;
#define FIXED_REGISTERS \
{ \
0,0,0,0,0,0,0,0, \
0,0,1,1,0,1,0,1, \
0,0,0,1,0,1,0,1, \
0,0,0,0,0,0,0,0, \
1,1,1 \
}
@@ -693,11 +700,15 @@ extern char * structure_size_string;
#define CALL_USED_REGISTERS \
{ \
1,1,1,1,0,0,0,0, \
0,0,1,1,1,1,1,1, \
0,0,0,1,1,1,1,1, \
1,1,1,1,0,0,0,0, \
1,1,1 \
}
#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE
#define SUBTARGET_CONDITIONAL_REGISTER_USAGE
#endif
/* If doing stupid life analysis, avoid a bug causing a return value r0 to be
trampled. This effectively reduces the number of available registers by 1.
XXX It is a hack, I know.
@@ -717,11 +728,12 @@ extern char * structure_size_string;
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 0; \
} \
else if (! TARGET_APCS_STACK) \
else if (TARGET_APCS_STACK) \
{ \
fixed_regs[10] = 0; \
call_used_regs[10] = 0; \
fixed_regs[10] = 1; \
call_used_regs[10] = 1; \
} \
SUBTARGET_CONDITIONAL_REGISTER_USAGE \
}
/* Return number of consecutive hard regs needed starting at reg REGNO
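
The SUBTARGET_CONDITIONAL_REGISTER_USAGE arrangement above is the usual empty-default-plus-override idiom, with the riscix headers further down supplying the override. A self-contained sketch of the idiom with hypothetical macro names (not GCC code):

/* Sketch of the idiom: a subtarget header may define the hook before this
   point; otherwise the empty default makes the final invocation a no-op.  */
#ifndef SUBTARGET_EXTRA_REGISTER_FIXUPS
#define SUBTARGET_EXTRA_REGISTER_FIXUPS        /* expands to nothing */
#endif

#define EXAMPLE_CONDITIONAL_REGISTER_USAGE     \
  {                                            \
    /* generic, target-wide fixups here */     \
    SUBTARGET_EXTRA_REGISTER_FIXUPS            \
  }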

gcc/config/arm/arm.md

@@ -2167,20 +2167,20 @@
""
"
{
if (GET_CODE (operands[1]) == MEM)
if (arm_arch4 && GET_CODE (operands[1]) == MEM)
{
if (TARGET_SHORT_BY_BYTES)
{
emit_insn (gen_movhi_bytes (operands[0], operands[1]));
DONE;
}
else if (arm_arch4)
{
emit_insn (gen_rtx_SET (VOIDmode,
operands[0],
gen_rtx_ZERO_EXTEND (SImode, operands[1])));
DONE;
}
/* Note: We do not have to worry about TARGET_SHORT_BY_BYTES
here because the insn below will generate an LDRH instruction
rather than an LDR instruction, so we cannot get an unaligned
word access. */
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_ZERO_EXTEND (SImode, operands[1])));
DONE;
}
if (TARGET_SHORT_BY_BYTES && GET_CODE (operands[1]) == MEM)
{
emit_insn (gen_movhi_bytes (operands[0], operands[1]));
DONE;
}
if (! s_register_operand (operands[1], HImode))
operands[1] = copy_to_mode_reg (HImode, operands[1]);
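
For reference, a tiny C sketch (hypothetical function, not patch content) of the source-level operation this zero_extendhisi2 expander covers; on armv4 and later the memory case becomes a single LDRH, which is why the TARGET_SHORT_BY_BYTES byte-splitting path is unnecessary there:

/* Sketch only: zero-extending load of a halfword.  On armv4+ this is one
   LDRH, which cannot produce the unaligned word access an LDR could.  */
unsigned int
load_zext (const unsigned short *p)
{
  return *p;   /* zero_extendhisi2 */
}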
@@ -2273,20 +2273,22 @@
""
"
{
if (GET_CODE (operands[1]) == MEM)
if (arm_arch4 && GET_CODE (operands[1]) == MEM)
{
if (TARGET_SHORT_BY_BYTES)
{
emit_insn (gen_extendhisi2_mem (operands[0], operands[1]));
DONE;
}
else if (arm_arch4)
{
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_SIGN_EXTEND (SImode, operands[1])));
DONE;
}
}
/* Note: We do not have to worry about TARGET_SHORT_BY_BYTES
here because the insn below will generate an LDRH instruction
rather than an LDR instruction, so we cannot get an unaligned
word access. */
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_SIGN_EXTEND (SImode, operands[1])));
DONE;
}
if (TARGET_SHORT_BY_BYTES && GET_CODE (operands[1]) == MEM)
{
emit_insn (gen_extendhisi2_mem (operands[0], operands[1]));
DONE;
}
if (! s_register_operand (operands[1], HImode))
operands[1] = copy_to_mode_reg (HImode, operands[1]);
operands[1] = gen_lowpart (SImode, operands[1]);
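
And the corresponding sign-extending case handled by extendhisi2, again only a sketch with a hypothetical name:

/* Sketch only: sign-extending load of a halfword.  On armv4+ this becomes
   a signed halfword load (LDRSH); pre-v4 with TARGET_SHORT_BY_BYTES the
   expander routes it through gen_extendhisi2_mem instead.  */
int
load_sext (const short *p)
{
  return *p;   /* extendhisi2 */
}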
@@ -2894,6 +2896,10 @@
}
else if (! arm_arch4)
{
/* Note: We do not have to worry about TARGET_SHORT_BY_BYTES
for v4 and up architectures because LDRH instructions will
be used to access the HI values, and these cannot generate
unaligned word access faults in the MMU. */
if (GET_CODE (operands[1]) == MEM)
{
if (TARGET_SHORT_BY_BYTES)

gcc/config/arm/riscix.h

@@ -120,6 +120,11 @@ Boston, MA 02111-1307, USA. */
/* Override the normal default CPU */
#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm2
/* r10 is reserved by RISCiX */
#define SUBTARGET_CONDITIONAL_REGISTER_USAGE \
fixed_regs[10] = 1; \
call_used_regs[10] = 1;
#include "arm/aout.h"
/* The RISCiX assembler does not understand .set */

gcc/config/arm/riscix1-1.h

@@ -80,6 +80,12 @@ Boston, MA 02111-1307, USA. */
/* Override the normal default CPU */
#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm2
/* r10 is reserved by RISCiX */
#define SUBTARGET_CONDITIONAL_REGISTER_USAGE \
fixed_regs[10] = 1; \
call_used_regs[10] = 1;
#include "arm/aout.h"
#undef CPP_SPEC