pa.md (abssi2): New pattern.

* pa.md (abssi2): New pattern.

	* pa.c (secondary_reload_class): Loads from reg+d addresses into
	FP registers don't need secondary reloads.
	* pa.h: Delete some #if 0 code.  Update some comments.
	(EXTRA_CONSTRAINT, case 'Q'): Only accept valid memory addresses.

	* pa.h (RTX_COSTS): Tege's rewrite.

	* pa.c (hppa_legitimize_address): Generate unscaled indexed
	addressing for (plus (symbol_ref) (reg)).
	(emit_move_sequence): Set REGNO_POINTER_FLAG appropriately
	to encourage unscaled indexing modes.
	(basereg_operand): New function for unscaled index address support.
	* pa.md (unscaled indexing patterns): New patterns for unscaled
	index address support.

	* pa.h (MOVE_RATIO): Define.
	* pa.md (movstrsi expander): Refine tests for when to use the
	library routine instead of an inlined loop copy.  Provide an
	additional scratch register for use in the inlined loop copy.
	(movstrsi_internal): Name the pattern for ease of use.  Add
	additional scratch register.
	* pa.c (output_block_move): Greatly simplify.  Use 2X unrolled
	copy loops to improve performance.
	(compute_movstrsi_length): Corresponding changes.

	* pa.c (print_operand): Handle 'y' case for reversed FP
	comparisons.  Delete some #if 0 code.  Fix various comment typos.
	* pa.md (fcmp patterns): Try to reverse the comparison to avoid
	useless add,tr insns.

From-SVN: r10609
Jeff Law 1995-11-27 00:33:58 -07:00
parent 926d1ca5a3
commit 68944452e4
3 changed files with 515 additions and 334 deletions

pa.c

@ -637,6 +637,16 @@ hppa_legitimize_address (x, oldx, mode)
if (GET_CODE (x) == CONST)
x = XEXP (x, 0);
/* Special case. Get the SYMBOL_REF into a register and use indexing.
That should always be safe. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == REG
&& GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
{
rtx reg = force_reg (SImode, XEXP (x, 1));
return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
}
/* Note we must reject symbols which represent function addresses
since the assembler/linker can't handle arithmetic on plabels. */
if (GET_CODE (x) == PLUS
@ -793,7 +803,7 @@ emit_move_sequence (operands, mode, scratch_reg)
/* Handle secondary reloads for loads/stores of FP registers from
REG+D addresses where D does not fit in 5 bits, including
(subreg (mem (addr)) cases. */
(subreg (mem (addr))) cases. */
if (fp_reg_operand (operand0, mode)
&& ((GET_CODE (operand1) == MEM
&& ! memory_address_p (DFmode, XEXP (operand1, 0)))
@ -975,9 +985,9 @@ emit_move_sequence (operands, mode, scratch_reg)
operands[1] = force_const_mem (mode, operand1);
emit_move_sequence (operands, mode, temp);
}
/* Likewise for (const (plus (symbol) (const_int)) when generating
pic code during or after reload and const_int will not fit
in 14 bits. */
/* Likewise for (const (plus (symbol) (const_int))) when
generating pic code during or after reload and const_int
will not fit in 14 bits. */
else if (GET_CODE (operand1) == CONST
&& GET_CODE (XEXP (operand1, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
@ -1008,6 +1018,14 @@ emit_move_sequence (operands, mode, scratch_reg)
else
temp = gen_reg_rtx (mode);
/* Loading a SYMBOL_REF into a register makes that register
safe to be used as the base in an indexed address.
Don't mark hard registers though. That loses. */
if (REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
REGNO_POINTER_FLAG (REGNO (temp)) = 1;
if (ishighonly)
set = gen_rtx (SET, mode, operand0, temp);
else
@ -1457,18 +1475,13 @@ find_addr_reg (addr)
/* Emit code to perform a block move.
Restriction: If the length argument is non-constant, alignment
must be 4.
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is the source pointer as a REG, clobbered.
if SIZE_IS_CONSTANT
OPERANDS[2] is a register for temporary storage.
OPERANDS[4] is the size as a CONST_INT
else
OPERANDS[2] is a REG which will contain the size, clobbered.
OPERANDS[2] is a register for temporary storage.
OPERANDS[4] is the size as a CONST_INT
OPERANDS[3] is a register for temporary storage.
OPERANDS[5] is the alignment safe to use, as a CONST_INT. */
OPERANDS[5] is the alignment safe to use, as a CONST_INT.
OPERANDS[6] is another temporary register. */
char *
output_block_move (operands, size_is_constant)
@ -1476,153 +1489,94 @@ output_block_move (operands, size_is_constant)
int size_is_constant;
{
int align = INTVAL (operands[5]);
unsigned long n_bytes;
unsigned long n_bytes = INTVAL (operands[4]);
/* We can't move more than four bytes at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > 4)
align = 4;
if (size_is_constant)
/* Note that we know each loop below will execute at least twice
(else we would have open-coded the copy). */
switch (align)
{
unsigned long offset;
rtx temp;
case 4:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 8);
output_asm_insn ("ldi %4,%2", operands);
n_bytes = INTVAL (operands[4]);
if (n_bytes == 0)
/* Copying loop. */
output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
output_asm_insn ("stws,ma %3,4(0,%0)", operands);
output_asm_insn ("addib,>= -8,%2,.-12", operands);
output_asm_insn ("stws,ma %6,4(0,%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 8 != 0)
{
operands[4] = GEN_INT (n_bytes % 4);
if (n_bytes % 8 >= 4)
output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("ldw 0(0,%1),%6", operands);
if (n_bytes % 8 >= 4)
output_asm_insn ("stws,ma %3,4(0,%0)", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
}
return "";
if (align >= 4)
{
/* Don't unroll too large blocks. */
if (n_bytes > 32)
goto copy_with_loop;
case 2:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 4);
output_asm_insn ("ldi %4,%2", operands);
/* Read and store using two registers, and hide latency
by deferring the stores until three instructions after
the corresponding load. The last load insn will read
the entire word were the last bytes are, possibly past
the end of the source block, but since loads are aligned,
this is harmless. */
/* Copying loop. */
output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
output_asm_insn ("sths,ma %3,2(0,%0)", operands);
output_asm_insn ("addib,>= -4,%2,.-12", operands);
output_asm_insn ("sths,ma %6,2(0,%0)", operands);
output_asm_insn ("ldws,ma 4(0,%1),%2", operands);
for (offset = 4; offset < n_bytes; offset += 4)
{
output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
output_asm_insn ("stws,ma %2,4(0,%0)", operands);
temp = operands[2];
operands[2] = operands[3];
operands[3] = temp;
}
if (n_bytes % 4 == 0)
/* Store the last word. */
output_asm_insn ("stw %2,0(0,%0)", operands);
else
{
/* Store the last, partial word. */
operands[4] = GEN_INT (n_bytes % 4);
output_asm_insn ("stbys,e %2,%4(0,%0)", operands);
}
return "";
}
if (align >= 2 && n_bytes >= 2)
{
output_asm_insn ("ldhs,ma 2(0,%1),%2", operands);
for (offset = 2; offset + 2 <= n_bytes; offset += 2)
{
/* Handle the residual. */
if (n_bytes % 4 != 0)
{
if (n_bytes % 4 >= 2)
output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
output_asm_insn ("sths,ma %2,2(0,%0)", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("ldb 0(0,%1),%6", operands);
if (n_bytes % 4 >= 2)
output_asm_insn ("sths,ma %3,2(0,%0)", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("stb %6,0(0,%0)", operands);
}
return "";
temp = operands[2];
operands[2] = operands[3];
operands[3] = temp;
}
if (n_bytes % 2 != 0)
case 1:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 2);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
output_asm_insn ("addib,>= -2,%2,.-12", operands);
output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
/* Handle the residual. */
if (n_bytes % 2 != 0)
{
output_asm_insn ("ldb 0(0,%1),%3", operands);
output_asm_insn ("sths,ma %2,2(0,%0)", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("stb %3,0(0,%0)", operands);
}
return "";
return "";
}
output_asm_insn ("ldbs,ma 1(0,%1),%2", operands);
for (offset = 1; offset + 1 <= n_bytes; offset += 1)
{
output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
output_asm_insn ("stbs,ma %2,1(0,%0)", operands);
temp = operands[2];
operands[2] = operands[3];
operands[3] = temp;
}
output_asm_insn ("stb %2,0(0,%0)", operands);
return "";
default:
abort ();
}
if (align != 4)
abort();
copy_with_loop:
if (size_is_constant)
{
/* Size is compile-time determined, and also not
very small (such small cases are handled above). */
operands[4] = GEN_INT (n_bytes - 4);
output_asm_insn ("ldo %4(0),%2", operands);
}
else
{
/* Decrement counter by 4, and if it becomes negative, jump past the
word copying loop. */
output_asm_insn ("addib,<,n -4,%2,.+16", operands);
}
/* Copying loop. Note that the first load is in the annulled delay slot
of addib. Is it OK on PA to have a load in a delay slot, i.e. is a
possible page fault stopped in time? */
output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
output_asm_insn ("addib,>= -4,%2,.-4", operands);
output_asm_insn ("stws,ma %3,4(0,%0)", operands);
/* The counter is negative, >= -4. The remaining number of bytes are
determined by the two least significant bits. */
if (size_is_constant)
{
if (n_bytes % 4 != 0)
{
/* Read the entire word of the source block tail. */
output_asm_insn ("ldw 0(0,%1),%3", operands);
operands[4] = GEN_INT (n_bytes % 4);
output_asm_insn ("stbys,e %3,%4(0,%0)", operands);
}
}
else
{
/* Add 4 to counter. If it becomes zero, we're done. */
output_asm_insn ("addib,=,n 4,%2,.+16", operands);
/* Read the entire word of the source block tail. (Also this
load is in an annulled delay slot.) */
output_asm_insn ("ldw 0(0,%1),%3", operands);
/* Make %0 point at the first byte after the destination block. */
output_asm_insn ("addl %2,%0,%0", operands);
/* Store the leftmost bytes, up to, but not including, the address
in %0. */
output_asm_insn ("stbys,e %3,0(0,%0)", operands);
}
return "";
}
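
As a reading aid, here is a minimal C sketch (assumed name, not the GCC source) of the copy the align == 4 case now emits: 8 bytes per iteration through two temporaries, with the emitted ldws,ma/stws,ma sequence deferring each store past its load to hide latency, then up to 7 residual bytes.

#include <string.h>

/* Sketch of the align == 4 copy sequence: a 2X unrolled word loop,
   then a word and/or partial-word residual.  */
static void
copy_words_2x_sketch (unsigned char *dst, const unsigned char *src,
                      unsigned long n_bytes)
{
  unsigned char w0[4], w1[4];
  unsigned long left = n_bytes;

  /* The 2X unrolled copying loop (ldws,ma / ldws,ma / stws,ma /
     addib,>= / stws,ma in the emitted code).  */
  while (left >= 8)
    {
      memcpy (w0, src, 4);
      memcpy (w1, src + 4, 4);
      memcpy (dst, w0, 4);
      memcpy (dst + 4, w1, 4);
      src += 8, dst += 8, left -= 8;
    }

  /* Residual word when n_bytes % 8 >= 4.  */
  if (left >= 4)
    {
      memcpy (dst, src, 4);
      src += 4, dst += 4, left -= 4;
    }

  /* Final 1-3 bytes (stbys,e stores the leftmost bytes).  */
  if (left > 0)
    memcpy (dst, src, left);
}
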
/* Count the number of insns necessary to handle this block move.
@ -1635,106 +1589,33 @@ compute_movstrsi_length (insn)
rtx insn;
{
rtx pat = PATTERN (insn);
int size_is_constant;
int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
unsigned long n_bytes;
int insn_count = 0;
if (GET_CODE (XEXP (XVECEXP (pat, 0, 5), 0)) == CONST_INT)
{
size_is_constant = 1;
n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
}
else
{
size_is_constant = 0;
n_bytes = 0;
}
unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
unsigned int n_insns = 0;
/* We can't move more than four bytes at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > 4)
align = 4;
if (size_is_constant)
/* The basic copying loop. */
n_insns = 6;
/* Residuals. */
if (n_bytes % (2 * align) != 0)
{
unsigned long offset;
/* Any residual caused by unrolling the copy loop. */
if (n_bytes % (2 * align) > align)
n_insns += 1;
if (n_bytes == 0)
return 0;
if (align >= 4)
{
/* Don't unroll too large blocks. */
if (n_bytes > 32)
goto copy_with_loop;
/* first load */
insn_count = 1;
/* Count the unrolled insns. */
for (offset = 4; offset < n_bytes; offset += 4)
insn_count += 2;
/* Count last store or partial store. */
insn_count += 1;
return insn_count * 4;
}
if (align >= 2 && n_bytes >= 2)
{
/* initial load. */
insn_count = 1;
/* Unrolled loop. */
for (offset = 2; offset + 2 <= n_bytes; offset += 2)
insn_count += 2;
/* ??? odd load/store */
if (n_bytes % 2 != 0)
insn_count += 2;
/* ??? final store from loop. */
insn_count += 1;
return insn_count * 4;
}
/* First load. */
insn_count = 1;
/* The unrolled loop. */
for (offset = 1; offset + 1 <= n_bytes; offset += 1)
insn_count += 2;
/* Final store. */
insn_count += 1;
return insn_count * 4;
/* Any residual because the number of bytes was not a
multiple of the alignment. */
if (n_bytes % align != 0)
n_insns += 1;
}
if (align != 4)
abort();
copy_with_loop:
/* setup for constant and non-constant case. */
insn_count = 1;
/* The copying loop. */
insn_count += 3;
/* The counter is negative, >= -4. The remaining number of bytes are
determined by the two least significant bits. */
if (size_is_constant)
{
if (n_bytes % 4 != 0)
insn_count += 2;
}
else
insn_count += 4;
return insn_count * 4;
/* Lengths are expressed in bytes now; each insn is 4 bytes. */
return n_insns * 4;
}
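
The new length computation can be restated as a standalone function (illustrative name only; align is assumed to be 1, 2, or 4 as in the pattern):

static unsigned int
movstrsi_length_sketch (unsigned long n_bytes, unsigned long align)
{
  unsigned int n_insns = 6;          /* the basic copying loop */

  if (align > 4)                     /* at most 4 bytes per move insn */
    align = 4;

  if (n_bytes % (2 * align) != 0)
    {
      if (n_bytes % (2 * align) > align)
        n_insns += 1;                /* residual from the 2X unrolling */
      if (n_bytes % align != 0)
        n_insns += 1;                /* sub-alignment tail bytes */
    }

  return n_insns * 4;                /* each PA insn is 4 bytes */
}
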
@ -2363,7 +2244,7 @@ hppa_expand_prologue()
even be more efficient.
Avoid this if the callee saved register wasn't used (these are
leaf functions. */
leaf functions). */
if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
@ -2511,9 +2392,8 @@ hppa_expand_epilogue ()
load_reg (2, - 20, STACK_POINTER_REGNUM);
}
/* Reset stack pointer (and possibly frame pointer). The stack */
/* pointer is initially set to fp + 64 to avoid a race condition.
??? What race condition?!? */
/* Reset stack pointer (and possibly frame pointer). The stack
pointer is initially set to fp + 64 to avoid a race condition. */
else if (frame_pointer_needed)
{
/* Emit a blockage insn here to keep these insns from being moved
@ -3004,6 +2884,27 @@ print_operand (file, x, code)
abort ();
}
return;
/* Reversed floating point comparison. Need special conditions to
deal with NaNs properly. */
case 'y':
switch (GET_CODE (x))
{
case EQ:
fprintf (file, "?="); break;
case NE:
fprintf (file, "!?="); break;
case GT:
fprintf (file, "!<="); break;
case GE:
fprintf (file, "!<"); break;
case LT:
fprintf (file, "!>="); break;
case LE:
fprintf (file, "!>"); break;
default:
abort ();
}
return;
case 'S': /* Condition, operands are (S)wapped. */
switch (GET_CODE (x))
{
@ -3161,30 +3062,6 @@ print_operand (file, x, code)
break;
}
}
#if 0
/* The code here is completely wrong. It attempts to extract parts of
a CONST_DOUBLE which is wrong since REAL_ARITHMETIC is defined, and it
extracts the wrong indices (0 instead of 2 and 1 instead of 3) using
the wrong macro (XINT instead of XWINT).
Just disable it for now, since the code will never be used anyway! */
else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
{
union { double d; int i[2]; } u;
union { float f; int i; } u1;
u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1);
u1.f = u.d;
if (code == 'f')
fprintf (file, "0r%.9g", u1.f);
else
fprintf (file, "0x%x", u1.i);
}
else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
{
union { double d; int i[2]; } u;
u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1);
fprintf (file, "0r%.20g", u.d);
}
#endif
else
output_addr_const (file, x);
}
@ -3527,12 +3404,6 @@ secondary_reload_class (class, mode, in)
if (GET_CODE (in) == SUBREG)
in = SUBREG_REG (in);
if (FP_REG_CLASS_P (class)
&& GET_CODE (in) == MEM
&& !memory_address_p (DFmode, XEXP (in, 0))
&& memory_address_p (SImode, XEXP (in, 0)))
return GENERAL_REGS;
return NO_REGS;
}
@ -4431,6 +4302,38 @@ shadd_operand (op, mode)
return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
}
/* Return 1 if OP is valid as a base register in a reg + reg address. */
int
basereg_operand (op, mode)
rtx op;
enum machine_mode mode;
{
/* Once reload has started everything is considered valid. Reload should
only create indexed addresses using the stack/frame pointer, and any
others were checked for validity when created by the combine pass.
Also allow any register when TARGET_NO_SPACE_REGS is in effect since
we don't have to worry about the braindamaged implicit space register
selection using the basereg only (rather than effective address)
screwing us over. */
if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
return (GET_CODE (op) == REG || GET_CODE (op) == CONST_INT);
/* Stack and frame pointers are always OK for indexing. */
if (op == stack_pointer_rtx || op == frame_pointer_rtx)
return 1;
/* The only other valid OPs are pseudo registers with
REGNO_POINTER_FLAG set. */
if (GET_CODE (op) != REG
|| REGNO (op) < FIRST_PSEUDO_REGISTER
|| ! register_operand (op, mode))
return 0;
return REGNO_POINTER_FLAG (REGNO (op));
}
/* Return 1 if this operand is anything other than a hard register. */
int

pa.h

@ -1309,10 +1309,6 @@ extern struct rtx_def *hppa_builtin_saveregs ();
these things in insns and then not re-recognize the insns, causing
constrain_operands to fail.
Also note `Q' accepts any memory operand during the reload pass.
This includes out-of-range displacements in reg+d addressing.
This makes for better code. (??? For 2.5 address this issue).
`R' is unused.
`S' is unused.
@ -1321,8 +1317,6 @@ extern struct rtx_def *hppa_builtin_saveregs ();
#define EXTRA_CONSTRAINT(OP, C) \
((C) == 'Q' ? \
(IS_RELOADING_PSEUDO_P (OP) \
|| (GET_CODE (OP) == MEM \
&& reload_in_progress) \
|| (GET_CODE (OP) == MEM \
&& memory_address_p (GET_MODE (OP), XEXP (OP, 0))\
&& ! symbolic_memory_operand (OP, VOIDmode))) \
@ -1571,6 +1565,11 @@ while (0)
in one reasonably fast instruction. */
#define MOVE_MAX 8
/* Higher than the default as we prefer to use simple move insns
(better scheduling and delay slot filling) and because our
built-in block move is really a 2X unrolled loop. */
#define MOVE_RATIO 4
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
#define WORD_REGISTER_OPERATIONS
@ -1685,22 +1684,28 @@ while (0)
switch on CODE. The purpose for the cost of MULT is to encourage
`synth_mult' to find a synthetic multiply when reasonable. */
#define RTX_COSTS(X,CODE,OUTER_CODE) \
case MULT: \
return (TARGET_SNAKE && ! TARGET_DISABLE_FPREGS \
&& ! TARGET_SOFT_FLOAT \
? COSTS_N_INSNS (8) : COSTS_N_INSNS (20)); \
case DIV: \
case UDIV: \
case MOD: \
case UMOD: \
return COSTS_N_INSNS (60); \
case PLUS: \
if (GET_CODE (XEXP (X, 0)) == MULT \
&& shadd_operand (XEXP (XEXP (X, 0), 1), VOIDmode)) \
return (2 + rtx_cost (XEXP (XEXP (X, 0), 0), OUTER_CODE) \
+ rtx_cost (XEXP (X, 1), OUTER_CODE)); \
break;
#define RTX_COSTS(X,CODE,OUTER_CODE) \
case MULT: \
if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \
return COSTS_N_INSNS (3); \
return (TARGET_SNAKE && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT) \
? COSTS_N_INSNS (8) : COSTS_N_INSNS (20); \
case DIV: \
if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \
return COSTS_N_INSNS (14); \
case UDIV: \
case MOD: \
case UMOD: \
return COSTS_N_INSNS (60); \
case PLUS: /* this includes shNadd insns */ \
case MINUS: \
if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \
return COSTS_N_INSNS (3); \
return COSTS_N_INSNS (1); \
case ASHIFT: \
case ASHIFTRT: \
case LSHIFTRT: \
return COSTS_N_INSNS (1);
/* Adjust the cost of dependencies. */
@ -2154,41 +2159,6 @@ extern struct rtx_def *legitimize_pic_address ();
extern struct rtx_def *gen_cmp_fp ();
extern void hppa_encode_label ();
#if 0
#define PREDICATE_CODES \
{"reg_or_0_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
{"reg_or_cint_move_operand", {SUBREG, REG, CONST_INT}}, \
{"arith_operand", {SUBREG, REG, CONST_INT}}, \
{"arith32_operand", {SUBREG, REG, CONST_INT}}, \
{"arith11_operand", {SUBREG, REG, CONST_INT}}, \
{"arith5_operand", {SUBREG, REG, CONST_INT}}, \
{"pre_cint_operand", {CONST_INT}}, \
{"post_cint_operand", {CONST_INT}}, \
{"int5_operand", {CONST_INT}}, \
{"uint5_operand", {CONST_INT}}, \
{"uint32_operand", {CONST_INT}}, \
{"int11_operand", {CONST_INT}}, \
{"and_operand", {SUBREG, REG, CONST_INT}}, \
{"ior_operand", {CONST_INT}}, \
{"lhs_lshift_operand", {SUBREG, REG, CONST_INT}}, \
{"lhs_lshift_cint_operand", {CONST_INT}}, \
{"plus_xor_ior_operator", {PLUS, XOR, IOR}}, \
{"shadd_operand", {CONST_INT}}, \
{"eq_neq_comparison_operator", {EQ, NE}}, \
{"movb_comparison_operator", {EQ, NE, LT, GE}}, \
{"pc_or_label_operand", {LABEL_REF, PC}}, \
{"symbolic_operand", {SYMBOL_REF, LABEL_REF, CONST}}, \
{"reg_or_nonsymb_mem_operand", {SUBREG, REG, MEM}}, \
{"move_operand", {SUBREG, REG, CONST_INT, MEM}}, \
{"pic_label_operand", {LABEL_REF, CONST}}, \
{"function_label_operand", {SYMBOL_REF}}, \
{"reg_or_0_or_nonsymb_mem_operand", {SUBREG, REG, CONST_INT, \
CONST_DOUBLE, MEM}}, \
{"div_operand", {REG, CONST_INT}}, \
{"call_operand_address", {SYMBOL_REF, LABEL_REF, CONST_INT, \
CONST_DOUBLE, CONST, HIGH}},
#endif
/* We want __gcc_plt_call to appear in every program built by
gcc, so we make a reference to it out of __main.
We use the asm statement to fool the optimizer into not

pa.md

@ -386,7 +386,71 @@
[(match_operand:SF 0 "reg_or_0_operand" "fG")
(match_operand:SF 1 "reg_or_0_operand" "fG")]))]
"! TARGET_SOFT_FLOAT"
"fcmp,sgl,%Y2 %r0,%r1"
"*
{
rtx next_insn;
/* See if this is later used in a reversed FP branch. If so, reverse our
condition and the branch. Doing so avoids a useless add,tr.
Don't do this if fcmp is in a delay slot since it's too much of a
headache to track down things on multiple paths. */
if (dbr_sequence_length ())
next_insn = NULL;
else
next_insn = NEXT_INSN (insn);
while (next_insn)
{
/* Jumps, calls and labels stop our search. */
if (GET_CODE (next_insn) == JUMP_INSN
|| GET_CODE (next_insn) == CALL_INSN
|| GET_CODE (next_insn) == CODE_LABEL)
break;
/* As does another fcmp insn. */
if (GET_CODE (next_insn) == INSN
&& GET_CODE (PATTERN (next_insn)) == SET
&& GET_CODE (SET_DEST (PATTERN (next_insn))) == REG
&& REGNO (SET_DEST (PATTERN (next_insn))) == 0)
break;
if (GET_CODE (next_insn) == INSN
&& GET_CODE (PATTERN (next_insn)) == SEQUENCE)
next_insn = XVECEXP (PATTERN (next_insn), 0, 0);
else
next_insn = NEXT_INSN (next_insn);
}
/* Is NEXT_INSN a branch? */
if (next_insn
&& GET_CODE (next_insn) == JUMP_INSN)
{
rtx pattern = PATTERN (next_insn);
/* If it is a reversed fp conditional branch (e.g. one using add,tr)
and CCFP dies, then reverse our condition and the branch to
avoid the add,tr. */
if (GET_CODE (pattern) == SET
&& SET_DEST (pattern) == pc_rtx
&& GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
&& GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
&& GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
&& REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
&& GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
&& find_regno_note (next_insn, REG_DEAD, 0))
{
rtx tmp;
tmp = XEXP (SET_SRC (pattern), 1);
XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
XEXP (SET_SRC (pattern), 2) = tmp;
INSN_CODE (next_insn) = -1;
return \"fcmp,sgl,%y2 %r0,%r1\";
}
}
return \"fcmp,sgl,%Y2 %r0,%r1\";
}"
[(set_attr "length" "4")
(set_attr "type" "fpcc")])
@ -396,7 +460,71 @@
[(match_operand:DF 0 "reg_or_0_operand" "fG")
(match_operand:DF 1 "reg_or_0_operand" "fG")]))]
"! TARGET_SOFT_FLOAT"
"fcmp,dbl,%Y2 %r0,%r1"
"*
{
rtx next_insn;
/* See if this is later used in a reversed FP branch. If so, reverse our
condition and the branch. Doing so avoids a useless add,tr.
Don't do this if fcmp is in a delay slot since it's too much of a
headache to track down things on multiple paths. */
if (dbr_sequence_length ())
next_insn = NULL;
else
next_insn = NEXT_INSN (insn);
while (next_insn)
{
/* Jumps, calls and labels stop our search. */
if (GET_CODE (next_insn) == JUMP_INSN
|| GET_CODE (next_insn) == CALL_INSN
|| GET_CODE (next_insn) == CODE_LABEL)
break;
/* As does another fcmp insn. */
if (GET_CODE (next_insn) == INSN
&& GET_CODE (PATTERN (next_insn)) == SET
&& GET_CODE (SET_DEST (PATTERN (next_insn))) == REG
&& REGNO (SET_DEST (PATTERN (next_insn))) == 0)
break;
if (GET_CODE (next_insn) == INSN
&& GET_CODE (PATTERN (next_insn)) == SEQUENCE)
next_insn = XVECEXP (PATTERN (next_insn), 0, 0);
else
next_insn = NEXT_INSN (next_insn);
}
/* Is NEXT_INSN a branch? */
if (next_insn
&& GET_CODE (next_insn) == JUMP_INSN)
{
rtx pattern = PATTERN (next_insn);
/* If it is a reversed fp conditional branch (e.g. one using add,tr)
and CCFP dies, then reverse our condition and the branch to
avoid the add,tr. */
if (GET_CODE (pattern) == SET
&& SET_DEST (pattern) == pc_rtx
&& GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
&& GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
&& GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
&& REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
&& GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
&& find_regno_note (next_insn, REG_DEAD, 0))
{
rtx tmp;
tmp = XEXP (SET_SRC (pattern), 1);
XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
XEXP (SET_SRC (pattern), 2) = tmp;
INSN_CODE (next_insn) = -1;
return \"fcmp,dbl,%y2 %r0,%r1\";
}
}
return \"fcmp,dbl,%Y2 %r0,%r1\";
}"
[(set_attr "length" "4")
(set_attr "type" "fpcc")])
@ -761,6 +889,15 @@
comiclr,<< %2,%0,0\;ldi %2,%0"
[(set_attr "type" "multi,multi")
(set_attr "length" "8,8")])
(define_insn "abssi2"
[(set (match_operand:SI 0 "register_operand" "=r")
(abs:SI (match_operand:SI 1 "register_operand" "0")))]
""
"comiclr,< 0,%0,0\;subi 0,%0,%0"
[(set_attr "type" "multi")
(set_attr "length" "8")])
;;; Experimental conditional move patterns
(define_expand "movsicc"
@ -1302,6 +1439,25 @@
[(set_attr "type" "load")
(set_attr "length" "8")])
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=r")
(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "basereg_operand" "r"))))]
"! TARGET_DISABLE_INDEXING"
"*
{
/* Reload can create backwards (relative to cse) unscaled index
address modes when eliminating registers and possibly for
pseudos that don't get hard registers. Deal with it. */
if (operands[1] == hard_frame_pointer_rtx
|| operands[1] == stack_pointer_rtx)
return \"ldwx %2(0,%1),%0\";
else
return \"ldwx %1(0,%2),%0\";
}"
[(set_attr "type" "load")
(set_attr "length" "4")])
;; Load or store with base-register modification.
(define_insn "pre_ldwm"
@ -1623,6 +1779,25 @@
[(set_attr "type" "load")
(set_attr "length" "8")])
(define_insn ""
[(set (match_operand:HI 0 "register_operand" "=r")
(mem:HI (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "basereg_operand" "r"))))]
"! TARGET_DISABLE_INDEXING"
"*
{
/* Reload can create backwards (relative to cse) unscaled index
address modes when eliminating registers and possibly for
pseudos that don't get hard registers. Deal with it. */
if (operands[1] == hard_frame_pointer_rtx
|| operands[1] == stack_pointer_rtx)
return \"ldhx %2(0,%1),%0\";
else
return \"ldhx %1(0,%2),%0\";
}"
[(set_attr "type" "load")
(set_attr "length" "4")])
(define_insn ""
[(set (match_operand:HI 3 "register_operand" "=r")
(mem:HI (plus:SI (match_operand:SI 1 "register_operand" "0")
@ -1689,6 +1864,25 @@
[(set_attr "type" "move,move,move,shift,load,store,move,fpalu")
(set_attr "length" "4,4,4,4,4,4,4,4")])
(define_insn ""
[(set (match_operand:QI 0 "register_operand" "=r")
(mem:QI (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "basereg_operand" "r"))))]
"! TARGET_DISABLE_INDEXING"
"*
{
/* Reload can create backwards (relative to cse) unscaled index
address modes when eliminating registers and possibly for
pseudos that don't get hard registers. Deal with it. */
if (operands[1] == hard_frame_pointer_rtx
|| operands[1] == stack_pointer_rtx)
return \"ldbx %2(0,%1),%0\";
else
return \"ldbx %1(0,%2),%0\";
}"
[(set_attr "type" "load")
(set_attr "length" "4")])
(define_insn ""
[(set (match_operand:QI 3 "register_operand" "=r")
(mem:QI (plus:SI (match_operand:SI 1 "register_operand" "0")
@ -1727,19 +1921,55 @@
""
"
{
/* If the blocks are not at least word-aligned and rather big (>16 items),
or the size is indeterminate, don't inline the copy code. A
procedure call is better since it can check the alignment at
runtime and make the optimal decisions. */
if (INTVAL (operands[3]) < 4
&& (GET_CODE (operands[2]) != CONST_INT
|| (INTVAL (operands[2]) / INTVAL (operands[3]) > 8)))
FAIL;
int size, align;
/* HP provides a very fast block move library routine for the PA;
this routine includes:
4x4 byte at a time block moves,
1x4 byte at a time with alignment checked at runtime with
attempts to align the source and destination as needed,
1x1 byte loop.
With that in mind, here are the heuristics to try and guess when
the inlined block move will be better than the library block
move:
If the size isn't constant, then always use the library routines.
If the size is large with respect to the known alignment, then use
the library routines.
If the size is small with respect to the known alignment, then open
code the copy (since that will lead to better scheduling).
Else use the block move pattern. */
/* Undetermined size, use the library routine. */
if (GET_CODE (operands[2]) != CONST_INT)
FAIL;
size = INTVAL (operands[2]);
align = INTVAL (operands[3]);
align = align > 4 ? 4 : align;
/* If size/align > 16 (e.g. the size is large with respect to the
alignment), then use the library routines. */
if (size/align > 16)
FAIL;
/* This does happen, but not often enough to worry much about. */
if (size/align < MOVE_RATIO)
FAIL;
/* Fall through means we're going to use our block move pattern. */
operands[0] = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
operands[1] = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);
emit_insn (gen_movstrsi_internal (operands[0], operands[1], operands[4],
operands[5], operands[2], operands[3],
gen_reg_rtx (SImode)));
DONE;
}")
;; The operand constraints are written like this to support both compile-time
@ -1747,13 +1977,14 @@
;; the register with the byte count is clobbered by the copying code, and
;; therefore it is forced to operand 2. If the count is compile-time
;; determined, we need two scratch registers for the unrolled code.
(define_insn ""
(define_insn "movstrsi_internal"
[(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r"))
(mem:BLK (match_operand:SI 1 "register_operand" "+r,r")))
(clobber (match_dup 0))
(clobber (match_dup 1))
(clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp
(clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
(use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
(use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment
""
@ -1778,7 +2009,7 @@
&& operands[1] != CONST0_RTX (DFmode)
&& ! TARGET_SOFT_FLOAT"
"* return (which_alternative == 0 ? output_move_double (operands)
: \" fldds%F1 %1,%0\");"
: \"fldds%F1 %1,%0\");"
[(set_attr "type" "move,fpload")
(set_attr "length" "16,4")])
@ -1897,6 +2128,25 @@
[(set_attr "type" "fpload")
(set_attr "length" "8")])
(define_insn ""
[(set (match_operand:DF 0 "register_operand" "=fx")
(mem:DF (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "basereg_operand" "r"))))]
"! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
"*
{
/* Reload can create backwards (relative to cse) unscaled index
address modes when eliminating registers and possibly for
pseudos that don't get hard registers. Deal with it. */
if (operands[1] == hard_frame_pointer_rtx
|| operands[1] == stack_pointer_rtx)
return \"flddx %2(0,%1),%0\";
else
return \"flddx %1(0,%2),%0\";
}"
[(set_attr "type" "fpload")
(set_attr "length" "4")])
(define_insn ""
[(set (mem:DF (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
(const_int 8))
@ -1936,6 +2186,25 @@
[(set_attr "type" "fpstore")
(set_attr "length" "8")])
(define_insn ""
[(set (mem:DF (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "basereg_operand" "r")))
(match_operand:DF 0 "register_operand" "fx"))]
"! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
"*
{
/* Reload can create backwards (relative to cse) unscaled index
address modes when eliminating registers and possibly for
pseudos that don't get hard registers. Deal with it. */
if (operands[1] == hard_frame_pointer_rtx
|| operands[1] == stack_pointer_rtx)
return \"fstdx %0,%2(0,%1)\";
else
return \"fstdx %0,%1(0,%2)\";
}"
[(set_attr "type" "fpstore")
(set_attr "length" "4")])
(define_expand "movdi"
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "")
(match_operand:DI 1 "general_operand" ""))]
@ -2202,6 +2471,25 @@
[(set_attr "type" "fpload")
(set_attr "length" "8")])
(define_insn ""
[(set (match_operand:SF 0 "register_operand" "=fx")
(mem:SF (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "basereg_operand" "r"))))]
"! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
"*
{
/* Reload can create backwards (relative to cse) unscaled index
address modes when eliminating registers and possibly for
pseudos that don't get hard registers. Deal with it. */
if (operands[1] == hard_frame_pointer_rtx
|| operands[1] == stack_pointer_rtx)
return \"fldwx %2(0,%1),%0\";
else
return \"fldwx %1(0,%2),%0\";
}"
[(set_attr "type" "fpload")
(set_attr "length" "4")])
(define_insn ""
[(set (mem:SF (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
(const_int 4))
@ -2240,7 +2528,27 @@
}"
[(set_attr "type" "fpstore")
(set_attr "length" "8")])
(define_insn ""
[(set (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "basereg_operand" "r")))
(match_operand:SF 0 "register_operand" "fx"))]
"! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
"*
{
/* Reload can create backwards (relative to cse) unscaled index
address modes when eliminating registers and possibly for
pseudos that don't get hard registers. Deal with it. */
if (operands[1] == hard_frame_pointer_rtx
|| operands[1] == stack_pointer_rtx)
return \"fstwx %0,%2(0,%1)\";
else
return \"fstwx %0,%1(0,%2)\";
}"
[(set_attr "type" "fpstore")
(set_attr "length" "4")])
;;- zero extension instructions
(define_insn "zero_extendhisi2"