re PR rtl-optimization/12630 (Various unrecognizable insns and ICEs at -O3)
PR optimization/12630
* pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
Handle length computation for 64-bit moves.
(compute_clrstr_length, output_block_clear): Implement block clear.
(output_block_move): Handle 64-bit moves.
(pa_adjust_insn_length): Use compute_movstr_length and
compute_clrstr_length.
* pa.md (movstrsi): Revise operand order and comments.  Don't use
match_scratch.
(movstrsi_internal): Delete.
(movstrsi_prereload, movstrsi_postreload): New insns.  Define splitter
and peephole2 patterns to transform prereload to postreload form.
(movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
clrstrdi_postreload): New patterns for 64-bit block move and block
clear.
* pa-protos.h (output_block_clear): New prototype.

From-SVN: r73375
@@ -1,3 +1,23 @@
+2003-11-08  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
+
+	PR optimization/12630
+	* pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
+	Handle length computation for 64-bit moves.
+	(compute_clrstr_length, output_block_clear): Implement block clear.
+	(output_block_move): Handle 64-bit moves.
+	(pa_adjust_insn_length): Use compute_movstr_length and
+	compute_clrstr_length.
+	* pa.md (movstrsi): Revise operand order and comments.  Don't use
+	match_scratch.
+	(movstrsi_internal): Delete.
+	(movstrsi_prereload, movstrsi_postreload): New insns.  Define splitter
+	and peephole2 patterns to transform prereload to postreload form.
+	(movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
+	clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
+	clrstrdi_postreload): New patterns for 64-bit block move and block
+	clear.
+	* pa-protos.h (output_block_clear): New prototype.
+
 2003-11-08  Andreas Schwab  <schwab@suse.de>
 
 	* dbxout.c (current_file): Also wrap inside DBX_DEBUGGING_INFO ||
@@ -40,6 +40,7 @@ extern const char *output_ior (rtx *);
 extern const char *output_move_double (rtx *);
 extern const char *output_fp_move_double (rtx *);
 extern const char *output_block_move (rtx *, int);
+extern const char *output_block_clear (rtx *, int);
 extern const char *output_cbranch (rtx *, int, int, int, rtx);
 extern const char *output_lbranch (rtx, rtx);
 extern const char *output_bb (rtx *, int, int, int, rtx, int);
@@ -103,7 +103,8 @@ static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
 static int forward_branch_p (rtx);
 static int shadd_constant_p (int);
 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
-static int compute_movstrsi_length (rtx);
+static int compute_movstr_length (rtx);
+static int compute_clrstr_length (rtx);
 static bool pa_assemble_integer (rtx, unsigned int, int);
 static void remove_useless_addtr_insns (int);
 static void store_reg (int, int, int);
@@ -2431,8 +2432,8 @@ find_addr_reg (rtx addr)
    OPERANDS[0] is the destination pointer as a REG, clobbered.
    OPERANDS[1] is the source pointer as a REG, clobbered.
    OPERANDS[2] is a register for temporary storage.
-   OPERANDS[4] is the size as a CONST_INT
    OPERANDS[3] is a register for temporary storage.
+   OPERANDS[4] is the size as a CONST_INT
    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
+   OPERANDS[6] is another temporary register.  */
 
@@ -2442,15 +2443,43 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
   int align = INTVAL (operands[5]);
   unsigned long n_bytes = INTVAL (operands[4]);
 
-  /* We can't move more than four bytes at a time because the PA
+  /* We can't move more than a word at a time because the PA
      has no longer integer move insns.  (Could use fp mem ops?)  */
-  if (align > 4)
-    align = 4;
+  if (align > (TARGET_64BIT ? 8 : 4))
+    align = (TARGET_64BIT ? 8 : 4);
 
   /* Note that we know each loop below will execute at least twice
      (else we would have open-coded the copy).  */
   switch (align)
     {
+    case 8:
+      /* Pre-adjust the loop counter.  */
+      operands[4] = GEN_INT (n_bytes - 16);
+      output_asm_insn ("ldi %4,%2", operands);
+
+      /* Copying loop.  */
+      output_asm_insn ("ldd,ma 8(%1),%3", operands);
+      output_asm_insn ("ldd,ma 8(%1),%6", operands);
+      output_asm_insn ("std,ma %3,8(%0)", operands);
+      output_asm_insn ("addib,>= -16,%2,.-12", operands);
+      output_asm_insn ("std,ma %6,8(%0)", operands);
+
+      /* Handle the residual.  There could be up to 7 bytes of
+         residual to copy!  */
+      if (n_bytes % 16 != 0)
+        {
+          operands[4] = GEN_INT (n_bytes % 8);
+          if (n_bytes % 16 >= 8)
+            output_asm_insn ("ldd,ma 8(%1),%3", operands);
+          if (n_bytes % 8 != 0)
+            output_asm_insn ("ldd 0(%1),%6", operands);
+          if (n_bytes % 16 >= 8)
+            output_asm_insn ("std,ma %3,8(%0)", operands);
+          if (n_bytes % 8 != 0)
+            output_asm_insn ("stdby,e %6,%4(%0)", operands);
+        }
+      return "";
+
     case 4:
       /* Pre-adjust the loop counter.  */
       operands[4] = GEN_INT (n_bytes - 8);
@@ -2536,7 +2565,7 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
    count insns rather than emit them.  */
 
 static int
-compute_movstrsi_length (rtx insn)
+compute_movstr_length (rtx insn)
 {
   rtx pat = PATTERN (insn);
   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
@@ -2545,8 +2574,8 @@ compute_movstrsi_length (rtx insn)
 
   /* We can't move more than four bytes at a time because the PA
      has no longer integer move insns.  (Could use fp mem ops?)  */
-  if (align > 4)
-    align = 4;
+  if (align > (TARGET_64BIT ? 8 : 4))
+    align = (TARGET_64BIT ? 8 : 4);
 
   /* The basic copying loop.  */
   n_insns = 6;
@@ -2564,6 +2593,148 @@ compute_movstrsi_length (rtx insn)
   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
   return n_insns * 4;
 }
 
+/* Emit code to perform a block clear.
+
+   OPERANDS[0] is the destination pointer as a REG, clobbered.
+   OPERANDS[1] is a register for temporary storage.
+   OPERANDS[2] is the size as a CONST_INT
+   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
+
+const char *
+output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
+{
+  int align = INTVAL (operands[3]);
+  unsigned long n_bytes = INTVAL (operands[2]);
+
+  /* We can't clear more than a word at a time because the PA
+     has no longer integer move insns.  */
+  if (align > (TARGET_64BIT ? 8 : 4))
+    align = (TARGET_64BIT ? 8 : 4);
+
+  /* Note that we know each loop below will execute at least twice
+     (else we would have open-coded the copy).  */
+  switch (align)
+    {
+    case 8:
+      /* Pre-adjust the loop counter.  */
+      operands[2] = GEN_INT (n_bytes - 16);
+      output_asm_insn ("ldi %2,%1", operands);
+
+      /* Loop.  */
+      output_asm_insn ("std,ma %%r0,8(%0)", operands);
+      output_asm_insn ("addib,>= -16,%1,.-4", operands);
+      output_asm_insn ("std,ma %%r0,8(%0)", operands);
+
+      /* Handle the residual.  There could be up to 7 bytes of
+         residual to copy!  */
+      if (n_bytes % 16 != 0)
+        {
+          operands[2] = GEN_INT (n_bytes % 8);
+          if (n_bytes % 16 >= 8)
+            output_asm_insn ("std,ma %%r0,8(%0)", operands);
+          if (n_bytes % 8 != 0)
+            output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
+        }
+      return "";
+
+    case 4:
+      /* Pre-adjust the loop counter.  */
+      operands[2] = GEN_INT (n_bytes - 8);
+      output_asm_insn ("ldi %2,%1", operands);
+
+      /* Loop.  */
+      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+      output_asm_insn ("addib,>= -8,%1,.-4", operands);
+      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+
+      /* Handle the residual.  There could be up to 7 bytes of
+         residual to copy!  */
+      if (n_bytes % 8 != 0)
+        {
+          operands[2] = GEN_INT (n_bytes % 4);
+          if (n_bytes % 8 >= 4)
+            output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+          if (n_bytes % 4 != 0)
+            output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
+        }
+      return "";
+
+    case 2:
+      /* Pre-adjust the loop counter.  */
+      operands[2] = GEN_INT (n_bytes - 4);
+      output_asm_insn ("ldi %2,%1", operands);
+
+      /* Loop.  */
+      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+      output_asm_insn ("addib,>= -4,%1,.-4", operands);
+      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+
+      /* Handle the residual.  */
+      if (n_bytes % 4 != 0)
+        {
+          if (n_bytes % 4 >= 2)
+            output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+          if (n_bytes % 2 != 0)
+            output_asm_insn ("stb %%r0,0(%0)", operands);
+        }
+      return "";
+
+    case 1:
+      /* Pre-adjust the loop counter.  */
+      operands[2] = GEN_INT (n_bytes - 2);
+      output_asm_insn ("ldi %2,%1", operands);
+
+      /* Loop.  */
+      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
+      output_asm_insn ("addib,>= -2,%1,.-4", operands);
+      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
+
+      /* Handle the residual.  */
+      if (n_bytes % 2 != 0)
+        output_asm_insn ("stb %%r0,0(%0)", operands);
+
+      return "";
+
+    default:
+      abort ();
+    }
+}
+
+/* Count the number of insns necessary to handle this block move.
+
+   Basic structure is the same as emit_block_move, except that we
+   count insns rather than emit them.  */
+
+static int
+compute_clrstr_length (rtx insn)
+{
+  rtx pat = PATTERN (insn);
+  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
+  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
+  unsigned int n_insns = 0;
+
+  /* We can't clear more than a word at a time because the PA
+     has no longer integer move insns.  */
+  if (align > (TARGET_64BIT ? 8 : 4))
+    align = (TARGET_64BIT ? 8 : 4);
+
+  /* The basic loop.  */
+  n_insns = 4;
+
+  /* Residuals.  */
+  if (n_bytes % (2 * align) != 0)
+    {
+      if ((n_bytes % (2 * align)) >= align)
+        n_insns++;
+
+      if ((n_bytes % align) != 0)
+        n_insns++;
+    }
+
+  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
+  return n_insns * 4;
+}
 
 const char *
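
As a cross-check of the arithmetic, here is a standalone C mirror (not part of the patch; clrstr_length is a hypothetical name) of the count performed by compute_clrstr_length: a fixed 4-insn clear loop plus one insn for each residual store.

#include <stdio.h>

/* Hypothetical standalone mirror of compute_clrstr_length above;
   not part of the patch.  */
static int
clrstr_length (unsigned long n_bytes, unsigned int align, int is_64bit)
{
  unsigned int n_insns = 4;  /* ldi + store + addib + store */

  /* Clamp the alignment to the word size, as the real code does.  */
  if (align > (is_64bit ? 8u : 4u))
    align = is_64bit ? 8 : 4;

  /* Residuals: one store for an aligned leftover chunk, one more
     for a sub-word tail.  */
  if (n_bytes % (2 * align) != 0)
    {
      if (n_bytes % (2 * align) >= align)
        n_insns++;
      if (n_bytes % align != 0)
        n_insns++;
    }

  /* Each PA insn is 4 bytes.  */
  return n_insns * 4;
}

int
main (void)
{
  /* 30 bytes at 8-byte alignment: 4 + 1 + 1 insns = 24 bytes, so
     pa_adjust_insn_length (below) would report 24 - 4 = 20 bytes
     over the pattern's default length of 4.  */
  printf ("%d\n", clrstr_length (30, 8, 1));
  return 0;
}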
@@ -4337,7 +4508,15 @@ pa_adjust_insn_length (rtx insn, int length)
           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
-    return compute_movstrsi_length (insn) - 4;
+    return compute_movstr_length (insn) - 4;
+  /* Block clear pattern.  */
+  else if (GET_CODE (insn) == INSN
+          && GET_CODE (pat) == PARALLEL
+          && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+          && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
+          && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
+          && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
+    return compute_clrstr_length (insn) - 4;
   /* Conditional branch with an unfilled delay slot.  */
   else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
     {
@@ -2955,20 +2955,20 @@
    (set_attr "length" "4")])
 
 ;; The definition of this insn does not really explain what it does,
-;; but it should suffice
-;; that anything generated as this insn will be recognized as one
-;; and that it will not successfully combine with anything.
+;; but it should suffice that anything generated as this insn will be
+;; recognized as a movstrsi operation, and that it will not successfully
+;; combine with anything.
 (define_expand "movstrsi"
   [(parallel [(set (match_operand:BLK 0 "" "")
                    (match_operand:BLK 1 "" ""))
-              (clobber (match_scratch:SI 7 ""))
-              (clobber (match_scratch:SI 8 ""))
               (clobber (match_dup 4))
               (clobber (match_dup 5))
               (clobber (match_dup 6))
+              (clobber (match_dup 7))
+              (clobber (match_dup 8))
               (use (match_operand:SI 2 "arith_operand" ""))
               (use (match_operand:SI 3 "const_int_operand" ""))])]
-  "!TARGET_64BIT"
+  "!TARGET_64BIT && optimize > 0"
   "
 {
   int size, align;
@@ -2990,7 +2990,7 @@
    If the size is large in respect to the known alignment, then use
    the library routines.
 
-   If the size is small in repsect to the known alignment, then open
+   If the size is small in respect to the known alignment, then open
    code the copy (since that will lead to better scheduling).
 
    Else use the block move pattern.  */
@@ -3003,8 +3003,7 @@
   align = INTVAL (operands[3]);
   align = align > 4 ? 4 : align;
 
-  /* If size/alignment > 8 (eg size is large in respect to alignment),
-     then use the library routines.  */
+  /* If size/alignment is large, then use the library routines.  */
   if (size / align > 16)
     FAIL;
 
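
The FAIL paths above implement the expander's inline-vs-library heuristic. Here is a standalone C sketch (not part of the patch; the function name and the move_ratio parameter are assumptions standing in for the target's MOVE_RATIO):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical sketch of the movstr/clrstr expander heuristic above;
   not part of the patch.  Returning false corresponds to FAIL, which
   falls back to generic open-coding or a library call.  */
static bool
use_block_pattern (int size, int align, int word_size, int move_ratio)
{
  if (align > word_size)         /* clamp to 4 (32-bit) or 8 (64-bit) */
    align = word_size;
  if (size / align > 16)         /* large copy: let the library do it */
    return false;
  if (size / align < move_ratio) /* tiny copy: generic code open-codes it */
    return false;
  return true;                   /* otherwise emit the block pattern */
}

int
main (void)
{
  /* With a 4-byte word and an assumed MOVE_RATIO of 8: 64/4 = 16 uses
     the block pattern, while 128/4 = 32 FAILs to the library routine.  */
  printf ("%d %d\n", use_block_pattern (64, 4, 4, 8),
          use_block_pattern (128, 4, 4, 8));
  return 0;
}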
@@ -3022,28 +3021,470 @@
   operands[4] = gen_reg_rtx (SImode);
   operands[5] = gen_reg_rtx (SImode);
   operands[6] = gen_reg_rtx (SImode);
-  operands[7] = XEXP (operands[0], 0);
-  operands[8] = XEXP (operands[1], 0);
+  operands[7] = gen_reg_rtx (SImode);
+  operands[8] = gen_reg_rtx (SImode);
 }")
 
 ;; The operand constraints are written like this to support both compile-time
-;; and run-time determined byte count.  If the count is run-time determined,
-;; the register with the byte count is clobbered by the copying code, and
-;; therefore it is forced to operand 2.  If the count is compile-time
-;; determined, we need two scratch registers for the unrolled code.
-(define_insn "movstrsi_internal"
+;; and run-time determined byte counts.  The expander and output_block_move
+;; only support compile-time determined counts at this time.
+;;
+;; If the count is run-time determined, the register with the byte count
+;; is clobbered by the copying code, and therefore it is forced to operand 2.
+;;
+;; We used to clobber operands 0 and 1.  However, a change to regrename.c
+;; broke this semantic for pseudo registers.  We can't use match_scratch
+;; as this requires two registers in the class R1_REGS when the MEMs for
+;; operands 0 and 1 are both equivalent to symbolic MEMs.  Thus, we are
+;; forced to internally copy operands 0 and 1 to operands 7 and 8,
+;; respectively.  We then split or peephole optimize after reload.
+(define_insn "movstrsi_prereload"
   [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
         (mem:BLK (match_operand:SI 1 "register_operand" "r,r")))
-   (clobber (match_scratch:SI 7 "=0,0"))
-   (clobber (match_scratch:SI 8 "=1,1"))
    (clobber (match_operand:SI 2 "register_operand" "=r,r"))    ;loop cnt/tmp
-   (clobber (match_operand:SI 3 "register_operand" "=&r,&r"))  ;item tmp
+   (clobber (match_operand:SI 3 "register_operand" "=&r,&r"))  ;item tmp1
+   (clobber (match_operand:SI 6 "register_operand" "=&r,&r"))  ;item tmp2
+   (clobber (match_operand:SI 7 "register_operand" "=&r,&r"))  ;item tmp3
+   (clobber (match_operand:SI 8 "register_operand" "=&r,&r"))  ;item tmp4
    (use (match_operand:SI 4 "arith_operand" "J,2"))    ;byte count
    (use (match_operand:SI 5 "const_int_operand" "n,n"))]       ;alignment
   "!TARGET_64BIT"
   "#"
   [(set_attr "type" "multi,multi")])
 
+(define_split
+  [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
+                   (mem:BLK (match_operand:SI 1 "register_operand" "")))
+              (clobber (match_operand:SI 2 "register_operand" ""))
+              (clobber (match_operand:SI 3 "register_operand" ""))
+              (clobber (match_operand:SI 6 "register_operand" ""))
+              (clobber (match_operand:SI 7 "register_operand" ""))
+              (clobber (match_operand:SI 8 "register_operand" ""))
+              (use (match_operand:SI 4 "arith_operand" ""))
+              (use (match_operand:SI 5 "const_int_operand" ""))])]
+  "!TARGET_64BIT && reload_completed && !flag_peephole2"
+  [(set (match_dup 7) (match_dup 0))
+   (set (match_dup 8) (match_dup 1))
+   (parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+              (clobber (match_dup 2))
+              (clobber (match_dup 3))
+              (clobber (match_dup 6))
+              (clobber (match_dup 7))
+              (clobber (match_dup 8))
+              (use (match_dup 4))
+              (use (match_dup 5))
+              (const_int 0)])]
+  "")
+
+(define_peephole2
+  [(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
+                   (mem:BLK (match_operand:SI 1 "register_operand" "")))
+              (clobber (match_operand:SI 2 "register_operand" ""))
+              (clobber (match_operand:SI 3 "register_operand" ""))
+              (clobber (match_operand:SI 6 "register_operand" ""))
+              (clobber (match_operand:SI 7 "register_operand" ""))
+              (clobber (match_operand:SI 8 "register_operand" ""))
+              (use (match_operand:SI 4 "arith_operand" ""))
+              (use (match_operand:SI 5 "const_int_operand" ""))])]
+  "!TARGET_64BIT"
+  [(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+              (clobber (match_dup 2))
+              (clobber (match_dup 3))
+              (clobber (match_dup 6))
+              (clobber (match_dup 7))
+              (clobber (match_dup 8))
+              (use (match_dup 4))
+              (use (match_dup 5))
+              (const_int 0)])]
+  "
+{
+  if (dead_or_set_p (curr_insn, operands[0]))
+    operands[7] = operands[0];
+  else
+    emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0]));
+
+  if (dead_or_set_p (curr_insn, operands[1]))
+    operands[8] = operands[1];
+  else
+    emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1]));
+}")
+
+(define_insn "movstrsi_postreload"
+  [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
+        (mem:BLK (match_operand:SI 1 "register_operand" "r,r")))
+   (clobber (match_operand:SI 2 "register_operand" "=r,r"))    ;loop cnt/tmp
+   (clobber (match_operand:SI 3 "register_operand" "=&r,&r"))  ;item tmp1
+   (clobber (match_operand:SI 6 "register_operand" "=&r,&r"))  ;item tmp2
+   (clobber (match_dup 0))
+   (clobber (match_dup 1))
+   (use (match_operand:SI 4 "arith_operand" "J,2"))    ;byte count
+   (use (match_operand:SI 5 "const_int_operand" "n,n"))        ;alignment
+   (const_int 0)]
+  "!TARGET_64BIT && reload_completed"
+  "* return output_block_move (operands, !which_alternative);"
+  [(set_attr "type" "multi,multi")])
 
(define_expand "movstrdi"
|
||||
[(parallel [(set (match_operand:BLK 0 "" "")
|
||||
(match_operand:BLK 1 "" ""))
|
||||
(clobber (match_dup 4))
|
||||
(clobber (match_dup 5))
|
||||
(clobber (match_dup 6))
|
||||
(clobber (match_dup 7))
|
||||
(clobber (match_dup 8))
|
||||
(use (match_operand:DI 2 "arith_operand" ""))
|
||||
(use (match_operand:DI 3 "const_int_operand" ""))])]
|
||||
"TARGET_64BIT && optimize > 0"
|
||||
"
|
||||
{
|
||||
int size, align;
|
||||
|
||||
/* HP provides very fast block move library routine for the PA;
|
||||
this routine includes:
|
||||
|
||||
4x4 byte at a time block moves,
|
||||
1x4 byte at a time with alignment checked at runtime with
|
||||
attempts to align the source and destination as needed
|
||||
1x1 byte loop
|
||||
|
||||
With that in mind, here's the heuristics to try and guess when
|
||||
the inlined block move will be better than the library block
|
||||
move:
|
||||
|
||||
If the size isn't constant, then always use the library routines.
|
||||
|
||||
If the size is large in respect to the known alignment, then use
|
||||
the library routines.
|
||||
|
||||
If the size is small in respect to the known alignment, then open
|
||||
code the copy (since that will lead to better scheduling).
|
||||
|
||||
Else use the block move pattern. */
|
||||
|
||||
/* Undetermined size, use the library routine. */
|
||||
if (GET_CODE (operands[2]) != CONST_INT)
|
||||
FAIL;
|
||||
|
||||
size = INTVAL (operands[2]);
|
||||
align = INTVAL (operands[3]);
|
||||
align = align > 8 ? 8 : align;
|
||||
|
||||
/* If size/alignment is large, then use the library routines. */
|
||||
if (size / align > 16)
|
||||
FAIL;
|
||||
|
||||
/* This does happen, but not often enough to worry much about. */
|
||||
if (size / align < MOVE_RATIO)
|
||||
FAIL;
|
||||
|
||||
/* Fall through means we're going to use our block move pattern. */
|
||||
operands[0]
|
||||
= replace_equiv_address (operands[0],
|
||||
copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
|
||||
operands[1]
|
||||
= replace_equiv_address (operands[1],
|
||||
copy_to_mode_reg (DImode, XEXP (operands[1], 0)));
|
||||
operands[4] = gen_reg_rtx (DImode);
|
||||
operands[5] = gen_reg_rtx (DImode);
|
||||
operands[6] = gen_reg_rtx (DImode);
|
||||
operands[7] = gen_reg_rtx (DImode);
|
||||
operands[8] = gen_reg_rtx (DImode);
|
||||
}")
|
||||
|
||||
+;; The operand constraints are written like this to support both compile-time
+;; and run-time determined byte counts.  The expander and output_block_move
+;; only support compile-time determined counts at this time.
+;;
+;; If the count is run-time determined, the register with the byte count
+;; is clobbered by the copying code, and therefore it is forced to operand 2.
+;;
+;; We used to clobber operands 0 and 1.  However, a change to regrename.c
+;; broke this semantic for pseudo registers.  We can't use match_scratch
+;; as this requires two registers in the class R1_REGS when the MEMs for
+;; operands 0 and 1 are both equivalent to symbolic MEMs.  Thus, we are
+;; forced to internally copy operands 0 and 1 to operands 7 and 8,
+;; respectively.  We then split or peephole optimize after reload.
+(define_insn "movstrdi_prereload"
+  [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+        (mem:BLK (match_operand:DI 1 "register_operand" "r,r")))
+   (clobber (match_operand:DI 2 "register_operand" "=r,r"))    ;loop cnt/tmp
+   (clobber (match_operand:DI 3 "register_operand" "=&r,&r"))  ;item tmp1
+   (clobber (match_operand:DI 6 "register_operand" "=&r,&r"))  ;item tmp2
+   (clobber (match_operand:DI 7 "register_operand" "=&r,&r"))  ;item tmp3
+   (clobber (match_operand:DI 8 "register_operand" "=&r,&r"))  ;item tmp4
+   (use (match_operand:DI 4 "arith_operand" "J,2"))    ;byte count
+   (use (match_operand:DI 5 "const_int_operand" "n,n"))]       ;alignment
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi,multi")])
+
+(define_split
+  [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
+                   (mem:BLK (match_operand:DI 1 "register_operand" "")))
+              (clobber (match_operand:DI 2 "register_operand" ""))
+              (clobber (match_operand:DI 3 "register_operand" ""))
+              (clobber (match_operand:DI 6 "register_operand" ""))
+              (clobber (match_operand:DI 7 "register_operand" ""))
+              (clobber (match_operand:DI 8 "register_operand" ""))
+              (use (match_operand:DI 4 "arith_operand" ""))
+              (use (match_operand:DI 5 "const_int_operand" ""))])]
+  "TARGET_64BIT && reload_completed && !flag_peephole2"
+  [(set (match_dup 7) (match_dup 0))
+   (set (match_dup 8) (match_dup 1))
+   (parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+              (clobber (match_dup 2))
+              (clobber (match_dup 3))
+              (clobber (match_dup 6))
+              (clobber (match_dup 7))
+              (clobber (match_dup 8))
+              (use (match_dup 4))
+              (use (match_dup 5))
+              (const_int 0)])]
+  "")
+
+(define_peephole2
+  [(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
+                   (mem:BLK (match_operand:DI 1 "register_operand" "")))
+              (clobber (match_operand:DI 2 "register_operand" ""))
+              (clobber (match_operand:DI 3 "register_operand" ""))
+              (clobber (match_operand:DI 6 "register_operand" ""))
+              (clobber (match_operand:DI 7 "register_operand" ""))
+              (clobber (match_operand:DI 8 "register_operand" ""))
+              (use (match_operand:DI 4 "arith_operand" ""))
+              (use (match_operand:DI 5 "const_int_operand" ""))])]
+  "TARGET_64BIT"
+  [(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
+              (clobber (match_dup 2))
+              (clobber (match_dup 3))
+              (clobber (match_dup 6))
+              (clobber (match_dup 7))
+              (clobber (match_dup 8))
+              (use (match_dup 4))
+              (use (match_dup 5))
+              (const_int 0)])]
+  "
+{
+  if (dead_or_set_p (curr_insn, operands[0]))
+    operands[7] = operands[0];
+  else
+    emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0]));
+
+  if (dead_or_set_p (curr_insn, operands[1]))
+    operands[8] = operands[1];
+  else
+    emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1]));
+}")
+
+(define_insn "movstrdi_postreload"
+  [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+        (mem:BLK (match_operand:DI 1 "register_operand" "r,r")))
+   (clobber (match_operand:DI 2 "register_operand" "=r,r"))    ;loop cnt/tmp
+   (clobber (match_operand:DI 3 "register_operand" "=&r,&r"))  ;item tmp1
+   (clobber (match_operand:DI 6 "register_operand" "=&r,&r"))  ;item tmp2
+   (clobber (match_dup 0))
+   (clobber (match_dup 1))
+   (use (match_operand:DI 4 "arith_operand" "J,2"))    ;byte count
+   (use (match_operand:DI 5 "const_int_operand" "n,n"))        ;alignment
+   (const_int 0)]
+  "TARGET_64BIT && reload_completed"
+  "* return output_block_move (operands, !which_alternative);"
+  [(set_attr "type" "multi,multi")])
 
(define_expand "clrstrsi"
|
||||
[(parallel [(set (match_operand:BLK 0 "" "")
|
||||
(const_int 0))
|
||||
(clobber (match_dup 3))
|
||||
(clobber (match_dup 4))
|
||||
(use (match_operand:SI 1 "arith_operand" ""))
|
||||
(use (match_operand:SI 2 "const_int_operand" ""))])]
|
||||
"!TARGET_64BIT && optimize > 0"
|
||||
"
|
||||
{
|
||||
int size, align;
|
||||
|
||||
/* Undetermined size, use the library routine. */
|
||||
if (GET_CODE (operands[1]) != CONST_INT)
|
||||
FAIL;
|
||||
|
||||
size = INTVAL (operands[1]);
|
||||
align = INTVAL (operands[2]);
|
||||
align = align > 4 ? 4 : align;
|
||||
|
||||
/* If size/alignment is large, then use the library routines. */
|
||||
if (size / align > 16)
|
||||
FAIL;
|
||||
|
||||
/* This does happen, but not often enough to worry much about. */
|
||||
if (size / align < MOVE_RATIO)
|
||||
FAIL;
|
||||
|
||||
/* Fall through means we're going to use our block clear pattern. */
|
||||
operands[0]
|
||||
= replace_equiv_address (operands[0],
|
||||
copy_to_mode_reg (SImode, XEXP (operands[0], 0)));
|
||||
operands[3] = gen_reg_rtx (SImode);
|
||||
operands[4] = gen_reg_rtx (SImode);
|
||||
}")
|
||||
|
||||
(define_insn "clrstrsi_prereload"
|
||||
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp
|
||||
(clobber (match_operand:SI 4 "register_operand" "=&r,&r")) ;tmp1
|
||||
(use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
|
||||
(use (match_operand:SI 3 "const_int_operand" "n,n"))] ;alignment
|
||||
"!TARGET_64BIT"
|
||||
"#"
|
||||
[(set_attr "type" "multi,multi")])
|
||||
|
||||
(define_split
|
||||
[(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:SI 1 "register_operand" ""))
|
||||
(clobber (match_operand:SI 4 "register_operand" ""))
|
||||
(use (match_operand:SI 2 "arith_operand" ""))
|
||||
(use (match_operand:SI 3 "const_int_operand" ""))])]
|
||||
"!TARGET_64BIT && reload_completed && !flag_peephole2"
|
||||
[(set (match_dup 4) (match_dup 0))
|
||||
(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
|
||||
(clobber (match_dup 1))
|
||||
(clobber (match_dup 4))
|
||||
(use (match_dup 2))
|
||||
(use (match_dup 3))
|
||||
(const_int 0)])]
|
||||
"")
|
||||
|
||||
(define_peephole2
|
||||
[(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:SI 1 "register_operand" ""))
|
||||
(clobber (match_operand:SI 4 "register_operand" ""))
|
||||
(use (match_operand:SI 2 "arith_operand" ""))
|
||||
(use (match_operand:SI 3 "const_int_operand" ""))])]
|
||||
"!TARGET_64BIT"
|
||||
[(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
|
||||
(clobber (match_dup 1))
|
||||
(clobber (match_dup 4))
|
||||
(use (match_dup 2))
|
||||
(use (match_dup 3))
|
||||
(const_int 0)])]
|
||||
"
|
||||
{
|
||||
if (dead_or_set_p (curr_insn, operands[0]))
|
||||
operands[4] = operands[0];
|
||||
else
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0]));
|
||||
}")
|
||||
|
||||
(define_insn "clrstrsi_postreload"
|
||||
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp
|
||||
(clobber (match_dup 0))
|
||||
(use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
|
||||
(use (match_operand:SI 3 "const_int_operand" "n,n")) ;alignment
|
||||
(const_int 0)]
|
||||
"!TARGET_64BIT && reload_completed"
|
||||
"* return output_block_clear (operands, !which_alternative);"
|
||||
[(set_attr "type" "multi,multi")])
|
||||
|
||||
(define_expand "clrstrdi"
|
||||
[(parallel [(set (match_operand:BLK 0 "" "")
|
||||
(const_int 0))
|
||||
(clobber (match_dup 3))
|
||||
(clobber (match_dup 4))
|
||||
(use (match_operand:DI 1 "arith_operand" ""))
|
||||
(use (match_operand:DI 2 "const_int_operand" ""))])]
|
||||
"TARGET_64BIT && optimize > 0"
|
||||
"
|
||||
{
|
||||
int size, align;
|
||||
|
||||
/* Undetermined size, use the library routine. */
|
||||
if (GET_CODE (operands[1]) != CONST_INT)
|
||||
FAIL;
|
||||
|
||||
size = INTVAL (operands[1]);
|
||||
align = INTVAL (operands[2]);
|
||||
align = align > 8 ? 8 : align;
|
||||
|
||||
/* If size/alignment is large, then use the library routines. */
|
||||
if (size / align > 16)
|
||||
FAIL;
|
||||
|
||||
/* This does happen, but not often enough to worry much about. */
|
||||
if (size / align < MOVE_RATIO)
|
||||
FAIL;
|
||||
|
||||
/* Fall through means we're going to use our block clear pattern. */
|
||||
operands[0]
|
||||
= replace_equiv_address (operands[0],
|
||||
copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
|
||||
operands[3] = gen_reg_rtx (DImode);
|
||||
operands[4] = gen_reg_rtx (DImode);
|
||||
}")
|
||||
|
||||
(define_insn "clrstrdi_prereload"
|
||||
[(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp
|
||||
(clobber (match_operand:DI 4 "register_operand" "=&r,&r")) ;item tmp1
|
||||
(use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
|
||||
(use (match_operand:DI 3 "const_int_operand" "n,n"))] ;alignment
|
||||
"TARGET_64BIT"
|
||||
"#"
|
||||
[(set_attr "type" "multi,multi")])
|
||||
|
||||
(define_split
|
||||
[(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:DI 1 "register_operand" ""))
|
||||
(clobber (match_operand:DI 4 "register_operand" ""))
|
||||
(use (match_operand:DI 2 "arith_operand" ""))
|
||||
(use (match_operand:DI 3 "const_int_operand" ""))])]
|
||||
"TARGET_64BIT && reload_completed && !flag_peephole2"
|
||||
[(set (match_dup 4) (match_dup 0))
|
||||
(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
|
||||
(clobber (match_dup 1))
|
||||
(clobber (match_dup 4))
|
||||
(use (match_dup 2))
|
||||
(use (match_dup 3))
|
||||
(const_int 0)])]
|
||||
"")
|
||||
|
||||
(define_peephole2
|
||||
[(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:DI 1 "register_operand" ""))
|
||||
(clobber (match_operand:DI 4 "register_operand" ""))
|
||||
(use (match_operand:DI 2 "arith_operand" ""))
|
||||
(use (match_operand:DI 3 "const_int_operand" ""))])]
|
||||
"TARGET_64BIT"
|
||||
[(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
|
||||
(clobber (match_dup 1))
|
||||
(clobber (match_dup 4))
|
||||
(use (match_dup 2))
|
||||
(use (match_dup 3))
|
||||
(const_int 0)])]
|
||||
"
|
||||
{
|
||||
if (dead_or_set_p (curr_insn, operands[0]))
|
||||
operands[4] = operands[0];
|
||||
else
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0]));
|
||||
}")
|
||||
|
||||
(define_insn "clrstrdi_postreload"
|
||||
[(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
|
||||
(const_int 0))
|
||||
(clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp
|
||||
(clobber (match_dup 0))
|
||||
(use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
|
||||
(use (match_operand:DI 3 "const_int_operand" "n,n")) ;alignment
|
||||
(const_int 0)]
|
||||
"TARGET_64BIT && reload_completed"
|
||||
"* return output_block_clear (operands, !which_alternative);"
|
||||
[(set_attr "type" "multi,multi")])
|
||||
|
||||
;; Floating point move insns
|
||||
|
||||
|