re PR rtl-optimization/12630 (Various unrecognizable insns and ICEs at -O3)

PR optimization/12630
	* pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
	Handle length computation for 64-bit moves.
	(compute_clrstr_length, output_block_clear): Implement block clear.
	(output_block_move): Handle 64-bit moves.
	(pa_adjust_insn_length): Use compute_movstr_length and
	compute_clrstr_length.
	* pa.md (movstrsi): Revise operand order and comments.  Don't use
	match_scratch.
	(movstrsi_internal): Delete.
	(movstrsi_prereload, movstrsi_postreload): New insns.  Define splitter
	and peephole2 patterns to transform prereload to postreload form.
	(movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
	clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
	clrstrdi_postreload): New patterns for 64-bit block move, and block
	clear.
	* pa-protos.h (output_block_clear): New prototype.

From-SVN: r73375
John David Anglin <dave.anglin@nrc-cnrc.gc.ca>, 2003-11-08 20:36:30 +0000
parent ac47cc13ab, commit cdc9103cba
4 changed files with 669 additions and 28 deletions
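The movstr*/clrstr* expanders touched here are what GCC uses to inline block copies and block clears of known size, such as whole-struct assignments and fixed-size memset calls. A rough C example of the kind of source that exercises the new 64-bit patterns (illustration only; this is not the test case from PR 12630, and the names are invented):

/* Hypothetical example: a fixed-size, 8-byte-aligned struct copy and clear.
   With this patch a TARGET_64BIT PA compiler can expand both through the new
   movstrdi and clrstrdi patterns instead of calling memcpy/memset, provided
   the size/alignment heuristics in pa.md accept them.  */
struct blk { long a, b, c, d; };            /* 32 bytes, 8-byte aligned */

void copy_blk (struct blk *dst, const struct blk *src)
{
  *dst = *src;                              /* block move: size 32, align 8 */
}

void clear_blk (struct blk *dst)
{
  __builtin_memset (dst, 0, sizeof *dst);   /* block clear: size 32, align 8 */
}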

gcc/ChangeLog

@@ -1,3 +1,23 @@
2003-11-08 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
PR optimization/12630
* pa.c (compute_movstrsi_length): Rename to compute_movstr_length.
Handle length computation for 64-bit moves.
(compute_clrstr_length, output_block_clear): Implement block clear.
(output_block_move): Handle 64-bit moves.
(pa_adjust_insn_length): Use compute_movstr_length and
compute_clrstr_length.
* pa.md (movstrsi): Revise operand order and comments. Don't use
match_scratch.
(movstrsi_internal): Delete.
(movstrsi_prereload, movstrsi_postreload): New insns. Define splitter
and peephole2 patterns to transform prereload to postreload form.
(movstrdi, movstrdi_prereload, movstrdi_postreload, clrstrsi,
clrstrsi_prereload, clrstrsi_postreload, clrstrdi, clrstrdi_prereload,
clrstrdi_postreload): New patterns for 64-bit block move, and block
clear.
* pa-protos.h (output_block_clear): New prototype.
2003-11-08 Andreas Schwab <schwab@suse.de>
* dbxout.c (current_file): Also wrap inside DBX_DEBUGGING_INFO ||

gcc/config/pa/pa-protos.h

@@ -40,6 +40,7 @@ extern const char *output_ior (rtx *);
extern const char *output_move_double (rtx *);
extern const char *output_fp_move_double (rtx *);
extern const char *output_block_move (rtx *, int);
extern const char *output_block_clear (rtx *, int);
extern const char *output_cbranch (rtx *, int, int, int, rtx);
extern const char *output_lbranch (rtx, rtx);
extern const char *output_bb (rtx *, int, int, int, rtx, int);

gcc/config/pa/pa.c

@@ -103,7 +103,8 @@ static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static int shadd_constant_p (int);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movstrsi_length (rtx);
static int compute_movstr_length (rtx);
static int compute_clrstr_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, int, int);
@@ -2431,8 +2432,8 @@ find_addr_reg (rtx addr)
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is the source pointer as a REG, clobbered.
OPERANDS[2] is a register for temporary storage.
OPERANDS[4] is the size as a CONST_INT
OPERANDS[3] is a register for temporary storage.
OPERANDS[4] is the size as a CONST_INT
OPERANDS[5] is the alignment safe to use, as a CONST_INT.
OPERANDS[6] is another temporary register. */
@@ -2442,15 +2443,43 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
int align = INTVAL (operands[5]);
unsigned long n_bytes = INTVAL (operands[4]);
/* We can't move more than four bytes at a time because the PA
/* We can't move more than a word at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > 4)
align = 4;
if (align > (TARGET_64BIT ? 8 : 4))
align = (TARGET_64BIT ? 8 : 4);
/* Note that we know each loop below will execute at least twice
(else we would have open-coded the copy). */
switch (align)
{
case 8:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 16);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("ldd,ma 8(%1),%3", operands);
output_asm_insn ("ldd,ma 8(%1),%6", operands);
output_asm_insn ("std,ma %3,8(%0)", operands);
output_asm_insn ("addib,>= -16,%2,.-12", operands);
output_asm_insn ("std,ma %6,8(%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 16 != 0)
{
operands[4] = GEN_INT (n_bytes % 8);
if (n_bytes % 16 >= 8)
output_asm_insn ("ldd,ma 8(%1),%3", operands);
if (n_bytes % 8 != 0)
output_asm_insn ("ldd 0(%1),%6", operands);
if (n_bytes % 16 >= 8)
output_asm_insn ("std,ma %3,8(%0)", operands);
if (n_bytes % 8 != 0)
output_asm_insn ("stdby,e %6,%4(%0)", operands);
}
return "";
case 4:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 8);
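The residual handling in the new "case 8" path above is easiest to see with concrete byte counts. A rough C model of its bookkeeping (illustration only, not GCC code, not part of this commit): the unrolled loop moves 16 bytes per iteration, then at most one whole doubleword plus a partial doubleword (stored with stdby,e) remain.

#include <stdio.h>

/* Model of the 8-byte-aligned copy loop's byte bookkeeping.  */
static void model_case8 (unsigned long n_bytes)
{
  unsigned long loop_bytes = n_bytes - n_bytes % 16; /* copied by the loop  */
  int extra_dword = (n_bytes % 16) >= 8;             /* one more ldd/std    */
  unsigned long tail = n_bytes % 8;                  /* bytes for stdby,e   */
  printf ("%lu bytes: loop %lu, extra doubleword %d, stdby tail %lu\n",
          n_bytes, loop_bytes, extra_dword, tail);
}

int main (void)
{
  model_case8 (32);  /* loop copies all 32 bytes, no residual            */
  model_case8 (40);  /* loop 32, one extra doubleword, no partial store  */
  model_case8 (22);  /* loop 16, no extra doubleword, stdby stores 6     */
  return 0;
}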
@@ -2536,7 +2565,7 @@ output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
count insns rather than emit them. */
static int
compute_movstrsi_length (rtx insn)
compute_movstr_length (rtx insn)
{
rtx pat = PATTERN (insn);
unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
@@ -2545,8 +2574,8 @@ compute_movstrsi_length (rtx insn)
/* We can't move more than four bytes at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > 4)
align = 4;
if (align > (TARGET_64BIT ? 8 : 4))
align = (TARGET_64BIT ? 8 : 4);
/* The basic copying loop. */
n_insns = 6;
@@ -2564,6 +2593,148 @@ compute_movstrsi_length (rtx insn)
/* Lengths are expressed in bytes now; each insn is 4 bytes. */
return n_insns * 4;
}
/* Emit code to perform a block clear.
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is a register for temporary storage.
OPERANDS[2] is the size as a CONST_INT
OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
const char *
output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
int align = INTVAL (operands[3]);
unsigned long n_bytes = INTVAL (operands[2]);
/* We can't clear more than a word at a time because the PA
has no longer integer move insns. */
if (align > (TARGET_64BIT ? 8 : 4))
align = (TARGET_64BIT ? 8 : 4);
/* Note that we know each loop below will execute at least twice
(else we would have open-coded the copy). */
switch (align)
{
case 8:
/* Pre-adjust the loop counter. */
operands[2] = GEN_INT (n_bytes - 16);
output_asm_insn ("ldi %2,%1", operands);
/* Loop. */
output_asm_insn ("std,ma %%r0,8(%0)", operands);
output_asm_insn ("addib,>= -16,%1,.-4", operands);
output_asm_insn ("std,ma %%r0,8(%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 16 != 0)
{
operands[2] = GEN_INT (n_bytes % 8);
if (n_bytes % 16 >= 8)
output_asm_insn ("std,ma %%r0,8(%0)", operands);
if (n_bytes % 8 != 0)
output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
}
return "";
case 4:
/* Pre-adjust the loop counter. */
operands[2] = GEN_INT (n_bytes - 8);
output_asm_insn ("ldi %2,%1", operands);
/* Loop. */
output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
output_asm_insn ("addib,>= -8,%1,.-4", operands);
output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 8 != 0)
{
operands[2] = GEN_INT (n_bytes % 4);
if (n_bytes % 8 >= 4)
output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
}
return "";
case 2:
/* Pre-adjust the loop counter. */
operands[2] = GEN_INT (n_bytes - 4);
output_asm_insn ("ldi %2,%1", operands);
/* Loop. */
output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
output_asm_insn ("addib,>= -4,%1,.-4", operands);
output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
/* Handle the residual. */
if (n_bytes % 4 != 0)
{
if (n_bytes % 4 >= 2)
output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("stb %%r0,0(%0)", operands);
}
return "";
case 1:
/* Pre-adjust the loop counter. */
operands[2] = GEN_INT (n_bytes - 2);
output_asm_insn ("ldi %2,%1", operands);
/* Loop. */
output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
output_asm_insn ("addib,>= -2,%1,.-4", operands);
output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
/* Handle the residual. */
if (n_bytes % 2 != 0)
output_asm_insn ("stb %%r0,0(%0)", operands);
return "";
default:
abort ();
}
}
/* Count the number of insns necessary to handle this block move.
Basic structure is the same as emit_block_move, except that we
count insns rather than emit them. */
static int
compute_clrstr_length (rtx insn)
{
rtx pat = PATTERN (insn);
unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
unsigned int n_insns = 0;
/* We can't clear more than a word at a time because the PA
has no longer integer move insns. */
if (align > (TARGET_64BIT ? 8 : 4))
align = (TARGET_64BIT ? 8 : 4);
/* The basic loop. */
n_insns = 4;
/* Residuals. */
if (n_bytes % (2 * align) != 0)
{
if ((n_bytes % (2 * align)) >= align)
n_insns++;
if ((n_bytes % align) != 0)
n_insns++;
}
/* Lengths are expressed in bytes now; each insn is 4 bytes. */
return n_insns * 4;
}
const char *
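The length formula in compute_clrstr_length is compact enough to restate as a standalone sketch (illustration only, not part of this commit): the basic clearing loop is 4 insns (ldi, two stores, addib), plus up to two residual stores, at 4 bytes per PA insn.

#include <assert.h>

/* Restatement of the insn-count formula in compute_clrstr_length.  */
static int clrstr_length_model (unsigned long n_bytes, unsigned int align)
{
  unsigned int n_insns = 4;            /* ldi + two stores + addib          */
  if (n_bytes % (2 * align) != 0)
    {
      if (n_bytes % (2 * align) >= align)
        n_insns++;                     /* one full-width residual store     */
      if (n_bytes % align != 0)
        n_insns++;                     /* one partial store (stdby/stby/stb) */
    }
  return n_insns * 4;
}

int main (void)
{
  assert (clrstr_length_model (32, 8) == 16);  /* loop only               */
  assert (clrstr_length_model (28, 8) == 24);  /* loop + std,ma + stdby,e */
  assert (clrstr_length_model (22, 8) == 20);  /* loop + stdby,e          */
  return 0;
}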
@@ -4337,7 +4508,15 @@ pa_adjust_insn_length (rtx insn, int length)
&& GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
&& GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
&& GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
return compute_movstrsi_length (insn) - 4;
return compute_movstr_length (insn) - 4;
/* Block clear pattern. */
else if (GET_CODE (insn) == INSN
&& GET_CODE (pat) == PARALLEL
&& GET_CODE (XVECEXP (pat, 0, 0)) == SET
&& GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
&& XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
&& GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
return compute_clrstr_length (insn) - 4;
/* Conditional branch with an unfilled delay slot. */
else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
{

gcc/config/pa/pa.md

@@ -2955,20 +2955,20 @@
(set_attr "length" "4")])
;; The definition of this insn does not really explain what it does,
;; but it should suffice
;; that anything generated as this insn will be recognized as one
;; and that it will not successfully combine with anything.
;; but it should suffice that anything generated as this insn will be
;; recognized as a movstrsi operation, and that it will not successfully
;; combine with anything.
(define_expand "movstrsi"
[(parallel [(set (match_operand:BLK 0 "" "")
(match_operand:BLK 1 "" ""))
(clobber (match_scratch:SI 7 ""))
(clobber (match_scratch:SI 8 ""))
(clobber (match_dup 4))
(clobber (match_dup 5))
(clobber (match_dup 6))
(clobber (match_dup 7))
(clobber (match_dup 8))
(use (match_operand:SI 2 "arith_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))])]
"!TARGET_64BIT"
"!TARGET_64BIT && optimize > 0"
"
{
int size, align;
@@ -2990,7 +2990,7 @@
If the size is large in respect to the known alignment, then use
the library routines.
If the size is small in repsect to the known alignment, then open
If the size is small in respect to the known alignment, then open
code the copy (since that will lead to better scheduling).
Else use the block move pattern. */
@@ -3003,8 +3003,7 @@
align = INTVAL (operands[3]);
align = align > 4 ? 4 : align;
/* If size/alignment > 8 (eg size is large in respect to alignment),
then use the library routines. */
/* If size/alignment is large, then use the library routines. */
if (size / align > 16)
FAIL;
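The inline-vs-library decision made by these expanders can be summarized in a small C sketch (illustration only, not GCC code; MOVE_RATIO is a target-defined threshold, and a non-constant size always takes the library path):

enum strategy { USE_LIBCALL, OPEN_CODE, USE_BLOCK_PATTERN };

enum strategy pick_strategy (long size, int align, int move_ratio)
{
  if (size < 0)                   /* stand-in for "size is not a CONST_INT" */
    return USE_LIBCALL;
  if (align > 4)
    align = 4;                    /* SImode expander; the DImode one caps at 8 */
  if (size / align > 16)          /* large relative to the known alignment    */
    return USE_LIBCALL;
  if (size / align < move_ratio)  /* small: generic open-coding wins          */
    return OPEN_CODE;
  return USE_BLOCK_PATTERN;       /* otherwise emit the block move/clear insn */
}

For instance, a 256-byte copy with 4-byte alignment (ratio 64, above 16) FAILs to the library call, while a 32-byte copy (ratio 8) can go through the block-move pattern if MOVE_RATIO permits.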
@@ -3022,28 +3021,470 @@
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);
operands[6] = gen_reg_rtx (SImode);
operands[7] = XEXP (operands[0], 0);
operands[8] = XEXP (operands[1], 0);
operands[7] = gen_reg_rtx (SImode);
operands[8] = gen_reg_rtx (SImode);
}")
;; The operand constraints are written like this to support both compile-time
;; and run-time determined byte count. If the count is run-time determined,
;; the register with the byte count is clobbered by the copying code, and
;; therefore it is forced to operand 2. If the count is compile-time
;; determined, we need two scratch registers for the unrolled code.
(define_insn "movstrsi_internal"
;; and run-time determined byte counts. The expander and output_block_move
;; only support compile-time determined counts at this time.
;;
;; If the count is run-time determined, the register with the byte count
;; is clobbered by the copying code, and therefore it is forced to operand 2.
;;
;; We used to clobber operands 0 and 1. However, a change to regrename.c
;; broke this semantic for pseudo registers. We can't use match_scratch
;; as this requires two registers in the class R1_REGS when the MEMs for
;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are
;; forced to internally copy operands 0 and 1 to operands 7 and 8,
;; respectively. We then split or peephole optimize after reload.
(define_insn "movstrsi_prereload"
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
(mem:BLK (match_operand:SI 1 "register_operand" "r,r")))
(clobber (match_scratch:SI 7 "=0,0"))
(clobber (match_scratch:SI 8 "=1,1"))
(clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp
(clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1
(clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
(clobber (match_operand:SI 7 "register_operand" "=&r,&r")) ;item tmp3
(clobber (match_operand:SI 8 "register_operand" "=&r,&r")) ;item tmp4
(use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
(use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment
"!TARGET_64BIT"
"#"
[(set_attr "type" "multi,multi")])
(define_split
[(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
(mem:BLK (match_operand:SI 1 "register_operand" "")))
(clobber (match_operand:SI 2 "register_operand" ""))
(clobber (match_operand:SI 3 "register_operand" ""))
(clobber (match_operand:SI 6 "register_operand" ""))
(clobber (match_operand:SI 7 "register_operand" ""))
(clobber (match_operand:SI 8 "register_operand" ""))
(use (match_operand:SI 4 "arith_operand" ""))
(use (match_operand:SI 5 "const_int_operand" ""))])]
"!TARGET_64BIT && reload_completed && !flag_peephole2"
[(set (match_dup 7) (match_dup 0))
(set (match_dup 8) (match_dup 1))
(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
(clobber (match_dup 2))
(clobber (match_dup 3))
(clobber (match_dup 6))
(clobber (match_dup 7))
(clobber (match_dup 8))
(use (match_dup 4))
(use (match_dup 5))
(const_int 0)])]
"")
(define_peephole2
[(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
(mem:BLK (match_operand:SI 1 "register_operand" "")))
(clobber (match_operand:SI 2 "register_operand" ""))
(clobber (match_operand:SI 3 "register_operand" ""))
(clobber (match_operand:SI 6 "register_operand" ""))
(clobber (match_operand:SI 7 "register_operand" ""))
(clobber (match_operand:SI 8 "register_operand" ""))
(use (match_operand:SI 4 "arith_operand" ""))
(use (match_operand:SI 5 "const_int_operand" ""))])]
"!TARGET_64BIT"
[(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
(clobber (match_dup 2))
(clobber (match_dup 3))
(clobber (match_dup 6))
(clobber (match_dup 7))
(clobber (match_dup 8))
(use (match_dup 4))
(use (match_dup 5))
(const_int 0)])]
"
{
if (dead_or_set_p (curr_insn, operands[0]))
operands[7] = operands[0];
else
emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0]));
if (dead_or_set_p (curr_insn, operands[1]))
operands[8] = operands[1];
else
emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1]));
}")
(define_insn "movstrsi_postreload"
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
(mem:BLK (match_operand:SI 1 "register_operand" "r,r")))
(clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1
(clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
(clobber (match_dup 0))
(clobber (match_dup 1))
(use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
(use (match_operand:SI 5 "const_int_operand" "n,n")) ;alignment
(const_int 0)]
"!TARGET_64BIT && reload_completed"
"* return output_block_move (operands, !which_alternative);"
[(set_attr "type" "multi,multi")])
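The prereload pattern deliberately leaves the destination and source addresses in separate pseudos (operands 7 and 8). After reload, one of two paths runs: with peephole2 disabled, the splitter always emits two explicit register copies before the postreload insn; with peephole2, a copy is emitted only for an address register that is still live, and a dead register is clobbered directly. A minimal C model of that decision (illustration only, not GCC code):

#include <stdio.h>

/* Number of address-register copies emitted before the postreload insn.  */
int copies_needed (int use_peephole2, int dst_reg_dead, int src_reg_dead)
{
  if (!use_peephole2)
    return 2;              /* splitter: unconditional copies to operands 7/8 */
  return !dst_reg_dead + !src_reg_dead;   /* peephole2: copy only live regs  */
}

int main (void)
{
  printf ("split: %d, peephole2 both dead: %d, one live: %d\n",
          copies_needed (0, 1, 1), copies_needed (1, 1, 1),
          copies_needed (1, 1, 0));
  return 0;
}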
(define_expand "movstrdi"
[(parallel [(set (match_operand:BLK 0 "" "")
(match_operand:BLK 1 "" ""))
(clobber (match_dup 4))
(clobber (match_dup 5))
(clobber (match_dup 6))
(clobber (match_dup 7))
(clobber (match_dup 8))
(use (match_operand:DI 2 "arith_operand" ""))
(use (match_operand:DI 3 "const_int_operand" ""))])]
"TARGET_64BIT && optimize > 0"
"
{
int size, align;
/* HP provides very fast block move library routine for the PA;
this routine includes:
4x4 byte at a time block moves,
1x4 byte at a time with alignment checked at runtime with
attempts to align the source and destination as needed
1x1 byte loop
With that in mind, here's the heuristics to try and guess when
the inlined block move will be better than the library block
move:
If the size isn't constant, then always use the library routines.
If the size is large in respect to the known alignment, then use
the library routines.
If the size is small in respect to the known alignment, then open
code the copy (since that will lead to better scheduling).
Else use the block move pattern. */
/* Undetermined size, use the library routine. */
if (GET_CODE (operands[2]) != CONST_INT)
FAIL;
size = INTVAL (operands[2]);
align = INTVAL (operands[3]);
align = align > 8 ? 8 : align;
/* If size/alignment is large, then use the library routines. */
if (size / align > 16)
FAIL;
/* This does happen, but not often enough to worry much about. */
if (size / align < MOVE_RATIO)
FAIL;
/* Fall through means we're going to use our block move pattern. */
operands[0]
= replace_equiv_address (operands[0],
copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
operands[1]
= replace_equiv_address (operands[1],
copy_to_mode_reg (DImode, XEXP (operands[1], 0)));
operands[4] = gen_reg_rtx (DImode);
operands[5] = gen_reg_rtx (DImode);
operands[6] = gen_reg_rtx (DImode);
operands[7] = gen_reg_rtx (DImode);
operands[8] = gen_reg_rtx (DImode);
}")
;; The operand constraints are written like this to support both compile-time
;; and run-time determined byte counts. The expander and output_block_move
;; only support compile-time determined counts at this time.
;;
;; If the count is run-time determined, the register with the byte count
;; is clobbered by the copying code, and therefore it is forced to operand 2.
;;
;; We used to clobber operands 0 and 1. However, a change to regrename.c
;; broke this semantic for pseudo registers. We can't use match_scratch
;; as this requires two registers in the class R1_REGS when the MEMs for
;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are
;; forced to internally copy operands 0 and 1 to operands 7 and 8,
;; respectively. We then split or peephole optimize after reload.
(define_insn "movstrdi_prereload"
[(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
(mem:BLK (match_operand:DI 1 "register_operand" "r,r")))
(clobber (match_operand:DI 2 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1
(clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2
(clobber (match_operand:DI 7 "register_operand" "=&r,&r")) ;item tmp3
(clobber (match_operand:DI 8 "register_operand" "=&r,&r")) ;item tmp4
(use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count
(use (match_operand:DI 5 "const_int_operand" "n,n"))] ;alignment
"TARGET_64BIT"
"#"
[(set_attr "type" "multi,multi")])
(define_split
[(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
(mem:BLK (match_operand:DI 1 "register_operand" "")))
(clobber (match_operand:DI 2 "register_operand" ""))
(clobber (match_operand:DI 3 "register_operand" ""))
(clobber (match_operand:DI 6 "register_operand" ""))
(clobber (match_operand:DI 7 "register_operand" ""))
(clobber (match_operand:DI 8 "register_operand" ""))
(use (match_operand:DI 4 "arith_operand" ""))
(use (match_operand:DI 5 "const_int_operand" ""))])]
"TARGET_64BIT && reload_completed && !flag_peephole2"
[(set (match_dup 7) (match_dup 0))
(set (match_dup 8) (match_dup 1))
(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
(clobber (match_dup 2))
(clobber (match_dup 3))
(clobber (match_dup 6))
(clobber (match_dup 7))
(clobber (match_dup 8))
(use (match_dup 4))
(use (match_dup 5))
(const_int 0)])]
"")
(define_peephole2
[(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
(mem:BLK (match_operand:DI 1 "register_operand" "")))
(clobber (match_operand:DI 2 "register_operand" ""))
(clobber (match_operand:DI 3 "register_operand" ""))
(clobber (match_operand:DI 6 "register_operand" ""))
(clobber (match_operand:DI 7 "register_operand" ""))
(clobber (match_operand:DI 8 "register_operand" ""))
(use (match_operand:DI 4 "arith_operand" ""))
(use (match_operand:DI 5 "const_int_operand" ""))])]
"TARGET_64BIT"
[(parallel [(set (mem:BLK (match_dup 7)) (mem:BLK (match_dup 8)))
(clobber (match_dup 2))
(clobber (match_dup 3))
(clobber (match_dup 6))
(clobber (match_dup 7))
(clobber (match_dup 8))
(use (match_dup 4))
(use (match_dup 5))
(const_int 0)])]
"
{
if (dead_or_set_p (curr_insn, operands[0]))
operands[7] = operands[0];
else
emit_insn (gen_rtx_SET (VOIDmode, operands[7], operands[0]));
if (dead_or_set_p (curr_insn, operands[1]))
operands[8] = operands[1];
else
emit_insn (gen_rtx_SET (VOIDmode, operands[8], operands[1]));
}")
(define_insn "movstrdi_postreload"
[(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
(mem:BLK (match_operand:DI 1 "register_operand" "r,r")))
(clobber (match_operand:DI 2 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1
(clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2
(clobber (match_dup 0))
(clobber (match_dup 1))
(use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count
(use (match_operand:DI 5 "const_int_operand" "n,n")) ;alignment
(const_int 0)]
"TARGET_64BIT && reload_completed"
"* return output_block_move (operands, !which_alternative);"
[(set_attr "type" "multi,multi")])
(define_expand "clrstrsi"
[(parallel [(set (match_operand:BLK 0 "" "")
(const_int 0))
(clobber (match_dup 3))
(clobber (match_dup 4))
(use (match_operand:SI 1 "arith_operand" ""))
(use (match_operand:SI 2 "const_int_operand" ""))])]
"!TARGET_64BIT && optimize > 0"
"
{
int size, align;
/* Undetermined size, use the library routine. */
if (GET_CODE (operands[1]) != CONST_INT)
FAIL;
size = INTVAL (operands[1]);
align = INTVAL (operands[2]);
align = align > 4 ? 4 : align;
/* If size/alignment is large, then use the library routines. */
if (size / align > 16)
FAIL;
/* This does happen, but not often enough to worry much about. */
if (size / align < MOVE_RATIO)
FAIL;
/* Fall through means we're going to use our block clear pattern. */
operands[0]
= replace_equiv_address (operands[0],
copy_to_mode_reg (SImode, XEXP (operands[0], 0)));
operands[3] = gen_reg_rtx (SImode);
operands[4] = gen_reg_rtx (SImode);
}")
(define_insn "clrstrsi_prereload"
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
(const_int 0))
(clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:SI 4 "register_operand" "=&r,&r")) ;tmp1
(use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
(use (match_operand:SI 3 "const_int_operand" "n,n"))] ;alignment
"!TARGET_64BIT"
"#"
[(set_attr "type" "multi,multi")])
(define_split
[(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
(const_int 0))
(clobber (match_operand:SI 1 "register_operand" ""))
(clobber (match_operand:SI 4 "register_operand" ""))
(use (match_operand:SI 2 "arith_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))])]
"!TARGET_64BIT && reload_completed && !flag_peephole2"
[(set (match_dup 4) (match_dup 0))
(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
(clobber (match_dup 1))
(clobber (match_dup 4))
(use (match_dup 2))
(use (match_dup 3))
(const_int 0)])]
"")
(define_peephole2
[(parallel [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
(const_int 0))
(clobber (match_operand:SI 1 "register_operand" ""))
(clobber (match_operand:SI 4 "register_operand" ""))
(use (match_operand:SI 2 "arith_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))])]
"!TARGET_64BIT"
[(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
(clobber (match_dup 1))
(clobber (match_dup 4))
(use (match_dup 2))
(use (match_dup 3))
(const_int 0)])]
"
{
if (dead_or_set_p (curr_insn, operands[0]))
operands[4] = operands[0];
else
emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0]));
}")
(define_insn "clrstrsi_postreload"
[(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
(const_int 0))
(clobber (match_operand:SI 1 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_dup 0))
(use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
(use (match_operand:SI 3 "const_int_operand" "n,n")) ;alignment
(const_int 0)]
"!TARGET_64BIT && reload_completed"
"* return output_block_clear (operands, !which_alternative);"
[(set_attr "type" "multi,multi")])
(define_expand "clrstrdi"
[(parallel [(set (match_operand:BLK 0 "" "")
(const_int 0))
(clobber (match_dup 3))
(clobber (match_dup 4))
(use (match_operand:DI 1 "arith_operand" ""))
(use (match_operand:DI 2 "const_int_operand" ""))])]
"TARGET_64BIT && optimize > 0"
"
{
int size, align;
/* Undetermined size, use the library routine. */
if (GET_CODE (operands[1]) != CONST_INT)
FAIL;
size = INTVAL (operands[1]);
align = INTVAL (operands[2]);
align = align > 8 ? 8 : align;
/* If size/alignment is large, then use the library routines. */
if (size / align > 16)
FAIL;
/* This does happen, but not often enough to worry much about. */
if (size / align < MOVE_RATIO)
FAIL;
/* Fall through means we're going to use our block clear pattern. */
operands[0]
= replace_equiv_address (operands[0],
copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
operands[3] = gen_reg_rtx (DImode);
operands[4] = gen_reg_rtx (DImode);
}")
(define_insn "clrstrdi_prereload"
[(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
(const_int 0))
(clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:DI 4 "register_operand" "=&r,&r")) ;item tmp1
(use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
(use (match_operand:DI 3 "const_int_operand" "n,n"))] ;alignment
"TARGET_64BIT"
"#"
[(set_attr "type" "multi,multi")])
(define_split
[(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
(const_int 0))
(clobber (match_operand:DI 1 "register_operand" ""))
(clobber (match_operand:DI 4 "register_operand" ""))
(use (match_operand:DI 2 "arith_operand" ""))
(use (match_operand:DI 3 "const_int_operand" ""))])]
"TARGET_64BIT && reload_completed && !flag_peephole2"
[(set (match_dup 4) (match_dup 0))
(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
(clobber (match_dup 1))
(clobber (match_dup 4))
(use (match_dup 2))
(use (match_dup 3))
(const_int 0)])]
"")
(define_peephole2
[(parallel [(set (mem:BLK (match_operand:DI 0 "register_operand" ""))
(const_int 0))
(clobber (match_operand:DI 1 "register_operand" ""))
(clobber (match_operand:DI 4 "register_operand" ""))
(use (match_operand:DI 2 "arith_operand" ""))
(use (match_operand:DI 3 "const_int_operand" ""))])]
"TARGET_64BIT"
[(parallel [(set (mem:BLK (match_dup 4)) (const_int 0))
(clobber (match_dup 1))
(clobber (match_dup 4))
(use (match_dup 2))
(use (match_dup 3))
(const_int 0)])]
"
{
if (dead_or_set_p (curr_insn, operands[0]))
operands[4] = operands[0];
else
emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[0]));
}")
(define_insn "clrstrdi_postreload"
[(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
(const_int 0))
(clobber (match_operand:DI 1 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_dup 0))
(use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
(use (match_operand:DI 3 "const_int_operand" "n,n")) ;alignment
(const_int 0)]
"TARGET_64BIT && reload_completed"
"* return output_block_clear (operands, !which_alternative);"
[(set_attr "type" "multi,multi")])
;; Floating point move insns