i386.md (memstr): Do not use rep stosb for counts divisible by 4 when optimize_size.
* i386.md (memstr): Do not use rep stosb for counts divisible by 4 when optimize_size. (clrstrsi): Rewrite. (strsethi, strsetqi): New expanders. (strsethi_1, strsetqi_1, rep_stossi, rep_stosqi): New insn patterns. (cmpstrsi): Emit compare insn before cmpstrsi_1 (cmpstrsi_nz): use flags, set type to str, prefix_length to 1. (strlensi_1): Likewise. (cmpstrsi_1): Likewise; do not output compare. (strlen expander): Do not unroll when optimizing for size. (*subsi3_carry): Rename to subsi3_carry (addqi3_cc): New pattern. * i386.h (processor_costs): Add move_ratio field. (MOVE_RATIO): Use move_ratio field, set to 3 for OPTIMIZE_SIZE * i386.c (*_cost): Set move_ratio. (x86_unroll_strlen): Enable for Athlon, PPro and K6 too. (x86_expand_strlensi_1): Rewrite the main loop. From-SVN: r31488
This commit is contained in:
parent
b9f243c201
commit
e2e52e1be7
|
@ -1,3 +1,23 @@
|
|||
Tue Jan 18 16:19:55 MET 2000 Jan Hubicka <hubicka@freesoft.cz>
|
||||
|
||||
* i386.md (memstr): Do not use rep stosb for counts divisible by 4
|
||||
when optimize_size.
|
||||
(clrstrsi): Rewrite.
|
||||
(strsethi, strsetqi): New expanders.
|
||||
(strsethi_1, strsetqi_1, rep_stossi, rep_stosqi): New insn patterns.
|
||||
(cmpstrsi): Emit compare insn before cmpstrsi_1.
|
||||
(cmpstrsi_nz): Use flags, set type to str, prefix_length to 1.
|
||||
(strlensi_1): Likewise.
|
||||
(cmpstrsi_1): Likewise; do not output compare.
|
||||
(strlen expander): Do not unroll when optimizing for size.
|
||||
(*subsi3_carry): Rename to subsi3_carry.
|
||||
(addqi3_cc): New pattern.
|
||||
* i386.h (processor_costs): Add move_ratio field.
|
||||
(MOVE_RATIO): Use move_ratio field, set to 3 for OPTIMIZE_SIZE.
|
||||
* i386.c (*_cost): Set move_ratio.
|
||||
(x86_unroll_strlen): Enable for Athlon, PPro and K6 too.
|
||||
(x86_expand_strlensi_1): Rewrite the main loop.
|
||||
|
||||
2000-01-17 Richard Henderson <rth@cygnus.com>
|
||||
|
||||
* combine.c (combine_simplify_rtx): Give FLOAT_STORE_FLAG_VALUE a mode.
|
||||
|
|
|
@ -64,6 +64,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
|
|||
1, /* cost of multiply per each bit set */
|
||||
23, /* cost of a divide/mod */
|
||||
15, /* "large" insn */
|
||||
3, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{2, 4, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
|
@ -84,6 +85,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
|
|||
1, /* cost of multiply per each bit set */
|
||||
40, /* cost of a divide/mod */
|
||||
15, /* "large" insn */
|
||||
3, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{2, 4, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
|
@ -104,6 +106,7 @@ struct processor_costs pentium_cost = {
|
|||
0, /* cost of multiply per each bit set */
|
||||
25, /* cost of a divide/mod */
|
||||
8, /* "large" insn */
|
||||
6, /* MOVE_RATIO */
|
||||
6, /* cost for loading QImode using movzbl */
|
||||
{2, 4, 2}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
|
@ -124,6 +127,7 @@ struct processor_costs pentiumpro_cost = {
|
|||
0, /* cost of multiply per each bit set */
|
||||
17, /* cost of a divide/mod */
|
||||
8, /* "large" insn */
|
||||
6, /* MOVE_RATIO */
|
||||
2, /* cost for loading QImode using movzbl */
|
||||
{4, 4, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
|
@ -144,6 +148,7 @@ struct processor_costs k6_cost = {
|
|||
0, /* cost of multiply per each bit set */
|
||||
18, /* cost of a divide/mod */
|
||||
8, /* "large" insn */
|
||||
4, /* MOVE_RATIO */
|
||||
3, /* cost for loading QImode using movzbl */
|
||||
{4, 5, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
|
@ -164,6 +169,7 @@ struct processor_costs athlon_cost = {
|
|||
0, /* cost of multiply per each bit set */
|
||||
19, /* cost of a divide/mod */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{4, 5, 4}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
|
@ -191,7 +197,7 @@ const int x86_zero_extend_with_and = m_486 | m_PENT;
|
|||
const int x86_movx = m_ATHLON /* m_386 | m_PPRO | m_K6 */;
|
||||
const int x86_double_with_add = ~m_386;
|
||||
const int x86_use_bit_test = m_386;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
|
||||
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
|
||||
const int x86_use_any_reg = m_486;
|
||||
const int x86_cmove = m_PPRO | m_ATHLON;
|
||||
|
@ -5149,10 +5155,9 @@ ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
|
|||
rtx align_3_label = NULL_RTX;
|
||||
rtx align_4_label = gen_label_rtx ();
|
||||
rtx end_0_label = gen_label_rtx ();
|
||||
rtx end_2_label = gen_label_rtx ();
|
||||
rtx end_3_label = gen_label_rtx ();
|
||||
rtx mem;
|
||||
rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
|
||||
rtx tmpreg = gen_reg_rtx (SImode);
|
||||
|
||||
align = 0;
|
||||
if (GET_CODE (align_rtx) == CONST_INT)
|
||||
|
@ -5269,48 +5274,69 @@ ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
|
|||
|
||||
mem = gen_rtx_MEM (SImode, out);
|
||||
emit_move_insn (scratch, mem);
|
||||
|
||||
/* Check first byte. */
|
||||
emit_insn (gen_cmpqi_0 (gen_lowpart (QImode, scratch), const0_rtx));
|
||||
tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
|
||||
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
|
||||
gen_rtx_LABEL_REF (VOIDmode, end_0_label),
|
||||
pc_rtx);
|
||||
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
|
||||
|
||||
/* Check second byte. */
|
||||
emit_insn (gen_cmpqi_ext_3 (scratch, const0_rtx));
|
||||
tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
|
||||
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
|
||||
gen_rtx_LABEL_REF (VOIDmode, end_3_label),
|
||||
pc_rtx);
|
||||
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
|
||||
|
||||
/* Check third byte. */
|
||||
emit_insn (gen_testsi_1 (scratch, GEN_INT (0x00ff0000)));
|
||||
tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
|
||||
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
|
||||
gen_rtx_LABEL_REF (VOIDmode, end_2_label),
|
||||
pc_rtx);
|
||||
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
|
||||
|
||||
/* Check fourth byte and increment address. */
|
||||
emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
|
||||
emit_insn (gen_testsi_1 (scratch, GEN_INT (0xff000000)));
|
||||
tmp = gen_rtx_NE (VOIDmode, flags, const0_rtx);
|
||||
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
|
||||
gen_rtx_LABEL_REF (VOIDmode, align_4_label),
|
||||
pc_rtx);
|
||||
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
|
||||
|
||||
/* Now generate fixups when the compare stops within a 4-byte word. */
|
||||
emit_insn (gen_subsi3 (out, out, GEN_INT (3)));
|
||||
|
||||
emit_label (end_2_label);
|
||||
emit_insn (gen_addsi3 (out, out, const1_rtx));
|
||||
/* This formula yields a nonzero result iff one of the bytes is zero.
|
||||
This saves three branches inside loop and many cycles. */
|
||||
|
||||
emit_label (end_3_label);
|
||||
emit_insn (gen_addsi3 (out, out, const1_rtx));
|
||||
emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
|
||||
emit_insn (gen_one_cmplsi2 (scratch, scratch));
|
||||
emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
|
||||
emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
|
||||
emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
|
||||
|
||||
if (TARGET_CMOVE)
|
||||
{
|
||||
rtx reg = gen_reg_rtx (SImode);
|
||||
emit_move_insn (reg, tmpreg);
|
||||
emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
|
||||
|
||||
/* If zero is not in the first two bytes, move two bytes forward. */
|
||||
emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
|
||||
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
|
||||
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
|
||||
gen_rtx_IF_THEN_ELSE (SImode, tmp,
|
||||
reg,
|
||||
tmpreg)));
|
||||
/* Emit lea manually to avoid clobbering of flags. */
|
||||
emit_insn (gen_rtx_SET (SImode, reg,
|
||||
gen_rtx_PLUS (SImode, out, GEN_INT (2))));
|
||||
|
||||
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
|
||||
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, out,
|
||||
gen_rtx_IF_THEN_ELSE (SImode, tmp,
|
||||
reg,
|
||||
out)));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx end_2_label = gen_label_rtx ();
|
||||
/* Is zero in the first two bytes? */
|
||||
|
||||
emit_insn (gen_testsi_1 (tmpreg, GEN_INT (0x8080)));
|
||||
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
|
||||
tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
|
||||
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
|
||||
gen_rtx_LABEL_REF (VOIDmode, end_2_label),
|
||||
pc_rtx);
|
||||
tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
|
||||
JUMP_LABEL (tmp) = end_2_label;
|
||||
|
||||
/* Not in the first two. Move two bytes forward. */
|
||||
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
|
||||
emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
|
||||
|
||||
emit_label (end_2_label);
|
||||
|
||||
}
|
||||
|
||||
/* Avoid branch in fixing the byte. */
|
||||
tmpreg = gen_lowpart (QImode, tmpreg);
|
||||
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
|
||||
emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
|
||||
|
||||
emit_label (end_0_label);
|
||||
}
|
||||
|
|
|
@ -62,6 +62,8 @@ struct processor_costs {
|
|||
int mult_bit; /* cost of multiply per each bit set */
|
||||
int divide; /* cost of a divide/mod */
|
||||
int large_insn; /* insns larger than this cost more */
|
||||
int move_ratio; /* The threshold of number of scalar memory-to-memory
|
||||
move insns. */
|
||||
int movzbl_load; /* cost of loading using movzbl */
|
||||
int int_load[3]; /* cost of loading integer registers
|
||||
in QImode, HImode and SImode relative
|
||||
|
@ -1709,13 +1711,9 @@ while (0)
|
|||
Increasing the value will always make code faster, but eventually
|
||||
incurs high cost in increased code size.
|
||||
|
||||
If you don't define this, a reasonable default is used.
|
||||
If you don't define this, a reasonable default is used. */
|
||||
|
||||
Make this large on i386, since the block move is very inefficient with small
|
||||
blocks, and the hard register needs of the block move require much reload
|
||||
work. */
|
||||
|
||||
#define MOVE_RATIO 5
|
||||
#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio)
|
||||
|
||||
/* Define if shifts truncate the shift count
|
||||
which implies one can omit a sign-extension or zero-extension
|
||||
|
|
|
@ -3235,6 +3235,15 @@
|
|||
"add{l}\\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "alu")])
|
||||
|
||||
(define_insn "addqi3_cc"
|
||||
[(set (reg:CC 17) (plus:CC (match_operand:QI 1 "nonimmediate_operand" "%0,0")
|
||||
(match_operand:QI 2 "general_operand" "ri,rm")))
|
||||
(set (match_operand:QI 0 "nonimmediate_operand" "=rm,r")
|
||||
(plus:QI (match_dup 1) (match_dup 2)))]
|
||||
"ix86_binary_operator_ok (PLUS, QImode, operands)"
|
||||
"add{b}\\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "alu")])
|
||||
|
||||
(define_insn "*addsi3_carry"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
|
||||
(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
|
||||
|
@ -3736,7 +3745,7 @@
|
|||
"sub{l}\\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "alu")])
|
||||
|
||||
(define_insn "*subsi3_carry"
|
||||
(define_insn "subsi3_carry"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
|
||||
(minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
|
||||
(plus:SI (match_operand:SI 2 "general_operand" "ri,rm")
|
||||
|
@ -7841,8 +7850,9 @@
|
|||
srcreg = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
|
||||
|
||||
emit_insn (gen_cld());
|
||||
/* When optimizing for size emit simple rep ; movsb instruction. */
|
||||
if (!optimize || optimize_size)
|
||||
/* When optimizing for size emit simple rep ; movsb instruction for
|
||||
counts not divisible by 4. */
|
||||
if ((!optimize || optimize_size) && (INTVAL (operands[2]) & 0x03))
|
||||
{
|
||||
countreg = copy_to_mode_reg (SImode, operands[2]);
|
||||
emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
|
||||
|
@ -7983,84 +7993,143 @@
|
|||
(set_attr "memory" "both")])
|
||||
|
||||
(define_expand "clrstrsi"
|
||||
[(set (reg:SI 19) (const_int 0))
|
||||
(set (match_dup 3) (const_int 0))
|
||||
(parallel [(set (match_operand:BLK 0 "memory_operand" "")
|
||||
(const_int 0))
|
||||
(use (match_operand:SI 1 "const_int_operand" ""))
|
||||
(use (match_operand:SI 2 "const_int_operand" ""))
|
||||
(use (match_dup 3))
|
||||
(use (reg:SI 19))
|
||||
(clobber (match_scratch:SI 4 ""))
|
||||
(clobber (match_dup 5))])]
|
||||
[(use (match_operand:BLK 0 "memory_operand" ""))
|
||||
(use (match_operand:SI 1 "const_int_operand" ""))
|
||||
(use (match_operand:SI 2 "const_int_operand" ""))]
|
||||
""
|
||||
"
|
||||
{
|
||||
rtx addr0;
|
||||
rtx destreg, zeroreg, countreg;
|
||||
|
||||
if (GET_CODE (operands[1]) != CONST_INT)
|
||||
FAIL;
|
||||
|
||||
addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
|
||||
destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
|
||||
|
||||
operands[3] = gen_reg_rtx (SImode);
|
||||
operands[5] = addr0;
|
||||
emit_insn (gen_cld());
|
||||
|
||||
operands[0] = gen_rtx_MEM (BLKmode, addr0);
|
||||
/* When optimizing for size emit simple rep ; stosb instruction for
|
||||
counts not divisible by 4. */
|
||||
if ((!optimize || optimize_size) && (INTVAL (operands[1]) & 0x03))
|
||||
{
|
||||
countreg = copy_to_mode_reg (SImode, operands[1]);
|
||||
zeroreg = copy_to_mode_reg (QImode, const0_rtx);
|
||||
emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
|
||||
destreg, countreg));
|
||||
}
|
||||
else
|
||||
{
|
||||
zeroreg = copy_to_mode_reg (SImode, const0_rtx);
|
||||
if (INTVAL (operands[1]) & ~0x03)
|
||||
{
|
||||
countreg = copy_to_mode_reg (SImode,
|
||||
GEN_INT ((INTVAL (operands[1]) >> 2)
|
||||
& 0x3fffffff));
|
||||
emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
|
||||
destreg, countreg));
|
||||
}
|
||||
if (INTVAL (operands[1]) & 0x02)
|
||||
emit_insn (gen_strsethi (destreg,
|
||||
gen_rtx_SUBREG (HImode, zeroreg, 0)));
|
||||
if (INTVAL (operands[1]) & 0x01)
|
||||
emit_insn (gen_strsetqi (destreg,
|
||||
gen_rtx_SUBREG (QImode, zeroreg, 0)));
|
||||
}
|
||||
DONE;
|
||||
}")
|
||||
|
||||
;; Most CPUs don't like single string operations
|
||||
;; Handle this case here to simplify previous expander.
|
||||
|
||||
(define_expand "strsethi"
|
||||
[(set (mem:HI (match_operand:SI 0 "register_operand" ""))
|
||||
(match_operand:HI 1 "register_operand" ""))
|
||||
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
|
||||
(clobber (reg:CC 17))])]
|
||||
""
|
||||
"
|
||||
{
|
||||
if (TARGET_SINGLE_STRINGOP || optimize_size)
|
||||
{
|
||||
emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1]));
|
||||
DONE;
|
||||
}
|
||||
}")
|
||||
|
||||
(define_expand "strsetqi"
|
||||
[(set (mem:QI (match_operand:SI 0 "register_operand" ""))
|
||||
(match_operand:QI 1 "register_operand" ""))
|
||||
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
|
||||
(clobber (reg:CC 17))])]
|
||||
""
|
||||
"
|
||||
{
|
||||
if (TARGET_SINGLE_STRINGOP || optimize_size)
|
||||
{
|
||||
emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1]));
|
||||
DONE;
|
||||
}
|
||||
}")
|
||||
|
||||
(define_insn "strsethi_1"
|
||||
[(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
|
||||
(match_operand:HI 2 "register_operand" "a"))
|
||||
(set (match_operand:SI 0 "register_operand" "=D")
|
||||
(plus:SI (match_dup 0)
|
||||
(const_int 2)))
|
||||
(use (reg:SI 19))]
|
||||
"TARGET_SINGLE_STRINGOP || optimize_size"
|
||||
"stosw"
|
||||
[(set_attr "type" "str")
|
||||
(set_attr "memory" "store")
|
||||
(set_attr "length_prefix" "1")])
|
||||
|
||||
(define_insn "strsetqi_1"
|
||||
[(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
|
||||
(match_operand:QI 2 "register_operand" "a"))
|
||||
(set (match_operand:SI 0 "register_operand" "=D")
|
||||
(plus:SI (match_dup 0)
|
||||
(const_int 1)))
|
||||
(use (reg:SI 19))]
|
||||
"TARGET_SINGLE_STRINGOP || optimize_size"
|
||||
"stosb"
|
||||
[(set_attr "type" "str")
|
||||
(set_attr "memory" "store")])
|
||||
|
||||
;; It might seem that operand 0 could use predicate register_operand.
|
||||
;; But strength reduction might offset the MEM expression. So we let
|
||||
;; reload put the address into %edi.
|
||||
|
||||
(define_insn "*clrstrsi_1"
|
||||
[(set (mem:BLK (match_operand:SI 0 "address_operand" "D"))
|
||||
(define_insn "rep_stossi"
|
||||
[(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
|
||||
(use (match_operand:SI 2 "register_operand" "a"))
|
||||
(use (match_operand:SI 4 "register_operand" "1"))
|
||||
(set (match_operand:SI 0 "register_operand" "=D")
|
||||
(plus:SI (match_operand:SI 3 "address_operand" "0")
|
||||
(ashift:SI (match_dup 3) (const_int 2))))
|
||||
(set (mem:BLK (match_dup 3))
|
||||
(const_int 0))
|
||||
(use (match_operand:SI 1 "const_int_operand" "n"))
|
||||
(use (match_operand:SI 2 "immediate_operand" "i"))
|
||||
(use (match_operand:SI 3 "register_operand" "a"))
|
||||
(use (reg:SI 19))
|
||||
(clobber (match_scratch:SI 4 "=&c"))
|
||||
(clobber (match_dup 0))]
|
||||
(use (reg:SI 19))]
|
||||
""
|
||||
"*
|
||||
{
|
||||
rtx xops[2];
|
||||
"rep\;stosl|rep stosd"
|
||||
[(set_attr "type" "str")
|
||||
(set_attr "length_prefix" "1")
|
||||
(set_attr "memory" "store")])
|
||||
|
||||
if (GET_CODE (operands[1]) == CONST_INT)
|
||||
{
|
||||
unsigned int count = INTVAL (operands[1]) & 0xffffffff;
|
||||
if (count & ~0x03)
|
||||
{
|
||||
xops[0] = GEN_INT (count / 4);
|
||||
xops[1] = operands[4];
|
||||
|
||||
/* K6: stos takes 1 cycle, rep stos takes 8 + %ecx cycles.
|
||||
80386: 4/5+5n (+2 for set of ecx)
|
||||
80486: 5/7+5n (+1 for set of ecx)
|
||||
*/
|
||||
if (count / 4 < ((int) ix86_cpu < (int)PROCESSOR_PENTIUM ? 4 : 6))
|
||||
{
|
||||
do
|
||||
output_asm_insn (\"{stosl|stosd}\", xops);
|
||||
while ((count -= 4) > 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
output_asm_insn (\"mov{l}\\t{%0, %1|%1, %0}\", xops);
|
||||
output_asm_insn (\"{rep\;stosl|rep stosd}\", xops);
|
||||
}
|
||||
}
|
||||
if (INTVAL (operands[1]) & 0x02)
|
||||
output_asm_insn (\"stosw\", operands);
|
||||
if (INTVAL (operands[1]) & 0x01)
|
||||
output_asm_insn (\"stosb\", operands);
|
||||
}
|
||||
else
|
||||
abort ();
|
||||
RET;
|
||||
}"
|
||||
[(set_attr "type" "multi")])
|
||||
(define_insn "rep_stosqi"
|
||||
[(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
|
||||
(use (match_operand:QI 2 "register_operand" "a"))
|
||||
(use (match_operand:SI 4 "register_operand" "1"))
|
||||
(set (match_operand:SI 0 "register_operand" "=D")
|
||||
(plus:SI (match_operand:SI 3 "address_operand" "0") (match_dup 3)))
|
||||
(set (mem:BLK (match_dup 3))
|
||||
(const_int 0))
|
||||
(use (reg:SI 19))]
|
||||
""
|
||||
"rep\;stosb|rep stosb"
|
||||
[(set_attr "type" "str")
|
||||
(set_attr "length_prefix" "1")
|
||||
(set_attr "memory" "store")])
|
||||
|
||||
(define_expand "cmpstrsi"
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
|
@ -8099,7 +8168,10 @@
|
|||
emit_insn (gen_cmpstrsi_nz_1 (addr1, addr2, countreg, align));
|
||||
}
|
||||
else
|
||||
emit_insn (gen_cmpstrsi_1 (addr1, addr2, countreg, align));
|
||||
{
|
||||
emit_insn (gen_cmpsi_1 (countreg, countreg));
|
||||
emit_insn (gen_cmpstrsi_1 (addr1, addr2, countreg, align));
|
||||
}
|
||||
|
||||
outlow = gen_lowpart (QImode, out);
|
||||
emit_insn (gen_cmpintqi (outlow));
|
||||
|
@ -8145,8 +8217,8 @@
|
|||
(clobber (match_dup 2))]
|
||||
""
|
||||
"repz{\;| }cmpsb"
|
||||
[(set_attr "type" "multi")
|
||||
(set_attr "length" "3")])
|
||||
[(set_attr "type" "str")
|
||||
(set_attr "length_prefix" "1")])
|
||||
|
||||
;; The same, but the count is not known to not be zero.
|
||||
|
||||
|
@ -8158,15 +8230,15 @@
|
|||
(mem:BLK (match_operand:SI 1 "address_operand" "D")))
|
||||
(const_int 0)))
|
||||
(use (match_operand:SI 3 "immediate_operand" "i"))
|
||||
(use (reg:CC 17))
|
||||
(use (reg:SI 19))
|
||||
(clobber (match_dup 0))
|
||||
(clobber (match_dup 1))
|
||||
(clobber (match_dup 2))]
|
||||
""
|
||||
;; The initial compare sets the zero flag.
|
||||
"cmp{l}\\t%2, %2\;repz{\;| }cmpsb"
|
||||
[(set_attr "type" "multi")
|
||||
(set_attr "length" "5")])
|
||||
"repz{\;| }cmpsb"
|
||||
[(set_attr "type" "str")
|
||||
(set_attr "length_prefix" "1")])
|
||||
|
||||
(define_expand "strlensi"
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
|
@ -8184,7 +8256,8 @@
|
|||
align = operands[3];
|
||||
scratch1 = gen_reg_rtx (SImode);
|
||||
|
||||
if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1)
|
||||
if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
|
||||
&& !optimize_size)
|
||||
{
|
||||
/* Well it seems that some optimizer does not combine a call like
|
||||
foo(strlen(bar), strlen(bar));
|
||||
|
@ -8236,8 +8309,8 @@
|
|||
(clobber (reg:CC 17))]
|
||||
""
|
||||
"repnz{\;| }scasb"
|
||||
[(set_attr "type" "multi")
|
||||
(set_attr "length" "3")])
|
||||
[(set_attr "type" "str")
|
||||
(set_attr "length_prefix" "1")])
|
||||
|
||||
;; Conditional move instructions.
|
||||
|
||||
|
|
Loading…
Reference in New Issue