i386.c (x86_double_with_add): Turn off for Pentium and PPro.
* i386.c (x86_double_with_add): Turn off for Pentium and PPro. (small_shift_operand, output_ashlsi3): New functions. * i386.h (small_shift_operand, output_ashlsi3): Declare. * i386.md (ashlsi3): Simplify ashlsi3 patterns. Remove splitters that are no longer needed. From-SVN: r26189
This commit is contained in:
parent
b5d9b9ab00
commit
0a726ef13b
@ -1,12 +1,16 @@
|
|||||||
|
Mon Apr 5 04:47:14 1999 Jeffrey A Law (law@cygnus.com)
|
||||||
|
|
||||||
|
* i386.c (x86_double_with_add): Turn off for Pentium and PPro.
|
||||||
|
(small_shift_operand, output_ashlsi3): New functions.
|
||||||
|
* i386.h (small_shift_operand, output_ashlsi3): Declare.
|
||||||
|
* i386.md (ashlsi3): Simplify ashlsi3 patterns. Remove splitters
|
||||||
|
that are no longer needed.
|
||||||
|
|
||||||
Sun Apr 4 04:05:04 1999 Jeffrey A Law (law@cygnus.com)
|
Sun Apr 4 04:05:04 1999 Jeffrey A Law (law@cygnus.com)
|
||||||
|
|
||||||
* stmt.c (expand_loop_end): When copying the loop exit test,
|
* stmt.c (expand_loop_end): When copying the loop exit test,
|
||||||
do not walk into a nested loop.
|
do not walk into a nested loop.
|
||||||
|
|
||||||
Sun Apr 4 01:15:04 PST 1999 Jeff Law (law@cygnus.com)
|
|
||||||
|
|
||||||
* version.c: Bump for snapshot.
|
|
||||||
|
|
||||||
Sun Apr 4 00:14:54 1999 Jeffrey A Law (law@cygnus.com)
|
Sun Apr 4 00:14:54 1999 Jeffrey A Law (law@cygnus.com)
|
||||||
|
|
||||||
* fixinc/hackshell.tpl: Skip links to directories, to avoid
|
* fixinc/hackshell.tpl: Skip links to directories, to avoid
|
||||||
|
@ -123,7 +123,7 @@ const int x86_use_leave = m_386 | m_K6;
|
|||||||
const int x86_push_memory = m_386 | m_K6;
|
const int x86_push_memory = m_386 | m_K6;
|
||||||
const int x86_zero_extend_with_and = m_486 | m_PENT;
|
const int x86_zero_extend_with_and = m_486 | m_PENT;
|
||||||
const int x86_movx = m_386 | m_PPRO | m_K6;
|
const int x86_movx = m_386 | m_PPRO | m_K6;
|
||||||
const int x86_double_with_add = ~m_386;
|
const int x86_double_with_add = ~(m_386 | m_PENT | m_PPRO);
|
||||||
const int x86_use_bit_test = m_386;
|
const int x86_use_bit_test = m_386;
|
||||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO;
|
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO;
|
||||||
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
|
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
|
||||||
@ -1687,6 +1687,17 @@ symbolic_operand (op, mode)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return nonzero if OP is a constant shift count small enough to
|
||||||
|
encode into an lea instruction. */
|
||||||
|
|
||||||
|
int
|
||||||
|
small_shift_operand (op, mode)
|
||||||
|
rtx op;
|
||||||
|
enum machine_mode mode ATTRIBUTE_UNUSED;
|
||||||
|
{
|
||||||
|
return (GET_CODE (op) == CONST_INT && INTVAL (op) > 0 && INTVAL (op) < 4);
|
||||||
|
}
|
||||||
|
|
||||||
/* Test for a valid operand for a call instruction.
|
/* Test for a valid operand for a call instruction.
|
||||||
Don't allow the arg pointer register or virtual regs
|
Don't allow the arg pointer register or virtual regs
|
||||||
since they may change into reg + const, which the patterns
|
since they may change into reg + const, which the patterns
|
||||||
@ -5595,3 +5606,89 @@ x86_adjust_cost (insn, link, dep_insn, cost)
|
|||||||
|
|
||||||
return cost;
|
return cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Output assembly code for a left shift.
|
||||||
|
|
||||||
|
Always use "sal" when shifting a memory operand or for a non constant
|
||||||
|
shift count.
|
||||||
|
|
||||||
|
When optimizing for size, we know that src == dest, and we should always
|
||||||
|
use "sal". If src != dest, then copy src to dest and use "sal".
|
||||||
|
|
||||||
|
Pentium and PPro (speed):
|
||||||
|
|
||||||
|
When src == dest, use "add" for a shift count of one, else use
|
||||||
|
"sal". If we modeled Pentium AGI stalls and U/V pipelining better we
|
||||||
|
would want to generate lea for some shifts on the Pentium.
|
||||||
|
|
||||||
|
When src != dest, use "lea" for small shift counts. Otherwise,
|
||||||
|
copy src to dest and use the normal shifting code. Exception for
|
||||||
|
TARGET_DOUBLE_WITH_ADD. */
|
||||||
|
|
||||||
|
char *
|
||||||
|
output_ashlsi3 (operands)
|
||||||
|
rtx *operands;
|
||||||
|
{
|
||||||
|
/* Handle case where srcreg != dstreg. */
|
||||||
|
if (REG_P (operands[0]) && REGNO (operands[0]) != REGNO (operands[1]))
|
||||||
|
{
|
||||||
|
if (TARGET_DOUBLE_WITH_ADD && INTVAL (operands[2]) == 1)
|
||||||
|
{
|
||||||
|
output_asm_insn (AS2 (mov%L0,%1,%0), operands);
|
||||||
|
return AS2 (add%L0,%1,%0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CC_STATUS_INIT;
|
||||||
|
|
||||||
|
/* This should be extremely rare (impossible?). We can not encode a
|
||||||
|
shift of the stack pointer using an lea instruction. So copy the
|
||||||
|
stack pointer into the destination register and use an lea. */
|
||||||
|
if (operands[1] == stack_pointer_rtx)
|
||||||
|
{
|
||||||
|
output_asm_insn (AS2 (mov%L0,%1,%0), operands);
|
||||||
|
operands[1] = operands[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For shifts up to and including 3 bits, use lea. */
|
||||||
|
operands[1] = gen_rtx_MULT (SImode, operands[1],
|
||||||
|
GEN_INT (1 << INTVAL (operands[2])));
|
||||||
|
return AS2 (lea%L0,%a1,%0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Source and destination match. */
|
||||||
|
|
||||||
|
/* Handle variable shift. */
|
||||||
|
if (REG_P (operands[2]))
|
||||||
|
return AS2 (sal%L0,%b2,%0);
|
||||||
|
|
||||||
|
/* Always perform shift by 1 using an add instruction. */
|
||||||
|
if (REG_P (operands[0]) && operands[2] == const1_rtx)
|
||||||
|
return AS2 (add%L0,%0,%0);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* ??? Currently disabled. reg-stack currently stomps on the mode of
|
||||||
|
each insn. Thus, we can not easily detect when we should use lea to
|
||||||
|
improve issue characteristics. Until reg-stack is fixed, fall back to
|
||||||
|
sal instruction for Pentiums to avoid AGI stall. */
|
||||||
|
/* To shift reg by 2 or 3, use an lea instruction for Pentium if this
|
||||||
|
insn is expected to issue into the V pipe (the insn's mode will be
|
||||||
|
TImode for a U pipe, and !TImode for a V pipe instruction). */
|
||||||
|
if (! optimize_size
|
||||||
|
&& REG_P (operands[0])
|
||||||
|
&& GET_CODE (operands[2]) == CONST_INT
|
||||||
|
&& INTVAL (operands[2]) <= 3
|
||||||
|
&& (int)ix86_cpu == (int)PROCESSOR_PENTIUM
|
||||||
|
&& GET_MODE (insn) != TImode)
|
||||||
|
{
|
||||||
|
CC_STATUS_INIT;
|
||||||
|
operands[1] = gen_rtx_MULT (SImode, operands[1],
|
||||||
|
GEN_INT (1 << INTVAL (operands[2])));
|
||||||
|
return AS2 (lea%L0,%a1,%0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Otherwise use a shift instruction. */
|
||||||
|
return AS2 (sal%L0,%2,%0);
|
||||||
|
}
|
||||||
|
@ -2765,6 +2765,8 @@ extern int reg_mentioned_in_mem ();
|
|||||||
extern char *output_int_conditional_move ();
|
extern char *output_int_conditional_move ();
|
||||||
extern char *output_fp_conditional_move ();
|
extern char *output_fp_conditional_move ();
|
||||||
extern int ix86_can_use_return_insn_p ();
|
extern int ix86_can_use_return_insn_p ();
|
||||||
|
extern int small_shift_operand ();
|
||||||
|
extern char *output_ashlsi3 ();
|
||||||
|
|
||||||
#ifdef NOTYET
|
#ifdef NOTYET
|
||||||
extern struct rtx_def *copy_all_rtx ();
|
extern struct rtx_def *copy_all_rtx ();
|
||||||
|
@ -4753,205 +4753,27 @@ byte_xor_operation:
|
|||||||
""
|
""
|
||||||
"")
|
"")
|
||||||
|
|
||||||
;; Optimizing for code size:
|
;; Pattern for shifts which can be encoded into an lea instruction.
|
||||||
;; For register destinations:
|
;; This is kept as a separate pattern so that regmove can optimize cases
|
||||||
;; add == 2 bytes, move == 2 bytes, shift == 3 bytes, lea == 7 bytes
|
;; where we know the source and destination must match.
|
||||||
;;
|
;;
|
||||||
;; lea loses when optimizing for size
|
;; Do not expose this pattern when optimizing for size since we never want
|
||||||
;;
|
;; to use lea when optimizing for size since mov+sal is smaller than lea.
|
||||||
;; Do the math. If the count is 1, using add, else using sal will
|
(define_insn ""
|
||||||
;; produce the smallest possible code, even when the source and
|
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,r")
|
||||||
;; dest do not match. For a memory destination, sal is the only
|
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r")
|
||||||
;; choice.
|
(match_operand:SI 2 "small_shift_operand" "M,M")))]
|
||||||
;;
|
"! optimize_size"
|
||||||
;; Do not try to handle case where src and dest do not match. Let regmove
|
"* return output_ashlsi3 (operands);")
|
||||||
;; and reload handle them. A mov followed by this insn will generate the
|
|
||||||
;; desired size optimized results.
|
;; Generic left shift pattern to catch all cases not handled by the
|
||||||
|
;; shift pattern above.
|
||||||
(define_insn ""
|
(define_insn ""
|
||||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
|
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
|
||||||
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
|
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
|
||||||
(match_operand:SI 2 "nonmemory_operand" "cI")))]
|
(match_operand:SI 2 "nonmemory_operand" "cI")))]
|
||||||
"optimize_size"
|
""
|
||||||
"*
|
"* return output_ashlsi3 (operands);")
|
||||||
{
|
|
||||||
if (REG_P (operands[0]) && operands[2] == const1_rtx)
|
|
||||||
return AS2 (add%L0,%0,%0);
|
|
||||||
|
|
||||||
if (REG_P (operands[2]))
|
|
||||||
return AS2 (sal%L0,%b2,%0);
|
|
||||||
return AS2 (sal%L0,%2,%0);
|
|
||||||
}")
|
|
||||||
|
|
||||||
;; For Pentium/Pentium MMX:
|
|
||||||
;;
|
|
||||||
;; We want to optimize for pairability, but avoid generating AGI stalls.
|
|
||||||
;;
|
|
||||||
;; If this insn is expected to issue in the U pipe, then prefer sal,
|
|
||||||
;; else prefer lea for small shifts when srcreg == dstreg.
|
|
||||||
;;
|
|
||||||
;; For PPro/PII
|
|
||||||
;;
|
|
||||||
;; There's more than one approach to optimizing for this family; it is
|
|
||||||
;; unclear which approach is best. For now, we will try to minimize
|
|
||||||
;; uops. Note that sal and lea have the same characteristics, so we
|
|
||||||
;; prefer sal as it takes less space.
|
|
||||||
;;
|
|
||||||
;; We can actually share code for these two cases since the basic techniques
|
|
||||||
;; for generating good code on these chips is the same, even if the final
|
|
||||||
;; code sequences are different.
|
|
||||||
;;
|
|
||||||
;; I do not know what is most appropriate for the AMD or Cyrix chips.
|
|
||||||
;;
|
|
||||||
;; srcreg == dstreg, constant shift count:
|
|
||||||
;;
|
|
||||||
;; For a shift count of one, use "add".
|
|
||||||
;; For a shift count of two or three, use "sal"/"lea" for Pentium and
|
|
||||||
;; Pentium MMX depending on which pipe the insn will execute.
|
|
||||||
;; All others use "sal".
|
|
||||||
;;
|
|
||||||
;; srcreg != dstreg, constant shift count:
|
|
||||||
;;
|
|
||||||
;; For shift counts of one to three, use "lea".
|
|
||||||
;; All others use "lea" for the first shift into the destination reg,
|
|
||||||
;; then fall back on the srcreg == dstreg for the residual shifts.
|
|
||||||
;;
|
|
||||||
;; memory destinations or nonconstant shift count:
|
|
||||||
;;
|
|
||||||
;; Use "sal".
|
|
||||||
;;
|
|
||||||
(define_insn ""
|
|
||||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
|
|
||||||
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r")
|
|
||||||
(match_operand:SI 2 "nonmemory_operand" "cI,I")))]
|
|
||||||
"! optimize_size
|
|
||||||
&& ((int)ix86_cpu == (int)PROCESSOR_PENTIUM
|
|
||||||
|| (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO)"
|
|
||||||
"*
|
|
||||||
{
|
|
||||||
/* This should be extremely rare (impossible?). We can not encode a shift
|
|
||||||
of the stack pointer using an lea instruction. So copy the stack pointer
|
|
||||||
into the destination register and fall into the srcreg == dstreg shifting
|
|
||||||
support. */
|
|
||||||
if (operands[1] == stack_pointer_rtx)
|
|
||||||
{
|
|
||||||
output_asm_insn (AS2 (mov%L0,%1,%0), operands);
|
|
||||||
operands[1] = operands[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Handle case where srcreg != dstreg. */
|
|
||||||
if (REG_P (operands[0]) && REGNO (operands[0]) != REGNO (operands[1]))
|
|
||||||
{
|
|
||||||
/* For counts > 3, it is easiest to split into component insns. */
|
|
||||||
if (INTVAL (operands[2]) > 3)
|
|
||||||
return \"#\";
|
|
||||||
|
|
||||||
/* For shifts up to and including 3 bits, use lea. */
|
|
||||||
operands[1] = gen_rtx_MULT (SImode, operands[1],
|
|
||||||
GEN_INT (1 << INTVAL (operands[2])));
|
|
||||||
return AS2 (lea%L0,%a1,%0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Source and destination match. */
|
|
||||||
|
|
||||||
/* Handle variable shift. */
|
|
||||||
if (REG_P (operands[2]))
|
|
||||||
return AS2 (sal%L0,%b2,%0);
|
|
||||||
|
|
||||||
/* Always perform shift by 1 using an add instruction. */
|
|
||||||
if (REG_P (operands[0]) && operands[2] == const1_rtx)
|
|
||||||
return AS2 (add%L0,%0,%0);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* ??? Currently disabled. reg-stack currently stomps on the mode of
|
|
||||||
each insn. Thus, we can not easily detect when we should use lea to
|
|
||||||
improve issue characteristics. Until reg-stack is fixed, fall back to
|
|
||||||
sal instruction for Pentiums to avoid AGI stall. */
|
|
||||||
/* Shift reg by 2 or 3 use an lea instruction for Pentium if this is
|
|
||||||
insn is expected to issue into the V pipe (the insn's mode will be
|
|
||||||
TImode for a U pipe, and !TImode for a V pipe instruction). */
|
|
||||||
if (REG_P (operands[0])
|
|
||||||
&& GET_CODE (operands[2]) == CONST_INT
|
|
||||||
&& INTVAL (operands[2]) <= 3
|
|
||||||
&& (int)ix86_cpu == (int)PROCESSOR_PENTIUM
|
|
||||||
&& GET_MODE (insn) != TImode)
|
|
||||||
{
|
|
||||||
operands[1] = gen_rtx_MULT (SImode, operands[1],
|
|
||||||
GEN_INT (1 << INTVAL (operands[2])));
|
|
||||||
return AS2 (lea%L0,%a1,%0);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Otherwise use a shift instruction. */
|
|
||||||
return AS2 (sal%L0,%2,%0);
|
|
||||||
}")
|
|
||||||
|
|
||||||
;; Pentium/PPro/PII Splitter used when srcreg != destreg and shift
|
|
||||||
;; count is > 3. In each case we use lea to perform the first three
|
|
||||||
;; shifts into the destination register, then we fall back to the
|
|
||||||
;; normal shifting code for the residual shifts.
|
|
||||||
(define_split
|
|
||||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
|
||||||
(ashift:SI (match_operand:SI 1 "register_operand" "r")
|
|
||||||
(match_operand:SI 2 "immediate_operand" "I")))]
|
|
||||||
"reload_completed
|
|
||||||
&& ! optimize_size
|
|
||||||
&& ((int)ix86_cpu == (int)PROCESSOR_PENTIUM
|
|
||||||
|| (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO)
|
|
||||||
&& GET_CODE (operands[2]) == CONST_INT
|
|
||||||
&& INTVAL (operands[2]) > 3
|
|
||||||
&& true_regnum (operands[0]) != true_regnum (operands[1])"
|
|
||||||
[(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
|
|
||||||
(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3)))]
|
|
||||||
"
|
|
||||||
{
|
|
||||||
operands[3] = GEN_INT (INTVAL (operands[2]) - 3);
|
|
||||||
operands[2] = GEN_INT (3);
|
|
||||||
}")
|
|
||||||
|
|
||||||
|
|
||||||
;; On i386 and i486, "addl reg,reg" is faster than "sall $1,reg"
|
|
||||||
;; On i486, movl/sall appears slightly faster than leal, but the leal
|
|
||||||
;; is smaller - use leal for now unless the shift count is 1.
|
|
||||||
;;
|
|
||||||
(define_insn ""
|
|
||||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
|
|
||||||
(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r")
|
|
||||||
(match_operand:SI 2 "nonmemory_operand" "cI,M")))]
|
|
||||||
"! optimize_size
|
|
||||||
&& ! ((int)ix86_cpu == (int)PROCESSOR_PENTIUM
|
|
||||||
|| (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO)"
|
|
||||||
"*
|
|
||||||
{
|
|
||||||
if (REG_P (operands[0]) && REGNO (operands[0]) != REGNO (operands[1]))
|
|
||||||
{
|
|
||||||
if (TARGET_DOUBLE_WITH_ADD && INTVAL (operands[2]) == 1)
|
|
||||||
{
|
|
||||||
output_asm_insn (AS2 (mov%L0,%1,%0), operands);
|
|
||||||
return AS2 (add%L0,%1,%0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
CC_STATUS_INIT;
|
|
||||||
|
|
||||||
if (operands[1] == stack_pointer_rtx)
|
|
||||||
{
|
|
||||||
output_asm_insn (AS2 (mov%L0,%1,%0), operands);
|
|
||||||
operands[1] = operands[0];
|
|
||||||
}
|
|
||||||
operands[1] = gen_rtx_MULT (SImode, operands[1],
|
|
||||||
GEN_INT (1 << INTVAL (operands[2])));
|
|
||||||
return AS2 (lea%L0,%a1,%0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (REG_P (operands[2]))
|
|
||||||
return AS2 (sal%L0,%b2,%0);
|
|
||||||
|
|
||||||
if (REG_P (operands[0]) && operands[2] == const1_rtx)
|
|
||||||
return AS2 (add%L0,%0,%0);
|
|
||||||
|
|
||||||
return AS2 (sal%L0,%2,%0);
|
|
||||||
}")
|
|
||||||
|
|
||||||
(define_insn "ashlhi3"
|
(define_insn "ashlhi3"
|
||||||
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
|
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
|
||||||
|
Loading…
Reference in New Issue
Block a user