re PR target/17990 (sse used for negate without -mfpmath=sse)
PR target/17990 * config/i386/i386.c (x86_use_bt): New. (ix86_expand_unary_operator): Use MEM_P. (ix86_expand_fp_absneg_operator): New. * config/i386/i386.h (x86_use_bt): Declare. (TARGET_USE_BT): New. * config/i386/i386-protos.h: Update. * config/i386/i386.md (negsf2): Use ix86_expand_fp_absneg_operator. (negdf2, negxf2, abssf2, absdf2, absxf2): Likewise. (negsf2_memory, negsf2_ifs, negsf2_if, negdf2_memory, negdf2_ifs, negdf2_ifs_rex64, negdf2_if, negdf2_if_rex64, negxf2_if, abssf2_memory, abssf2_ifs, abssf2_if, absdf2_memory, absdf2_ifs, absdf2_ifs_rex64, absdf2_if, absxf2_if): Remove. (absnegsf2_mixed, absnegsf2_sse, absnegsf2_i387, absnegdf2_mixed, absnegdf2_sse, absnegdf2_i387, absnegxf2_i387): New. Merge all neg and abs splitters. Handle DFmode in general regs in 64-bit mode. (negextendsfdf2, absextendsfdf2): Disable for non-mixed sse math. (btsq, btrq, btcq): New. Add peepholes as well. (movv4sf_internal splitter): Postpone til after reload. (movv2di_internal splitter): Likewise. * config/i386/predicates.md (const_0_to_63_operand): New. (absneg_operator): New. From-SVN: r92165
This commit is contained in:
parent
6af713e46c
commit
7cacf53e50
|
@ -1,3 +1,28 @@
|
|||
2004-12-14 Richard Henderson <rth@redhat.com>
|
||||
|
||||
PR target/17990
|
||||
* config/i386/i386.c (x86_use_bt): New.
|
||||
(ix86_expand_unary_operator): Use MEM_P.
|
||||
(ix86_expand_fp_absneg_operator): New.
|
||||
* config/i386/i386.h (x86_use_bt): Declare.
|
||||
(TARGET_USE_BT): New.
|
||||
* config/i386/i386-protos.h: Update.
|
||||
* config/i386/i386.md (negsf2): Use ix86_expand_fp_absneg_operator.
|
||||
(negdf2, negxf2, abssf2, absdf2, absxf2): Likewise.
|
||||
(negsf2_memory, negsf2_ifs, negsf2_if, negdf2_memory, negdf2_ifs,
|
||||
negdf2_ifs_rex64, negdf2_if, negdf2_if_rex64, negxf2_if,
|
||||
abssf2_memory, abssf2_ifs, abssf2_if, absdf2_memory, absdf2_ifs,
|
||||
absdf2_ifs_rex64, absdf2_if, absxf2_if): Remove.
|
||||
(absnegsf2_mixed, absnegsf2_sse, absnegsf2_i387, absnegdf2_mixed,
|
||||
absnegdf2_sse, absnegdf2_i387, absnegxf2_i387): New. Merge all
|
||||
neg and abs splitters. Handle DFmode in general regs in 64-bit mode.
|
||||
(negextendsfdf2, absextendsfdf2): Disable for non-mixed sse math.
|
||||
(btsq, btrq, btcq): New. Add peepholes as well.
|
||||
(movv4sf_internal splitter): Postpone til after reload.
|
||||
(movv2di_internal splitter): Likewise.
|
||||
* config/i386/predicates.md (const_0_to_63_operand): New.
|
||||
(absneg_operator): New.
|
||||
|
||||
2004-12-14 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
|
||||
|
||||
* pa.h (REGNO_OK_FOR_INDEX_P, REGNO_OK_FOR_BASE_P, REGNO_OK_FOR_FP_P):
|
||||
|
|
|
@ -130,6 +130,8 @@ extern void ix86_expand_binary_operator (enum rtx_code,
|
|||
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
|
||||
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
|
||||
rtx[]);
|
||||
extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
|
||||
rtx[]);
|
||||
extern int ix86_unary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
|
||||
extern int ix86_match_ccmode (rtx, enum machine_mode);
|
||||
extern rtx ix86_expand_compare (enum rtx_code, rtx *, rtx *);
|
||||
|
|
|
@ -579,6 +579,7 @@ const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPR
|
|||
the 16 byte window. */
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
|
||||
const int x86_use_bt = m_ATHLON_K8;
|
||||
|
||||
/* In case the average insn count for single function invocation is
|
||||
lower than this constant, emit fast (but longer) prologue and
|
||||
|
@ -7636,7 +7637,7 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
|
|||
/* If the destination is memory, and we do not have matching source
|
||||
operands, do things in registers. */
|
||||
matching_memory = 0;
|
||||
if (GET_CODE (dst) == MEM)
|
||||
if (MEM_P (dst))
|
||||
{
|
||||
if (rtx_equal_p (dst, src))
|
||||
matching_memory = 1;
|
||||
|
@ -7645,10 +7646,10 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
|
|||
}
|
||||
|
||||
/* When source operand is memory, destination must match. */
|
||||
if (!matching_memory && GET_CODE (src) == MEM)
|
||||
if (MEM_P (src) && !matching_memory)
|
||||
src = force_reg (mode, src);
|
||||
|
||||
/* If optimizing, copy to regs to improve CSE */
|
||||
/* If optimizing, copy to regs to improve CSE. */
|
||||
if (optimize && ! no_new_pseudos)
|
||||
{
|
||||
if (GET_CODE (dst) == MEM)
|
||||
|
@ -7695,6 +7696,91 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/* Generate code for floating point ABS or NEG. */
|
||||
|
||||
void
|
||||
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
|
||||
rtx operands[])
|
||||
{
|
||||
rtx mask, set, use, clob, dst, src;
|
||||
bool matching_memory;
|
||||
bool use_sse = false;
|
||||
|
||||
if (TARGET_SSE_MATH)
|
||||
{
|
||||
if (mode == SFmode)
|
||||
use_sse = true;
|
||||
else if (mode == DFmode && TARGET_SSE2)
|
||||
use_sse = true;
|
||||
}
|
||||
|
||||
/* NEG and ABS performed with SSE use bitwise mask operations.
|
||||
Create the appropriate mask now. */
|
||||
if (use_sse)
|
||||
{
|
||||
HOST_WIDE_INT hi, lo;
|
||||
int shift = 63;
|
||||
|
||||
/* Find the sign bit, sign extended to 2*HWI. */
|
||||
if (mode == SFmode)
|
||||
lo = 0x80000000, hi = lo < 0;
|
||||
else if (HOST_BITS_PER_WIDE_INT >= 64)
|
||||
lo = (HOST_WIDE_INT)1 << shift, hi = -1;
|
||||
else
|
||||
lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
|
||||
|
||||
/* If we're looking for the absolute value, then we want
|
||||
the complement. */
|
||||
if (code == ABS)
|
||||
lo = ~lo, hi = ~hi;
|
||||
|
||||
/* Force this value into the low part of a fp vector constant. */
|
||||
mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
|
||||
mask = gen_lowpart (mode, mask);
|
||||
if (mode == SFmode)
|
||||
mask = gen_rtx_CONST_VECTOR (V4SFmode,
|
||||
gen_rtvec (4, mask, CONST0_RTX (SFmode),
|
||||
CONST0_RTX (SFmode),
|
||||
CONST0_RTX (SFmode)));
|
||||
else
|
||||
mask = gen_rtx_CONST_VECTOR (V2DFmode,
|
||||
gen_rtvec (2, mask, CONST0_RTX (DFmode)));
|
||||
mask = force_reg (GET_MODE (mask), mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* When not using SSE, we don't use the mask, but prefer to keep the
|
||||
same general form of the insn pattern to reduce duplication when
|
||||
it comes time to split. */
|
||||
mask = const0_rtx;
|
||||
}
|
||||
|
||||
dst = operands[0];
|
||||
src = operands[1];
|
||||
|
||||
/* If the destination is memory, and we don't have matching source
|
||||
operands, do things in registers. */
|
||||
matching_memory = false;
|
||||
if (MEM_P (dst))
|
||||
{
|
||||
if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
|
||||
matching_memory = true;
|
||||
else
|
||||
dst = gen_reg_rtx (mode);
|
||||
}
|
||||
if (MEM_P (src) && !matching_memory)
|
||||
src = force_reg (mode, src);
|
||||
|
||||
set = gen_rtx_fmt_e (code, mode, src);
|
||||
set = gen_rtx_SET (VOIDmode, dst, set);
|
||||
use = gen_rtx_USE (VOIDmode, mask);
|
||||
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
|
||||
|
||||
if (dst != operands[0])
|
||||
emit_move_insn (operands[0], dst);
|
||||
}
|
||||
|
||||
/* Return TRUE or FALSE depending on whether the first SET in INSN
|
||||
has source and destination with matching CC modes, and that the
|
||||
CC mode is at least as constrained as REQ_MODE. */
|
||||
|
|
|
@ -247,6 +247,7 @@ extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
|
|||
extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
|
||||
extern const int x86_use_ffreep, x86_sse_partial_regs_for_cvtsd2ss;
|
||||
extern const int x86_inter_unit_moves, x86_schedule;
|
||||
extern const int x86_use_bt;
|
||||
extern int x86_prefetch_sse;
|
||||
|
||||
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
|
||||
|
@ -302,6 +303,7 @@ extern int x86_prefetch_sse;
|
|||
#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
|
||||
#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
|
||||
#define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
|
||||
#define TARGET_USE_BT (x86_use_bt & TUNEMASK)
|
||||
|
||||
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -536,6 +536,11 @@
|
|||
(and (match_code "const_int")
|
||||
(match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
|
||||
|
||||
;; Match 0 to 63.
|
||||
(define_predicate "const_0_to_63_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "INTVAL (op) >= 0 && INTVAL (op) <= 63")))
|
||||
|
||||
;; Match 0 to 255.
|
||||
(define_predicate "const_0_to_255_operand"
|
||||
(and (match_code "const_int")
|
||||
|
@ -835,3 +840,6 @@
|
|||
|
||||
(define_predicate "compare_operator"
|
||||
(match_code "compare"))
|
||||
|
||||
(define_predicate "absneg_operator"
|
||||
(match_code "abs,neg"))
|
||||
|
|
Loading…
Reference in New Issue