re PR target/17990 (sse used for negate without -mfpmath=sse)

PR target/17990
        * config/i386/i386.c (x86_use_bt): New.
        (ix86_expand_unary_operator): Use MEM_P.
        (ix86_expand_fp_absneg_operator): New.
        * config/i386/i386.h (x86_use_bt): Declare.
        (TARGET_USE_BT): New.
        * config/i386/i386-protos.h: Update.
        * config/i386/i386.md (negsf2): Use ix86_expand_fp_absneg_operator.
        (negdf2, negxf2, abssf2, absdf2, absxf2): Likewise.
        (negsf2_memory, negsf2_ifs, negsf2_if, negdf2_memory, negdf2_ifs,
        negdf2_ifs_rex64, negdf2_if, negdf2_if_rex64, negxf2_if,
        abssf2_memory, abssf2_ifs, abssf2_if, absdf2_memory, absdf2_ifs,
        absdf2_ifs_rex64, absdf2_if, absxf2_if): Remove.
        (absnegsf2_mixed, absnegsf2_sse, absnegsf2_i387, absnegdf2_mixed,
        absnegdf2_sse, absnegdf2_i387, absnegxf2_i387): New.  Merge all
        neg and abs splitters.  Handle DFmode in general regs in 64-bit mode.
        (negextendsfdf2, absextendsfdf2): Disable for non-mixed sse math.
        (btsq, btrq, btcq): New.  Add peepholes as well.
        (movv4sf_internal splitter): Postpone until after reload.
        (movv2di_internal splitter): Likewise.
        * config/i386/predicates.md (const_0_to_63_operand): New.
        (absneg_operator): New.

From-SVN: r92165
This commit is contained in:
Richard Henderson 2004-12-14 14:45:28 -08:00 committed by Richard Henderson
parent 6af713e46c
commit 7cacf53e50
6 changed files with 526 additions and 682 deletions

View File

@ -1,3 +1,28 @@
2004-12-14 Richard Henderson <rth@redhat.com>
PR target/17990
* config/i386/i386.c (x86_use_bt): New.
(ix86_expand_unary_operator): Use MEM_P.
(ix86_expand_fp_absneg_operator): New.
* config/i386/i386.h (x86_use_bt): Declare.
(TARGET_USE_BT): New.
* config/i386/i386-protos.h: Update.
* config/i386/i386.md (negsf2): Use ix86_expand_fp_absneg_operator.
(negdf2, negxf2, abssf2, absdf2, absxf2): Likewise.
(negsf2_memory, negsf2_ifs, negsf2_if, negdf2_memory, negdf2_ifs,
negdf2_ifs_rex64, negdf2_if, negdf2_if_rex64, negxf2_if,
abssf2_memory, abssf2_ifs, abssf2_if, absdf2_memory, absdf2_ifs,
absdf2_ifs_rex64, absdf2_if, absxf2_if): Remove.
(absnegsf2_mixed, absnegsf2_sse, absnegsf2_i387, absnegdf2_mixed,
absnegdf2_sse, absnegdf2_i387, absnegxf2_i387): New. Merge all
neg and abs splitters. Handle DFmode in general regs in 64-bit mode.
(negextendsfdf2, absextendsfdf2): Disable for non-mixed sse math.
(btsq, btrq, btcq): New. Add peepholes as well.
(movv4sf_internal splitter): Postpone until after reload.
(movv2di_internal splitter): Likewise.
* config/i386/predicates.md (const_0_to_63_operand): New.
(absneg_operator): New.
2004-12-14 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
* pa.h (REGNO_OK_FOR_INDEX_P, REGNO_OK_FOR_BASE_P, REGNO_OK_FOR_FP_P):
@ -53,8 +78,8 @@
2004-12-13 Richard Henderson <rth@redhat.com>
PR middle-end/17930
* passes.c (rest_of_clean_state): Fix computation of
preferred_incoming_stack_boundary.
* passes.c (rest_of_clean_state): Fix computation of
preferred_incoming_stack_boundary.
2004-12-13 Wolfgang Bangerth <bangerth@dealii.org>

View File

@ -130,6 +130,8 @@ extern void ix86_expand_binary_operator (enum rtx_code,
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]);
extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
rtx[]);
extern int ix86_unary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
extern int ix86_match_ccmode (rtx, enum machine_mode);
extern rtx ix86_expand_compare (enum rtx_code, rtx *, rtx *);

View File

@ -579,6 +579,7 @@ const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPR
the 16 byte window. */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
/* Processor mask tested against TUNEMASK by the TARGET_USE_BT macro
   in i386.h.  */
const int x86_use_bt = m_ATHLON_K8;
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
@ -7636,7 +7637,7 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
/* If the destination is memory, and we do not have matching source
operands, do things in registers. */
matching_memory = 0;
if (GET_CODE (dst) == MEM)
if (MEM_P (dst))
{
if (rtx_equal_p (dst, src))
matching_memory = 1;
@ -7645,10 +7646,10 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
}
/* When source operand is memory, destination must match. */
if (!matching_memory && GET_CODE (src) == MEM)
if (MEM_P (src) && !matching_memory)
src = force_reg (mode, src);
/* If optimizing, copy to regs to improve CSE */
/* If optimizing, copy to regs to improve CSE. */
if (optimize && ! no_new_pseudos)
{
if (GET_CODE (dst) == MEM)
@ -7695,6 +7696,91 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
return TRUE;
}
/* Generate code for floating point ABS or NEG.

   CODE is the rtx code of the operation, MODE the floating point mode
   being operated on, OPERANDS[0] the destination and OPERANDS[1] the
   source.

   A single PARALLEL is emitted containing the SET, a USE of a mask
   operand and a CLOBBER of the flags register.  When SSE math handles
   MODE the mask is a vector constant forced into a register; otherwise
   the mask is const0_rtx so that every variant shares one insn shape.
   If the operation could not target OPERANDS[0] directly, the result is
   computed into a fresh pseudo and then moved to OPERANDS[0].  */
void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;

  /* SSE is used for SFmode whenever SSE math is enabled, and for DFmode
     only when SSE2 is available as well.  */
  if (TARGET_SSE_MATH)
    {
      if (mode == SFmode)
        use_sse = true;
      else if (mode == DFmode && TARGET_SSE2)
        use_sse = true;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    {
      HOST_WIDE_INT hi, lo;
      int shift = 63;

      /* Find the sign bit, sign extended to 2*HWI.  The shifts are done
         on the unsigned type because shifting a signed 1 into the sign
         bit is undefined behavior.  */
      if (mode == SFmode)
        {
          lo = 0x80000000;
          /* The high word must be all ones, not 1, when LO is negative.  */
          hi = lo < 0 ? -1 : 0;
        }
      else if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          lo = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) 1 << shift);
          hi = -1;
        }
      else
        {
          /* Only live when HOST_WIDE_INT is narrower than 64 bits: the
             sign bit then falls in the high word.  */
          lo = 0;
          hi = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) 1
                                << (shift - HOST_BITS_PER_WIDE_INT));
        }

      /* If we're looking for the absolute value, then we want
         the complement.  */
      if (code == ABS)
        lo = ~lo, hi = ~hi;

      /* Force this value into the low part of a fp vector constant.  */
      mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
      mask = gen_lowpart (mode, mask);
      if (mode == SFmode)
        mask = gen_rtx_CONST_VECTOR (V4SFmode,
                                     gen_rtvec (4, mask, CONST0_RTX (SFmode),
                                                CONST0_RTX (SFmode),
                                                CONST0_RTX (SFmode)));
      else
        mask = gen_rtx_CONST_VECTOR (V2DFmode,
                                     gen_rtvec (2, mask, CONST0_RTX (DFmode)));
      mask = force_reg (GET_MODE (mask), mask);
    }
  else
    {
      /* When not using SSE, we don't use the mask, but prefer to keep the
         same general form of the insn pattern to reduce duplication when
         it comes time to split.  */
      mask = const0_rtx;
    }

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands, do things in registers.  A matching memory operand is
     only kept in place when not optimizing or when new pseudos cannot
     be created.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
        matching_memory = true;
      else
        dst = gen_reg_rtx (mode);
    }

  /* A memory source is only acceptable when it matches the destination.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);
  use = gen_rtx_USE (VOIDmode, mask);
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));

  /* Copy the result back if a temporary stood in for the destination.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
has source and destination with matching CC modes, and that the
CC mode is at least as constrained as REQ_MODE. */

View File

@ -247,6 +247,7 @@ extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
extern const int x86_use_ffreep, x86_sse_partial_regs_for_cvtsd2ss;
extern const int x86_inter_unit_moves, x86_schedule;
extern const int x86_use_bt;
extern int x86_prefetch_sse;
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
@ -302,6 +303,7 @@ extern int x86_prefetch_sse;
#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
#define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
#define TARGET_USE_BT (x86_use_bt & TUNEMASK)
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)

File diff suppressed because it is too large Load Diff

View File

@ -536,6 +536,11 @@
(and (match_code "const_int")
(match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
;; Match 0 to 63.
(define_predicate "const_0_to_63_operand"
(and (match_code "const_int")
(match_test "INTVAL (op) >= 0 && INTVAL (op) <= 63")))
;; Match 0 to 255.
(define_predicate "const_0_to_255_operand"
(and (match_code "const_int")
@ -835,3 +840,6 @@
(define_predicate "compare_operator"
(match_code "compare"))
;; Match an rtx whose code is ABS or NEG.
(define_predicate "absneg_operator"
(match_code "abs,neg"))