i386.c (ix86_expand_sse_cmp): Split out from ...
* config/i386/i386.c (ix86_expand_sse_cmp): Split out from ... (ix86_expand_sse_movcc): ... here. Take cmp as a pre-computed register. (ix86_expand_fp_movcc): Update to match. (ix86_expand_fp_vcond, ix86_expand_int_vcond): New. * config/i386/i386-protos.h: Update. * config/i386/sse.md (vcondv4sf, vcondv2df): New. (vcond<SSEMODE124>, vcondu<SSEMODE12>): New. * lib/target-supports.exp (check_effective_target_vect_condition): Add ia64, i?86, and x86_64. From-SVN: r98146
This commit is contained in:
parent
25f710babd
commit
ae46a07ab2
@ -1,3 +1,14 @@
|
||||
2005-04-14 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386.c (ix86_expand_sse_cmp): Split out from ...
|
||||
(ix86_expand_sse_movcc): ... here. Take cmp as a pre-computed
|
||||
register.
|
||||
(ix86_expand_fp_movcc): Update to match.
|
||||
(ix86_expand_fp_vcond, ix86_expand_int_vcond): New.
|
||||
* config/i386/i386-protos.h: Update.
|
||||
* config/i386/sse.md (vcondv4sf, vcondv2df): New.
|
||||
(vcond<SSEMODE124>, vcondu<SSEMODE12>): New.
|
||||
|
||||
2005-04-14 Joseph S. Myers <joseph@codesourcery.com>
|
||||
|
||||
* doc/cpp.texi, doc/install.texi: Change references to GCC 3.5 to
|
||||
|
@ -150,6 +150,8 @@ extern void ix86_expand_branch (enum rtx_code, rtx);
|
||||
extern int ix86_expand_setcc (enum rtx_code, rtx);
|
||||
extern int ix86_expand_int_movcc (rtx[]);
|
||||
extern int ix86_expand_fp_movcc (rtx[]);
|
||||
extern bool ix86_expand_fp_vcond (rtx[]);
|
||||
extern bool ix86_expand_int_vcond (rtx[], bool);
|
||||
extern int ix86_expand_int_addcc (rtx[]);
|
||||
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
|
||||
extern void x86_initialize_trampoline (rtx, rtx, rtx);
|
||||
|
@ -10141,12 +10141,14 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|
||||
rtx op_true, rtx op_false)
|
||||
/* Expand an sse vector comparison. Return the register with the result. */
|
||||
|
||||
static rtx
|
||||
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|
||||
rtx op_true, rtx op_false)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (dest);
|
||||
rtx t1, t2, t3, x;
|
||||
rtx x;
|
||||
|
||||
cmp_op0 = force_reg (mode, cmp_op0);
|
||||
if (!nonimmediate_operand (cmp_op1, mode))
|
||||
@ -10155,24 +10157,33 @@ ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|
||||
if (optimize
|
||||
|| reg_overlap_mentioned_p (dest, op_true)
|
||||
|| reg_overlap_mentioned_p (dest, op_false))
|
||||
t1 = gen_reg_rtx (mode);
|
||||
else
|
||||
t1 = dest;
|
||||
dest = gen_reg_rtx (mode);
|
||||
|
||||
x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
|
||||
gcc_assert (sse_comparison_operator (x, VOIDmode));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t1, x));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
|
||||
operations. This is used for both scalar and vector conditional moves. */
|
||||
|
||||
static void
|
||||
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (dest);
|
||||
rtx t2, t3, x;
|
||||
|
||||
if (op_false == CONST0_RTX (mode))
|
||||
{
|
||||
op_true = force_reg (mode, op_true);
|
||||
x = gen_rtx_AND (mode, t1, op_true);
|
||||
x = gen_rtx_AND (mode, cmp, op_true);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
|
||||
}
|
||||
else if (op_true == CONST0_RTX (mode))
|
||||
{
|
||||
op_false = force_reg (mode, op_false);
|
||||
x = gen_rtx_NOT (mode, t1);
|
||||
x = gen_rtx_NOT (mode, cmp);
|
||||
x = gen_rtx_AND (mode, x, op_false);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
|
||||
}
|
||||
@ -10187,10 +10198,10 @@ ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|
||||
else
|
||||
t3 = dest;
|
||||
|
||||
x = gen_rtx_AND (mode, op_true, t1);
|
||||
x = gen_rtx_AND (mode, op_true, cmp);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t2, x));
|
||||
|
||||
x = gen_rtx_NOT (mode, t1);
|
||||
x = gen_rtx_NOT (mode, cmp);
|
||||
x = gen_rtx_AND (mode, x, op_false);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t3, x));
|
||||
|
||||
@ -10199,6 +10210,8 @@ ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|
||||
}
|
||||
}
|
||||
|
||||
/* Expand a floating-point conditional move. Return true if successful. */
|
||||
|
||||
int
|
||||
ix86_expand_fp_movcc (rtx operands[])
|
||||
{
|
||||
@ -10230,8 +10243,9 @@ ix86_expand_fp_movcc (rtx operands[])
|
||||
operands[3]))
|
||||
return 1;
|
||||
|
||||
ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0,
|
||||
ix86_compare_op1, operands[2], operands[3]);
|
||||
tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
|
||||
ix86_compare_op1, operands[2], operands[3]);
|
||||
ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -10282,6 +10296,124 @@ ix86_expand_fp_movcc (rtx operands[])
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Expand a floating-point vector conditional move; a vcond operation
|
||||
rather than a movcc operation.
OPERANDS[0] is the destination, OPERANDS[3] supplies the comparison code,
OPERANDS[4] and OPERANDS[5] are the values compared, and OPERANDS[1] and
OPERANDS[2] are the values selected when the comparison is true and
false respectively.  Returns false when
ix86_prepare_sse_fp_compare_args yields UNKNOWN, true otherwise. */
|
||||
|
||||
bool
|
||||
ix86_expand_fp_vcond (rtx operands[])
|
||||
{
|
||||
enum rtx_code code = GET_CODE (operands[3]);
|
||||
rtx cmp;
|
||||
|
||||
/* The helper receives the addresses of operands[4] and operands[5], so it
   may replace them as well as returning a different comparison code. */
code = ix86_prepare_sse_fp_compare_args (operands[0], code,
|
||||
&operands[4], &operands[5]);
|
||||
if (code == UNKNOWN)
|
||||
return false;
|
||||
|
||||
/* Give the min/max expander the first chance; it returns true when it
   has handled the operation. */
if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
|
||||
operands[5], operands[1], operands[2]))
|
||||
return true;
|
||||
|
||||
/* General case: compute the comparison result, then use it to select
   between operands[1] and operands[2]. */
cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
|
||||
operands[1], operands[2]);
|
||||
ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Expand a signed or unsigned integral vector conditional move.
OPERANDS[0] is the destination, OPERANDS[3] supplies the comparison code,
OPERANDS[4] and OPERANDS[5] are the values compared, and OPERANDS[1] and
OPERANDS[2] are the values selected when the comparison is true and
false respectively.  UNSIGNEDP is true when the comparison is unsigned.
Entries of OPERANDS may be swapped or replaced in place.  Returns true;
no failure path is present. */
|
||||
|
||||
bool
|
||||
ix86_expand_int_vcond (rtx operands[], bool unsignedp)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (operands[0]);
|
||||
enum rtx_code code = GET_CODE (operands[3]);
|
||||
rtx cmp, x;
|
||||
|
||||
/* Work with the signed form of the code from here on; UNSIGNEDP still
   records that the original comparison was unsigned. */
if (unsignedp)
|
||||
code = signed_condition (code);
|
||||
if (code == NE || code == LE || code == GE)
|
||||
{
|
||||
/* Inverse of a supported code. */
|
||||
x = operands[1];
|
||||
operands[1] = operands[2];
|
||||
operands[2] = x;
|
||||
code = reverse_condition (code);
|
||||
}
|
||||
if (code == LT)
|
||||
{
|
||||
/* Swap of a supported code. */
|
||||
x = operands[4];
|
||||
operands[4] = operands[5];
|
||||
operands[5] = x;
|
||||
code = swap_condition (code);
|
||||
}
|
||||
/* Only EQ and GT are expected to remain after the rewrites above. */
gcc_assert (code == EQ || code == GT);
|
||||
|
||||
/* Unlike floating-point, we can rely on the optimizers to have already
|
||||
converted to MIN/MAX expressions, so we don't have to handle that. */
|
||||
|
||||
/* Unsigned GT is not directly supported. We can zero-extend QI and
|
||||
HImode elements to the next wider element size, use a signed compare,
|
||||
then repack. For three extra instructions, this is definitely a win. */
|
||||
if (code == GT && unsignedp)
|
||||
{
|
||||
rtx o0l, o0h, o1l, o1h, cl, ch, zero;
|
||||
enum machine_mode wider;
|
||||
rtx (*unpackl) (rtx, rtx, rtx);
|
||||
rtx (*unpackh) (rtx, rtx, rtx);
|
||||
rtx (*pack) (rtx, rtx, rtx);
|
||||
|
||||
/* Choose the wider element mode and the matching unpack/pack insn
   generators for MODE; any mode other than V16QI or V8HI is treated as
   unreachable. */
switch (mode)
|
||||
{
|
||||
case V16QImode:
|
||||
wider = V8HImode;
|
||||
unpackl = gen_sse2_punpcklbw;
|
||||
unpackh = gen_sse2_punpckhbw;
|
||||
pack = gen_sse2_packsswb;
|
||||
break;
|
||||
case V8HImode:
|
||||
wider = V4SImode;
|
||||
unpackl = gen_sse2_punpcklwd;
|
||||
unpackh = gen_sse2_punpckhwd;
|
||||
pack = gen_sse2_packssdw;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
operands[4] = force_reg (mode, operands[4]);
|
||||
operands[5] = force_reg (mode, operands[5]);
|
||||
|
||||
/* Fresh registers: four widened input halves, two wide comparison
   results, the final MODE-sized result, and a zero vector. */
o0l = gen_reg_rtx (wider);
|
||||
o0h = gen_reg_rtx (wider);
|
||||
o1l = gen_reg_rtx (wider);
|
||||
o1h = gen_reg_rtx (wider);
|
||||
cl = gen_reg_rtx (wider);
|
||||
ch = gen_reg_rtx (wider);
|
||||
cmp = gen_reg_rtx (mode);
|
||||
zero = force_reg (mode, CONST0_RTX (mode));
|
||||
|
||||
/* Zero-extend the low and high halves of both compared values by
   unpacking each against ZERO; the wide registers are written through
   their MODE lowpart. */
emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
|
||||
emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
|
||||
emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
|
||||
emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
|
||||
|
||||
/* Signed GT on the widened low halves. */
x = gen_rtx_GT (wider, o0l, o1l);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, cl, x));
|
||||
|
||||
/* Signed GT on the widened high halves. */
x = gen_rtx_GT (wider, o0h, o1h);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, ch, x));
|
||||
|
||||
/* Repack the two wide results into the MODE-sized CMP. */
emit_insn (pack (cmp, cl, ch));
|
||||
}
|
||||
else
|
||||
cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
|
||||
operands[1], operands[2]);
|
||||
|
||||
/* Select between operands[1] and operands[2] using the comparison
   result. */
ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Expand conditional increment or decrement using adc/sbb instructions.
|
||||
The default case using setcc followed by the conditional move can be
|
||||
done by generic code. */
|
||||
|
@ -710,6 +710,22 @@
|
||||
[(set_attr "type" "ssecomi")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
;; Vector conditional select on V4SF, enabled under TARGET_SSE.
;; Operand 3 is a comparison of operands 4 and 5; operands 1 and 2 are the
;; two arms of the if_then_else and operand 0 receives the result.
;; Expansion is delegated to ix86_expand_fp_vcond; the pattern FAILs when
;; that function returns false.
(define_expand "vcondv4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "")
|
||||
(if_then_else:V4SF
|
||||
(match_operator 3 ""
|
||||
[(match_operand:V4SF 4 "nonimmediate_operand" "")
|
||||
(match_operand:V4SF 5 "nonimmediate_operand" "")])
|
||||
(match_operand:V4SF 1 "general_operand" "")
|
||||
(match_operand:V4SF 2 "general_operand" "")))]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
if (ix86_expand_fp_vcond (operands))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel single-precision floating point logical operations
|
||||
@ -1648,6 +1664,22 @@
|
||||
[(set_attr "type" "ssecomi")
|
||||
(set_attr "mode" "DF")])
|
||||
|
||||
;; Vector conditional select on V2DF, enabled under TARGET_SSE2.
;; Operand 3 is a comparison of operands 4 and 5; operands 1 and 2 are the
;; two arms of the if_then_else and operand 0 receives the result.
;; Expansion is delegated to ix86_expand_fp_vcond; the pattern FAILs when
;; that function returns false.
(define_expand "vcondv2df"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "")
|
||||
(if_then_else:V2DF
|
||||
(match_operator 3 ""
|
||||
[(match_operand:V2DF 4 "nonimmediate_operand" "")
|
||||
(match_operand:V2DF 5 "nonimmediate_operand" "")])
|
||||
(match_operand:V2DF 1 "general_operand" "")
|
||||
(match_operand:V2DF 2 "general_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (ix86_expand_fp_vcond (operands))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel double-precision floating point logical operations
|
||||
@ -2602,6 +2634,38 @@
|
||||
[(set_attr "type" "ssecmp")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Integer vector conditional select, instantiated for each mode in the
;; SSEMODE124 iterator and enabled under TARGET_SSE2.
;; Operand 3 is a comparison of operands 4 and 5; operands 1 and 2 are the
;; two arms of the if_then_else and operand 0 receives the result.
;; Expansion is delegated to ix86_expand_int_vcond with its UNSIGNEDP
;; argument false; the pattern FAILs when that function returns false.
(define_expand "vcond<mode>"
|
||||
[(set (match_operand:SSEMODE124 0 "register_operand" "")
|
||||
(if_then_else:SSEMODE124
|
||||
(match_operator 3 ""
|
||||
[(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
|
||||
(match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
|
||||
(match_operand:SSEMODE124 1 "general_operand" "")
|
||||
(match_operand:SSEMODE124 2 "general_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (ix86_expand_int_vcond (operands, false))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;; Unsigned-comparison variant of the integer vector conditional select,
;; instantiated for each mode in the SSEMODE12 iterator and enabled under
;; TARGET_SSE2.
;; Operand 3 is a comparison of operands 4 and 5; operands 1 and 2 are the
;; two arms of the if_then_else and operand 0 receives the result.
;; Expansion is delegated to ix86_expand_int_vcond with its UNSIGNEDP
;; argument true; the pattern FAILs when that function returns false.
(define_expand "vcondu<mode>"
|
||||
[(set (match_operand:SSEMODE12 0 "register_operand" "")
|
||||
(if_then_else:SSEMODE12
|
||||
(match_operator 3 ""
|
||||
[(match_operand:SSEMODE12 4 "nonimmediate_operand" "")
|
||||
(match_operand:SSEMODE12 5 "nonimmediate_operand" "")])
|
||||
(match_operand:SSEMODE12 1 "general_operand" "")
|
||||
(match_operand:SSEMODE12 2 "general_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (ix86_expand_int_vcond (operands, true))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Parallel integral logical operations
|
||||
|
@ -1,3 +1,8 @@
|
||||
2005-04-14 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_vect_condition):
|
||||
Add ia64, i?86, and x86_64.
|
||||
|
||||
2005-04-14 Steven G. Kargl <kargls@comcast.net>
|
||||
|
||||
* gfortran.dg/underflow.f90: Use tiny(x)/huge(x).
|
||||
|
@ -844,7 +844,10 @@ proc check_effective_target_vect_condition { } {
|
||||
verbose "check_effective_target_vect_cond: using cached result" 2
|
||||
} else {
|
||||
set et_vect_cond_saved 0
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_cond_saved 1
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user