i386.c (ix86_expand_int_vcond): Remove unsignedp argument.
* config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp argument. Simplify canonicalization of condition. Use unsigned saturating subtraction for QI and HImode unsigned compares. Use bit arithmetic tricks for SImode unsigned compares. * config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl. * config/i386/sse.md (SSEMODE14): New. (umaxv8hi3): Use us_minus+plus to avoid vcond. (umaxv4si3): New. (smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize. (smin<SSEMODE14>3): Similarly with sminv16qi3. (umin<SSEMODE24>3): Similarly with uminv8hi3. * lib/target-supports.exp (check_effective_target_vect_no_max): Remove i386 and x86_64. From-SVN: r101429
This commit is contained in:
parent
88be5d434d
commit
9fb93f8966
@ -1,3 +1,17 @@
|
||||
2005-06-29 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp
|
||||
argument. Simplify canonicalization of condition. Use unsigned
|
||||
saturating subtraction for QI and HImode unsigned compares. Use
|
||||
bit arithmetic tricks for SImode unsigned compares.
|
||||
* config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl.
|
||||
* config/i386/sse.md (SSEMODE14): New.
|
||||
(umaxv8hi3): Use us_minus+plus to avoid vcond.
|
||||
(umaxv4si3): New.
|
||||
(smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize.
|
||||
(smin<SSEMODE14>3): Similarly with sminv16qi3.
|
||||
(umin<SSEMODE24>3): Similarly with uminv8hi3.
|
||||
|
||||
2005-06-29 Ian Lance Taylor <ian@airs.com>
|
||||
|
||||
* dwarf2out.c (expand_builtin_init_dwarf_reg_sizes): Change
|
||||
|
@ -149,7 +149,7 @@ extern int ix86_expand_setcc (enum rtx_code, rtx);
|
||||
extern int ix86_expand_int_movcc (rtx[]);
|
||||
extern int ix86_expand_fp_movcc (rtx[]);
|
||||
extern bool ix86_expand_fp_vcond (rtx[]);
|
||||
extern bool ix86_expand_int_vcond (rtx[], bool);
|
||||
extern bool ix86_expand_int_vcond (rtx[]);
|
||||
extern int ix86_expand_int_addcc (rtx[]);
|
||||
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
|
||||
extern void x86_initialize_trampoline (rtx, rtx, rtx);
|
||||
|
@ -10501,94 +10501,102 @@ ix86_expand_fp_vcond (rtx operands[])
|
||||
/* Expand a signed or unsigned integral vector conditional move. */
|
||||
|
||||
bool
|
||||
ix86_expand_int_vcond (rtx operands[], bool unsignedp)
|
||||
ix86_expand_int_vcond (rtx operands[])
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (operands[0]);
|
||||
enum rtx_code code = GET_CODE (operands[3]);
|
||||
rtx cmp, x;
|
||||
bool negate = false;
|
||||
rtx x, cop0, cop1;
|
||||
|
||||
if (unsignedp)
|
||||
code = signed_condition (code);
|
||||
if (code == NE || code == LE || code == GE)
|
||||
cop0 = operands[4];
|
||||
cop1 = operands[5];
|
||||
|
||||
/* Canonicalize the comparison to EQ, GT, GTU. */
|
||||
switch (code)
|
||||
{
|
||||
/* Inverse of a supported code. */
|
||||
x = operands[1];
|
||||
operands[1] = operands[2];
|
||||
operands[2] = x;
|
||||
case EQ:
|
||||
case GT:
|
||||
case GTU:
|
||||
break;
|
||||
|
||||
case NE:
|
||||
case LE:
|
||||
case LEU:
|
||||
code = reverse_condition (code);
|
||||
}
|
||||
if (code == LT)
|
||||
{
|
||||
/* Swap of a supported code. */
|
||||
x = operands[4];
|
||||
operands[4] = operands[5];
|
||||
operands[5] = x;
|
||||
negate = true;
|
||||
break;
|
||||
|
||||
case GE:
|
||||
case GEU:
|
||||
code = reverse_condition (code);
|
||||
negate = true;
|
||||
/* FALLTHRU */
|
||||
|
||||
case LT:
|
||||
case LTU:
|
||||
code = swap_condition (code);
|
||||
x = cop0, cop0 = cop1, cop1 = x;
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
gcc_assert (code == EQ || code == GT);
|
||||
|
||||
/* Unlike floating-point, we can rely on the optimizers to have already
|
||||
converted to MIN/MAX expressions, so we don't have to handle that. */
|
||||
|
||||
/* Unsigned GT is not directly supported. We can zero-extend QI and
|
||||
HImode elements to the next wider element size, use a signed compare,
|
||||
then repack. For three extra instructions, this is definitely a win. */
|
||||
if (code == GT && unsignedp)
|
||||
/* Unsigned parallel compare is not supported by the hardware. Play some
|
||||
tricks to turn this into a signed comparison against 0. */
|
||||
if (code == GTU)
|
||||
{
|
||||
rtx o0l, o0h, o1l, o1h, cl, ch, zero;
|
||||
enum machine_mode wider;
|
||||
rtx (*unpackl) (rtx, rtx, rtx);
|
||||
rtx (*unpackh) (rtx, rtx, rtx);
|
||||
rtx (*pack) (rtx, rtx, rtx);
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V4SImode:
|
||||
{
|
||||
rtx t1, t2, mask;
|
||||
|
||||
/* Perform a parallel modulo subtraction. */
|
||||
t1 = gen_reg_rtx (mode);
|
||||
emit_insn (gen_subv4si3 (t1, cop0, cop1));
|
||||
|
||||
/* Extract the original sign bit of op0. */
|
||||
mask = GEN_INT (-0x80000000);
|
||||
mask = gen_rtx_CONST_VECTOR (mode,
|
||||
gen_rtvec (4, mask, mask, mask, mask));
|
||||
mask = force_reg (mode, mask);
|
||||
t2 = gen_reg_rtx (mode);
|
||||
emit_insn (gen_andv4si3 (t2, cop0, mask));
|
||||
|
||||
/* XOR it back into the result of the subtraction. This results
|
||||
in the sign bit set iff we saw unsigned underflow. */
|
||||
x = gen_reg_rtx (mode);
|
||||
emit_insn (gen_xorv4si3 (x, t1, t2));
|
||||
|
||||
code = GT;
|
||||
}
|
||||
break;
|
||||
|
||||
case V16QImode:
|
||||
wider = V8HImode;
|
||||
unpackl = gen_sse2_punpcklbw;
|
||||
unpackh = gen_sse2_punpckhbw;
|
||||
pack = gen_sse2_packsswb;
|
||||
break;
|
||||
case V8HImode:
|
||||
wider = V4SImode;
|
||||
unpackl = gen_sse2_punpcklwd;
|
||||
unpackh = gen_sse2_punpckhwd;
|
||||
pack = gen_sse2_packssdw;
|
||||
/* Perform a parallel unsigned saturating subtraction. */
|
||||
x = gen_reg_rtx (mode);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, x,
|
||||
gen_rtx_US_MINUS (mode, cop0, cop1)));
|
||||
|
||||
code = EQ;
|
||||
negate = !negate;
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
operands[4] = force_reg (mode, operands[4]);
|
||||
operands[5] = force_reg (mode, operands[5]);
|
||||
|
||||
o0l = gen_reg_rtx (wider);
|
||||
o0h = gen_reg_rtx (wider);
|
||||
o1l = gen_reg_rtx (wider);
|
||||
o1h = gen_reg_rtx (wider);
|
||||
cl = gen_reg_rtx (wider);
|
||||
ch = gen_reg_rtx (wider);
|
||||
cmp = gen_reg_rtx (mode);
|
||||
zero = force_reg (mode, CONST0_RTX (mode));
|
||||
|
||||
emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
|
||||
emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
|
||||
emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
|
||||
emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
|
||||
|
||||
x = gen_rtx_GT (wider, o0l, o1l);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, cl, x));
|
||||
|
||||
x = gen_rtx_GT (wider, o0h, o1h);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, ch, x));
|
||||
|
||||
emit_insn (pack (cmp, cl, ch));
|
||||
cop0 = x;
|
||||
cop1 = CONST0_RTX (mode);
|
||||
}
|
||||
else
|
||||
cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
|
||||
operands[1], operands[2]);
|
||||
|
||||
ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
|
||||
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
|
||||
operands[1+negate], operands[2-negate]);
|
||||
|
||||
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
|
||||
operands[2-negate]);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -30,6 +30,7 @@
|
||||
;; Mix-n-match
|
||||
(define_mode_macro SSEMODE12 [V16QI V8HI])
|
||||
(define_mode_macro SSEMODE24 [V8HI V4SI])
|
||||
(define_mode_macro SSEMODE14 [V16QI V4SI])
|
||||
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
|
||||
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
|
||||
|
||||
@ -2741,26 +2742,6 @@
|
||||
operands[1] = gen_lowpart (TImode, operands[1]);
|
||||
})
|
||||
|
||||
(define_expand "smaxv16qi3"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "")
|
||||
(smax:V16QI (match_operand:V16QI 1 "register_operand" "")
|
||||
(match_operand:V16QI 2 "register_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx xops[6];
|
||||
bool ok;
|
||||
|
||||
xops[0] = operands[0];
|
||||
xops[1] = operands[1];
|
||||
xops[2] = operands[2];
|
||||
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
|
||||
xops[4] = operands[1];
|
||||
xops[5] = operands[2];
|
||||
ok = ix86_expand_int_vcond (xops, false);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "umaxv16qi3"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "")
|
||||
(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
|
||||
@ -2794,33 +2775,22 @@
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_expand "umaxv8hi3"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "")
|
||||
(umax:V8HI (match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")))]
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=x")
|
||||
(us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
|
||||
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))
|
||||
(set (match_dup 3)
|
||||
(plus:V8HI (match_dup 0) (match_dup 2)))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx xops[6], t1, t2;
|
||||
bool ok;
|
||||
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
emit_insn (gen_sse2_ussubv8hi3 (t1, operands[2], operands[1]));
|
||||
t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
|
||||
|
||||
xops[0] = operands[0];
|
||||
xops[1] = operands[1];
|
||||
xops[2] = operands[2];
|
||||
xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
|
||||
xops[4] = t1;
|
||||
xops[5] = t2;
|
||||
ok = ix86_expand_int_vcond (xops, false);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
operands[3] = operands[0];
|
||||
if (rtx_equal_p (operands[0], operands[2]))
|
||||
operands[0] = gen_reg_rtx (V8HImode);
|
||||
})
|
||||
|
||||
(define_expand "sminv16qi3"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "")
|
||||
(smin:V16QI (match_operand:V16QI 1 "register_operand" "")
|
||||
(match_operand:V16QI 2 "register_operand" "")))]
|
||||
(define_expand "smax<mode>3"
|
||||
[(set (match_operand:SSEMODE14 0 "register_operand" "")
|
||||
(smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
|
||||
(match_operand:SSEMODE14 2 "register_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx xops[6];
|
||||
@ -2830,9 +2800,29 @@
|
||||
xops[1] = operands[1];
|
||||
xops[2] = operands[2];
|
||||
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
|
||||
xops[4] = operands[2];
|
||||
xops[5] = operands[1];
|
||||
ok = ix86_expand_int_vcond (xops, false);
|
||||
xops[4] = operands[1];
|
||||
xops[5] = operands[2];
|
||||
ok = ix86_expand_int_vcond (xops);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "umaxv4si3"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "")
|
||||
(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx xops[6];
|
||||
bool ok;
|
||||
|
||||
xops[0] = operands[0];
|
||||
xops[1] = operands[1];
|
||||
xops[2] = operands[2];
|
||||
xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
|
||||
xops[4] = operands[1];
|
||||
xops[5] = operands[2];
|
||||
ok = ix86_expand_int_vcond (xops);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
@ -2869,26 +2859,42 @@
|
||||
[(set_attr "type" "sseiadd")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_expand "uminv8hi3"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "")
|
||||
(umin:V8HI (match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")))]
|
||||
(define_expand "smin<mode>3"
|
||||
[(set (match_operand:SSEMODE14 0 "register_operand" "")
|
||||
(smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
|
||||
(match_operand:SSEMODE14 2 "register_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx xops[6], t1, t2;
|
||||
rtx xops[6];
|
||||
bool ok;
|
||||
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
emit_insn (gen_sse2_ussubv8hi3 (t1, operands[1], operands[2]));
|
||||
t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
|
||||
xops[0] = operands[0];
|
||||
xops[1] = operands[2];
|
||||
xops[2] = operands[1];
|
||||
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
|
||||
xops[4] = operands[1];
|
||||
xops[5] = operands[2];
|
||||
ok = ix86_expand_int_vcond (xops);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "umin<mode>3"
|
||||
[(set (match_operand:SSEMODE24 0 "register_operand" "")
|
||||
(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
|
||||
(match_operand:SSEMODE24 2 "register_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx xops[6];
|
||||
bool ok;
|
||||
|
||||
xops[0] = operands[0];
|
||||
xops[1] = operands[1];
|
||||
xops[2] = operands[2];
|
||||
xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
|
||||
xops[4] = t1;
|
||||
xops[5] = t2;
|
||||
ok = ix86_expand_int_vcond (xops, false);
|
||||
xops[1] = operands[2];
|
||||
xops[2] = operands[1];
|
||||
xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
|
||||
xops[4] = operands[1];
|
||||
xops[5] = operands[2];
|
||||
ok = ix86_expand_int_vcond (xops);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
@ -2929,7 +2935,7 @@
|
||||
(match_operand:SSEMODE124 2 "general_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (ix86_expand_int_vcond (operands, false))
|
||||
if (ix86_expand_int_vcond (operands))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
@ -2945,7 +2951,7 @@
|
||||
(match_operand:SSEMODE12 2 "general_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (ix86_expand_int_vcond (operands, true))
|
||||
if (ix86_expand_int_vcond (operands))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
|
@ -1,3 +1,8 @@
|
||||
2005-06-29 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_vect_no_max):
|
||||
Remove i386 and x86_64.
|
||||
|
||||
2005-06-29 Steve Ellcey <sje@cup.hp.com>
|
||||
|
||||
PR testsuite/21969
|
||||
|
@ -973,9 +973,7 @@ proc check_effective_target_vect_no_max { } {
|
||||
verbose "check_effective_target_vect_no_max: using cached result" 2
|
||||
} else {
|
||||
set et_vect_no_max_saved 0
|
||||
if { [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget sparc*-*-*]
|
||||
if { [istarget sparc*-*-*]
|
||||
|| [istarget alpha*-*-*] } {
|
||||
set et_vect_no_max_saved 1
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user