i386.c (ix86_expand_int_vcond): Remove unsignedp argument.

* config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp
        argument.  Simplify canonicalization of condition.  Use unsigned
        saturating subtraction for QI and HImode unsigned compares.  Use
        bit arithmetic tricks for SImode unsigned compares.
        * config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl.
        * config/i386/sse.md (SSEMODE14): New.
        (umaxv8hi3): Use us_minus+plus to avoid vcond.
        (umaxv4si3): New.
        (smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize.
        (smin<SSEMODE14>3): Similarly with sminv16qi3.
        (umin<SSEMODE24>3): Similarly with uminv8hi3.

        * lib/target-supports.exp (check_effective_target_vect_no_max):
        Remove i386 and x86_64.

From-SVN: r101429
This commit is contained in:
Richard Henderson 2005-06-29 10:27:23 -07:00 committed by Richard Henderson
parent 88be5d434d
commit 9fb93f8966
6 changed files with 164 additions and 133 deletions

View File

@ -1,3 +1,17 @@
2005-06-29 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp
argument. Simplify canonicalization of condition. Use unsigned
saturating subtraction for QI and HImode unsigned compares. Use
bit arithmetic tricks for SImode unsigned compares.
* config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl.
* config/i386/sse.md (SSEMODE14): New.
(umaxv8hi3): Use us_minus+plus to avoid vcond.
(umaxv4si3): New.
(smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize.
(smin<SSEMODE14>3): Similarly with sminv16qi3.
(umin<SSEMODE24>3): Similarly with uminv8hi3.
2005-06-29 Ian Lance Taylor <ian@airs.com>
* dwarf2out.c (expand_builtin_init_dwarf_reg_sizes): Change

View File

@ -149,7 +149,7 @@ extern int ix86_expand_setcc (enum rtx_code, rtx);
extern int ix86_expand_int_movcc (rtx[]);
extern int ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[], bool);
extern bool ix86_expand_int_vcond (rtx[]);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);

View File

@ -10501,94 +10501,102 @@ ix86_expand_fp_vcond (rtx operands[])
/* Expand a signed or unsigned integral vector conditional move. */
bool
ix86_expand_int_vcond (rtx operands[], bool unsignedp)
ix86_expand_int_vcond (rtx operands[])
{
enum machine_mode mode = GET_MODE (operands[0]);
enum rtx_code code = GET_CODE (operands[3]);
rtx cmp, x;
bool negate = false;
rtx x, cop0, cop1;
if (unsignedp)
code = signed_condition (code);
if (code == NE || code == LE || code == GE)
cop0 = operands[4];
cop1 = operands[5];
/* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
{
/* Inverse of a supported code. */
x = operands[1];
operands[1] = operands[2];
operands[2] = x;
case EQ:
case GT:
case GTU:
break;
case NE:
case LE:
case LEU:
code = reverse_condition (code);
}
if (code == LT)
{
/* Swap of a supported code. */
x = operands[4];
operands[4] = operands[5];
operands[5] = x;
negate = true;
break;
case GE:
case GEU:
code = reverse_condition (code);
negate = true;
/* FALLTHRU */
case LT:
case LTU:
code = swap_condition (code);
x = cop0, cop0 = cop1, cop1 = x;
break;
default:
gcc_unreachable ();
}
gcc_assert (code == EQ || code == GT);
/* Unlike floating-point, we can rely on the optimizers to have already
converted to MIN/MAX expressions, so we don't have to handle that. */
/* Unsigned GT is not directly supported. We can zero-extend QI and
HImode elements to the next wider element size, use a signed compare,
then repack. For three extra instructions, this is definitely a win. */
if (code == GT && unsignedp)
/* Unsigned parallel compare is not supported by the hardware. Play some
tricks to turn this into a signed comparison against 0. */
if (code == GTU)
{
rtx o0l, o0h, o1l, o1h, cl, ch, zero;
enum machine_mode wider;
rtx (*unpackl) (rtx, rtx, rtx);
rtx (*unpackh) (rtx, rtx, rtx);
rtx (*pack) (rtx, rtx, rtx);
switch (mode)
{
case V4SImode:
{
rtx t1, t2, mask;
/* Perform a parallel modulo subtraction. */
t1 = gen_reg_rtx (mode);
emit_insn (gen_subv4si3 (t1, cop0, cop1));
/* Extract the original sign bit of op0. */
mask = GEN_INT (-0x80000000);
mask = gen_rtx_CONST_VECTOR (mode,
gen_rtvec (4, mask, mask, mask, mask));
mask = force_reg (mode, mask);
t2 = gen_reg_rtx (mode);
emit_insn (gen_andv4si3 (t2, cop0, mask));
/* XOR it back into the result of the subtraction. This results
in the sign bit set iff we saw unsigned underflow. */
x = gen_reg_rtx (mode);
emit_insn (gen_xorv4si3 (x, t1, t2));
code = GT;
}
break;
case V16QImode:
wider = V8HImode;
unpackl = gen_sse2_punpcklbw;
unpackh = gen_sse2_punpckhbw;
pack = gen_sse2_packsswb;
break;
case V8HImode:
wider = V4SImode;
unpackl = gen_sse2_punpcklwd;
unpackh = gen_sse2_punpckhwd;
pack = gen_sse2_packssdw;
/* Perform a parallel unsigned saturating subtraction. */
x = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (VOIDmode, x,
gen_rtx_US_MINUS (mode, cop0, cop1)));
code = EQ;
negate = !negate;
break;
default:
gcc_unreachable ();
}
operands[4] = force_reg (mode, operands[4]);
operands[5] = force_reg (mode, operands[5]);
o0l = gen_reg_rtx (wider);
o0h = gen_reg_rtx (wider);
o1l = gen_reg_rtx (wider);
o1h = gen_reg_rtx (wider);
cl = gen_reg_rtx (wider);
ch = gen_reg_rtx (wider);
cmp = gen_reg_rtx (mode);
zero = force_reg (mode, CONST0_RTX (mode));
emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
x = gen_rtx_GT (wider, o0l, o1l);
emit_insn (gen_rtx_SET (VOIDmode, cl, x));
x = gen_rtx_GT (wider, o0h, o1h);
emit_insn (gen_rtx_SET (VOIDmode, ch, x));
emit_insn (pack (cmp, cl, ch));
cop0 = x;
cop1 = CONST0_RTX (mode);
}
else
cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
operands[1], operands[2]);
ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
operands[1+negate], operands[2-negate]);
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
operands[2-negate]);
return true;
}

View File

@ -30,6 +30,7 @@
;; Mix-n-match
(define_mode_macro SSEMODE12 [V16QI V8HI])
(define_mode_macro SSEMODE24 [V8HI V4SI])
(define_mode_macro SSEMODE14 [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
@ -2741,26 +2742,6 @@
operands[1] = gen_lowpart (TImode, operands[1]);
})
(define_expand "smaxv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(smax:V16QI (match_operand:V16QI 1 "register_operand" "")
(match_operand:V16QI 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
bool ok;
xops[0] = operands[0];
xops[1] = operands[1];
xops[2] = operands[2];
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops, false);
gcc_assert (ok);
DONE;
})
(define_expand "umaxv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
@ -2794,33 +2775,22 @@
(set_attr "mode" "TI")])
(define_expand "umaxv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "")
(umax:V8HI (match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")))]
[(set (match_operand:V8HI 0 "register_operand" "=x")
(us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))
(set (match_dup 3)
(plus:V8HI (match_dup 0) (match_dup 2)))]
"TARGET_SSE2"
{
rtx xops[6], t1, t2;
bool ok;
t1 = gen_reg_rtx (V8HImode);
emit_insn (gen_sse2_ussubv8hi3 (t1, operands[2], operands[1]));
t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
xops[0] = operands[0];
xops[1] = operands[1];
xops[2] = operands[2];
xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
xops[4] = t1;
xops[5] = t2;
ok = ix86_expand_int_vcond (xops, false);
gcc_assert (ok);
DONE;
operands[3] = operands[0];
if (rtx_equal_p (operands[0], operands[2]))
operands[0] = gen_reg_rtx (V8HImode);
})
(define_expand "sminv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(smin:V16QI (match_operand:V16QI 1 "register_operand" "")
(match_operand:V16QI 2 "register_operand" "")))]
(define_expand "smax<mode>3"
[(set (match_operand:SSEMODE14 0 "register_operand" "")
(smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
(match_operand:SSEMODE14 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
@ -2830,9 +2800,29 @@
xops[1] = operands[1];
xops[2] = operands[2];
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
xops[4] = operands[2];
xops[5] = operands[1];
ok = ix86_expand_int_vcond (xops, false);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
(define_expand "umaxv4si3"
[(set (match_operand:V4SI 0 "register_operand" "")
(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
bool ok;
xops[0] = operands[0];
xops[1] = operands[1];
xops[2] = operands[2];
xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
@ -2869,26 +2859,42 @@
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
(define_expand "uminv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "")
(umin:V8HI (match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")))]
(define_expand "smin<mode>3"
[(set (match_operand:SSEMODE14 0 "register_operand" "")
(smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
(match_operand:SSEMODE14 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6], t1, t2;
rtx xops[6];
bool ok;
t1 = gen_reg_rtx (V8HImode);
emit_insn (gen_sse2_ussubv8hi3 (t1, operands[1], operands[2]));
t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
xops[0] = operands[0];
xops[1] = operands[2];
xops[2] = operands[1];
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
(define_expand "umin<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "")
(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
(match_operand:SSEMODE24 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
bool ok;
xops[0] = operands[0];
xops[1] = operands[1];
xops[2] = operands[2];
xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
xops[4] = t1;
xops[5] = t2;
ok = ix86_expand_int_vcond (xops, false);
xops[1] = operands[2];
xops[2] = operands[1];
xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
@ -2929,7 +2935,7 @@
(match_operand:SSEMODE124 2 "general_operand" "")))]
"TARGET_SSE2"
{
if (ix86_expand_int_vcond (operands, false))
if (ix86_expand_int_vcond (operands))
DONE;
else
FAIL;
@ -2945,7 +2951,7 @@
(match_operand:SSEMODE12 2 "general_operand" "")))]
"TARGET_SSE2"
{
if (ix86_expand_int_vcond (operands, true))
if (ix86_expand_int_vcond (operands))
DONE;
else
FAIL;

View File

@ -1,3 +1,8 @@
2005-06-29 Richard Henderson <rth@redhat.com>
* lib/target-supports.exp (check_effective_target_vect_no_max):
Remove i386 and x86_64.
2005-06-29 Steve Ellcey <sje@cup.hp.com>
PR testsuite/21969

View File

@ -973,9 +973,7 @@ proc check_effective_target_vect_no_max { } {
verbose "check_effective_target_vect_no_max: using cached result" 2
} else {
set et_vect_no_max_saved 0
if { [istarget i?86-*-*]
|| [istarget x86_64-*-*]
|| [istarget sparc*-*-*]
if { [istarget sparc*-*-*]
|| [istarget alpha*-*-*] } {
set et_vect_no_max_saved 1
}