diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e8365ec02c2..c13d7f8d77a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2005-06-29 Richard Henderson + + * config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp + argument. Simplify canonicalization of condition. Use unsigned + saturating subtraction for QI and HImode unsigned compares. Use + bit arithmetic tricks for SImode unsigned compares. + * config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl. + * config/i386/sse.md (SSEMODE14): New. + (umaxv8hi3): Use us_minus+plus to avoid vcond. + (umaxv4si3): New. + (smax<mode>3): Rename from smaxv16qi3 and macroize. + (smin<mode>3): Similarly with sminv16qi3. + (umin<mode>3): Similarly with uminv8hi3. + 2005-06-29 Ian Lance Taylor * dwarf2out.c (expand_builtin_init_dwarf_reg_sizes): Change diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index c7b74777359..15c52b090c2 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -149,7 +149,7 @@ extern int ix86_expand_setcc (enum rtx_code, rtx); extern int ix86_expand_int_movcc (rtx[]); extern int ix86_expand_fp_movcc (rtx[]); extern bool ix86_expand_fp_vcond (rtx[]); -extern bool ix86_expand_int_vcond (rtx[], bool); +extern bool ix86_expand_int_vcond (rtx[]); extern int ix86_expand_int_addcc (rtx[]); extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void x86_initialize_trampoline (rtx, rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 620d862f3ff..77437046621 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10501,94 +10501,102 @@ ix86_expand_fp_vcond (rtx operands[]) /* Expand a signed integral vector conditional move. 
*/ bool -ix86_expand_int_vcond (rtx operands[], bool unsignedp) +ix86_expand_int_vcond (rtx operands[]) { enum machine_mode mode = GET_MODE (operands[0]); enum rtx_code code = GET_CODE (operands[3]); - rtx cmp, x; + bool negate = false; + rtx x, cop0, cop1; - if (unsignedp) - code = signed_condition (code); - if (code == NE || code == LE || code == GE) + cop0 = operands[4]; + cop1 = operands[5]; + + /* Canonicalize the comparison to EQ, GT, GTU. */ + switch (code) { - /* Inverse of a supported code. */ - x = operands[1]; - operands[1] = operands[2]; - operands[2] = x; + case EQ: + case GT: + case GTU: + break; + + case NE: + case LE: + case LEU: code = reverse_condition (code); - } - if (code == LT) - { - /* Swap of a supported code. */ - x = operands[4]; - operands[4] = operands[5]; - operands[5] = x; + negate = true; + break; + + case GE: + case GEU: + code = reverse_condition (code); + negate = true; + /* FALLTHRU */ + + case LT: + case LTU: code = swap_condition (code); + x = cop0, cop0 = cop1, cop1 = x; + break; + + default: + gcc_unreachable (); } - gcc_assert (code == EQ || code == GT); - /* Unlike floating-point, we can rely on the optimizers to have already - converted to MIN/MAX expressions, so we don't have to handle that. */ - - /* Unsigned GT is not directly supported. We can zero-extend QI and - HImode elements to the next wider element size, use a signed compare, - then repack. For three extra instructions, this is definitely a win. */ - if (code == GT && unsignedp) + /* Unsigned parallel compare is not supported by the hardware. Play some + tricks to turn this into a signed comparison against 0. */ + if (code == GTU) { - rtx o0l, o0h, o1l, o1h, cl, ch, zero; - enum machine_mode wider; - rtx (*unpackl) (rtx, rtx, rtx); - rtx (*unpackh) (rtx, rtx, rtx); - rtx (*pack) (rtx, rtx, rtx); - switch (mode) { + case V4SImode: + { + rtx t1, t2, mask; + + /* Perform a parallel modulo subtraction. 
*/ + t1 = gen_reg_rtx (mode); + emit_insn (gen_subv4si3 (t1, cop0, cop1)); + + /* Extract the original sign bit of op0. */ + mask = GEN_INT (-0x80000000); + mask = gen_rtx_CONST_VECTOR (mode, + gen_rtvec (4, mask, mask, mask, mask)); + mask = force_reg (mode, mask); + t2 = gen_reg_rtx (mode); + emit_insn (gen_andv4si3 (t2, cop0, mask)); + + /* XOR it back into the result of the subtraction. This results + in the sign bit set iff we saw unsigned underflow. */ + x = gen_reg_rtx (mode); + emit_insn (gen_xorv4si3 (x, t1, t2)); + + code = GT; + } + break; + case V16QImode: - wider = V8HImode; - unpackl = gen_sse2_punpcklbw; - unpackh = gen_sse2_punpckhbw; - pack = gen_sse2_packsswb; - break; case V8HImode: - wider = V4SImode; - unpackl = gen_sse2_punpcklwd; - unpackh = gen_sse2_punpckhwd; - pack = gen_sse2_packssdw; + /* Perform a parallel unsigned saturating subtraction. */ + x = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, x, + gen_rtx_US_MINUS (mode, cop0, cop1))); + + code = EQ; + negate = !negate; break; + default: gcc_unreachable (); } - operands[4] = force_reg (mode, operands[4]); - operands[5] = force_reg (mode, operands[5]); - - o0l = gen_reg_rtx (wider); - o0h = gen_reg_rtx (wider); - o1l = gen_reg_rtx (wider); - o1h = gen_reg_rtx (wider); - cl = gen_reg_rtx (wider); - ch = gen_reg_rtx (wider); - cmp = gen_reg_rtx (mode); - zero = force_reg (mode, CONST0_RTX (mode)); - - emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero)); - emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero)); - emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero)); - emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero)); - - x = gen_rtx_GT (wider, o0l, o1l); - emit_insn (gen_rtx_SET (VOIDmode, cl, x)); - - x = gen_rtx_GT (wider, o0h, o1h); - emit_insn (gen_rtx_SET (VOIDmode, ch, x)); - - emit_insn (pack (cmp, cl, ch)); + cop0 = x; + cop1 = CONST0_RTX (mode); } - else - cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], 
operands[5], - operands[1], operands[2]); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, + operands[1+negate], operands[2-negate]); + + ix86_expand_sse_movcc (operands[0], x, operands[1+negate], + operands[2-negate]); return true; } diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 37454f5af30..bb9f98e9eba 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -30,6 +30,7 @@ ;; Mix-n-match (define_mode_macro SSEMODE12 [V16QI V8HI]) (define_mode_macro SSEMODE24 [V8HI V4SI]) +(define_mode_macro SSEMODE14 [V16QI V4SI]) (define_mode_macro SSEMODE124 [V16QI V8HI V4SI]) (define_mode_macro SSEMODE248 [V8HI V4SI V2DI]) @@ -2741,26 +2742,6 @@ operands[1] = gen_lowpart (TImode, operands[1]); }) -(define_expand "smaxv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "") - (smax:V16QI (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "")))] - "TARGET_SSE2" -{ - rtx xops[6]; - bool ok; - - xops[0] = operands[0]; - xops[1] = operands[1]; - xops[2] = operands[2]; - xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); - xops[4] = operands[1]; - xops[5] = operands[2]; - ok = ix86_expand_int_vcond (xops, false); - gcc_assert (ok); - DONE; -}) - (define_expand "umaxv16qi3" [(set (match_operand:V16QI 0 "register_operand" "") (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") @@ -2794,33 +2775,22 @@ (set_attr "mode" "TI")]) (define_expand "umaxv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "") - (umax:V8HI (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")))] + [(set (match_operand:V8HI 0 "register_operand" "=x") + (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (set (match_dup 3) + (plus:V8HI (match_dup 0) (match_dup 2)))] "TARGET_SSE2" { - rtx xops[6], t1, t2; - bool ok; - - t1 = gen_reg_rtx 
(V8HImode); - emit_insn (gen_sse2_ussubv8hi3 (t1, operands[2], operands[1])); - t2 = force_reg (V8HImode, CONST0_RTX (V8HImode)); - - xops[0] = operands[0]; - xops[1] = operands[1]; - xops[2] = operands[2]; - xops[3] = gen_rtx_EQ (VOIDmode, t1, t2); - xops[4] = t1; - xops[5] = t2; - ok = ix86_expand_int_vcond (xops, false); - gcc_assert (ok); - DONE; + operands[3] = operands[0]; + if (rtx_equal_p (operands[0], operands[2])) + operands[0] = gen_reg_rtx (V8HImode); }) -(define_expand "sminv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "") - (smin:V16QI (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "")))] +(define_expand "smax3" + [(set (match_operand:SSEMODE14 0 "register_operand" "") + (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") + (match_operand:SSEMODE14 2 "register_operand" "")))] "TARGET_SSE2" { rtx xops[6]; @@ -2830,9 +2800,29 @@ xops[1] = operands[1]; xops[2] = operands[2]; xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); - xops[4] = operands[2]; - xops[5] = operands[1]; - ok = ix86_expand_int_vcond (xops, false); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + DONE; +}) + +(define_expand "umaxv4si3" + [(set (match_operand:V4SI 0 "register_operand" "") + (umax:V4SI (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")))] + "TARGET_SSE2" +{ + rtx xops[6]; + bool ok; + + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); gcc_assert (ok); DONE; }) @@ -2869,26 +2859,42 @@ [(set_attr "type" "sseiadd") (set_attr "mode" "TI")]) -(define_expand "uminv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "") - (umin:V8HI (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")))] 
+(define_expand "smin3" + [(set (match_operand:SSEMODE14 0 "register_operand" "") + (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") + (match_operand:SSEMODE14 2 "register_operand" "")))] "TARGET_SSE2" { - rtx xops[6], t1, t2; + rtx xops[6]; bool ok; - t1 = gen_reg_rtx (V8HImode); - emit_insn (gen_sse2_ussubv8hi3 (t1, operands[1], operands[2])); - t2 = force_reg (V8HImode, CONST0_RTX (V8HImode)); + xops[0] = operands[0]; + xops[1] = operands[2]; + xops[2] = operands[1]; + xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + DONE; +}) + +(define_expand "umin3" + [(set (match_operand:SSEMODE24 0 "register_operand" "") + (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "") + (match_operand:SSEMODE24 2 "register_operand" "")))] "TARGET_SSE2" { - rtx xops[6], t1, t2; + rtx xops[6]; bool ok; xops[0] = operands[0]; - xops[1] = operands[1]; - xops[2] = operands[2]; - xops[3] = gen_rtx_EQ (VOIDmode, t1, t2); - xops[4] = t1; - xops[5] = t2; - ok = ix86_expand_int_vcond (xops, false); + xops[1] = operands[2]; + xops[2] = operands[1]; + xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); gcc_assert (ok); DONE; }) @@ -2929,7 +2935,7 @@ (match_operand:SSEMODE124 2 "general_operand" "")))] "TARGET_SSE2" { - if (ix86_expand_int_vcond (operands, false)) + if (ix86_expand_int_vcond (operands)) DONE; else FAIL; @@ -2945,7 +2951,7 @@ (match_operand:SSEMODE12 2 "general_operand" "")))] "TARGET_SSE2" { - if (ix86_expand_int_vcond (operands, true)) + if (ix86_expand_int_vcond (operands)) DONE; else FAIL; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 788ca8f66c4..10b2817eeb7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2005-06-29 Richard Henderson + + * lib/target-supports.exp 
(check_effective_target_vect_no_max): + Remove i386 and x86_64. + 2005-06-29 Steve Ellcey PR testsuite/21969 diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 059ab38e4b7..d00850ad3f0 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -973,9 +973,7 @@ proc check_effective_target_vect_no_max { } { verbose "check_effective_target_vect_no_max: using cached result" 2 } else { set et_vect_no_max_saved 0 - if { [istarget i?86-*-*] - || [istarget x86_64-*-*] - || [istarget sparc*-*-*] + if { [istarget sparc*-*-*] || [istarget alpha*-*-*] } { set et_vect_no_max_saved 1 }