diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 807a34b258a..11adcd05ec8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2010-01-04 H.J. Lu + + PR target/42542 + * config/i386/i386.c (ix86_expand_int_vcond): Don't convert + GTU to GT for V4SI and V2DI. + + * config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP. + (umin3): Removed. + (uminv8hi3): New. + (uminv4si3): Likewise. + 2010-01-04 H.J. Lu PR lto/42581 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3e15b9dd0f5..4d6d51cd739 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1,6 +1,6 @@ /* Subroutines used for code generation on IA-32. - Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 + Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of GCC. @@ -16252,37 +16252,6 @@ ix86_expand_int_vcond (rtx operands[]) switch (mode) { - case V4SImode: - case V2DImode: - { - rtx t1, t2, mask; - - /* Perform a parallel modulo subtraction. */ - t1 = gen_reg_rtx (mode); - emit_insn ((mode == V4SImode - ? gen_subv4si3 - : gen_subv2di3) (t1, cop0, cop1)); - - /* Extract the original sign bit of op0. */ - mask = ix86_build_signbit_mask (GET_MODE_INNER (mode), - true, false); - t2 = gen_reg_rtx (mode); - emit_insn ((mode == V4SImode - ? gen_andv4si3 - : gen_andv2di3) (t2, cop0, mask)); - - /* XOR it back into the result of the subtraction. - This results in the sign bit set iff we saw - unsigned underflow. */ - x = gen_reg_rtx (mode); - emit_insn ((mode == V4SImode - ? gen_xorv4si3 - : gen_xorv2di3) (x, t1, t2)); - - code = GT; - } - break; - case V16QImode: case V8HImode: /* Perform a parallel unsigned saturating subtraction. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9bbea8031a5..3461b8d3eed 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1,5 +1,5 @@ ;; GCC machine description for SSE instructions -;; Copyright (C) 2005, 2006, 2007, 2008, 2009 +;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 ;; Free Software Foundation, Inc. ;; ;; This file is part of GCC. @@ -6138,7 +6138,7 @@ [(set (match_operand:V4SI 0 "register_operand" "") (umax:V4SI (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")))] - "TARGET_SSE2" + "TARGET_SSE4_1 || TARGET_XOP" { if (TARGET_SSE4_1) ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands); @@ -6195,14 +6195,39 @@ } }) -(define_expand "umin3" - [(set (match_operand:SSEMODE24 0 "register_operand" "") - (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "") - (match_operand:SSEMODE24 2 "register_operand" "")))] +(define_expand "uminv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "") + (umin:V8HI (match_operand:V8HI 1 "register_operand" "") + (match_operand:V8HI 2 "register_operand" "")))] "TARGET_SSE2" { if (TARGET_SSE4_1) - ix86_fixup_binary_operands_no_copy (UMIN, mode, operands); + ix86_fixup_binary_operands_no_copy (UMIN, V8HImode, operands); + else + { + rtx xops[6]; + bool ok; + + xops[0] = operands[0]; + xops[1] = operands[2]; + xops[2] = operands[1]; + xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); + xops[4] = operands[1]; + xops[5] = operands[2]; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + DONE; + } +}) + +(define_expand "uminv4si3" + [(set (match_operand:V4SI 0 "register_operand" "") + (umin:V4SI (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")))] + "TARGET_SSE4_1 || TARGET_XOP" +{ + if (TARGET_SSE4_1) + ix86_fixup_binary_operands_no_copy (UMIN, V4SImode, operands); else { rtx xops[6]; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c16e2d41985..44f692cc74d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2010-01-04 H.J. Lu + + PR target/42542 + * gcc.target/i386/pr42542-1.c: New. + * gcc.target/i386/pr42542-1a.c: Likewise. + * gcc.target/i386/pr42542-1b.c: Likewise. + * gcc.target/i386/pr42542-2.c: Likewise. + * gcc.target/i386/pr42542-2a.c: Likewise. + * gcc.target/i386/pr42542-2b.c: Likewise. + * gcc.target/i386/pr42542-3.c: Likewise. + * gcc.target/i386/pr42542-3a.c: Likewise. + 2009-01-04 Tobias Burnus PR fortran/41872 diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1.c b/gcc/testsuite/gcc.target/i386/pr42542-1.c new file mode 100644 index 00000000000..6e115c3f7be --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-1.c @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -msse2 -ftree-vectorize" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +unsigned int v1[] __attribute__ ((aligned(16))) = +{ + 0x80000000, 1, 0xa0000000, 2, + 3, 0xd0000000, 0xf0000000, 0xe0000000 +}; +unsigned int v2[] __attribute__ ((aligned(16))) = +{ + 4, 0xb0000000, 5, 0xc0000000, + 0xd0000000, 6, 7, 8 +}; + +unsigned int max[] = +{ + 0x80000000, 0xb0000000, 0xa0000000, 0xc0000000, + 0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000 +}; + +unsigned int min[] = +{ + 4, 1, 5, 2, + 3, 6, 7, 8 +}; + +unsigned int res[16] __attribute__ ((aligned(16))); + +extern void abort (void); + +void +find_max (void) +{ + int i; + + for (i = 0; i < 8; i++) + res[i] = v1[i] < v2[i] ? v2[i] : v1[i]; +} + +void +find_min (void) +{ + int i; + + for (i = 0; i < 8; i++) + res[i] = v1[i] > v2[i] ? v2[i] : v1[i]; +} + +static void +TEST (void) +{ + int i; + int err = 0; + + find_max (); + for (i = 0; i < 8; i++) + if (res[i] != max[i]) + err++; + + find_min (); + for (i = 0; i < 8; i++) + if (res[i] != min[i]) + err++; + + if (err) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1a.c b/gcc/testsuite/gcc.target/i386/pr42542-1a.c new file mode 100644 index 00000000000..cd77175f6e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-1a.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */ + +#define CHECK_H "sse4_1-check.h" +#define TEST sse4_1_test + +#include "pr42542-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1b.c b/gcc/testsuite/gcc.target/i386/pr42542-1b.c new file mode 100644 index 00000000000..7651f07a649 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-1b.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */ + +#define CHECK_H "sse4_1-check.h" +#define TEST sse4_1_test + +#include "pr42542-1.c" + +/* { dg-final { scan-assembler "pmaxud" } } */ +/* { dg-final { scan-assembler "pminud" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2.c b/gcc/testsuite/gcc.target/i386/pr42542-2.c new file mode 100644 index 00000000000..fc59534d906 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-2.c @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -msse2 -ftree-vectorize" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +unsigned short v1[] __attribute__ ((aligned(16))) = +{ + 0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20, + 3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30 +}; +unsigned short v2[] __attribute__ ((aligned(16))) = +{ + 4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000, + 0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000 +}; + +unsigned short max[] = +{ + 0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000, + 0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000 +}; + +unsigned short min[] = +{ + 4, 40, 1, 10, 5, 50, 2, 20, + 3, 30, 6, 65, 7, 75, 25, 30 +}; + +unsigned short res[16] __attribute__ ((aligned(16))); + +extern void abort (void); + +void +find_max (void) +{ + int i; + + for (i = 0; i < 16; i++) + res[i] = v1[i] < v2[i] ? v2[i] : v1[i]; +} + +void +find_min (void) +{ + int i; + + for (i = 0; i < 16; i++) + res[i] = v1[i] > v2[i] ? v2[i] : v1[i]; +} + +static void +TEST (void) +{ + int i; + int err = 0; + + find_max (); + for (i = 0; i < 16; i++) + if (res[i] != max[i]) + err++; + + find_min (); + for (i = 0; i < 16; i++) + if (res[i] != min[i]) + err++; + + if (err) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2a.c b/gcc/testsuite/gcc.target/i386/pr42542-2a.c new file mode 100644 index 00000000000..bcefa9cfe8d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-2a.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */ + +#define CHECK_H "sse4_1-check.h" +#define TEST sse4_1_test + +#include "pr42542-2.c" diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2b.c b/gcc/testsuite/gcc.target/i386/pr42542-2b.c new file mode 100644 index 00000000000..ddb539bf7a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-2b.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */ + +#define CHECK_H "sse4_1-check.h" +#define TEST sse4_1_test + +#include "pr42542-2.c" + +/* { dg-final { scan-assembler "pmaxuw" } } */ +/* { dg-final { scan-assembler "pminuw" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr42542-3.c b/gcc/testsuite/gcc.target/i386/pr42542-3.c new file mode 100644 index 00000000000..028d2f89964 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-3.c @@ -0,0 +1,85 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -msse2 -ftree-vectorize" } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#ifndef TEST +#define TEST sse2_test +#endif + +#include CHECK_H + +unsigned char v1[] __attribute__ ((aligned(16))) = +{ + 0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15, + 0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35, + 3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0, + 0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40 +}; +unsigned char v2[] __attribute__ ((aligned(16))) = +{ + 4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90, + 5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0, + 0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68, + 7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90 +}; + +unsigned char max[] = +{ + 0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90, + 0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0, + 0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0, + 0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90 +}; + +unsigned char min[] = +{ + 4, 44, 40, 48, 1, 15, 10, 15, + 5, 55, 50, 51, 2, 25, 20, 35, + 3, 34, 30, 36, 6, 61, 65, 68, + 7, 76, 75, 81, 25, 34, 30, 40 +}; + +unsigned char res[32] __attribute__ ((aligned(16))); + +extern void abort (void); + +void +find_max (void) +{ + int i; + + for (i = 0; i < 32; i++) + res[i] = v1[i] < v2[i] ? v2[i] : v1[i]; +} + +void +find_min (void) +{ + int i; + + for (i = 0; i < 32; i++) + res[i] = v1[i] > v2[i] ? v2[i] : v1[i]; +} + +static void +TEST (void) +{ + int i; + int err = 0; + + find_max (); + for (i = 0; i < 32; i++) + if (res[i] != max[i]) + err++; + + find_min (); + for (i = 0; i < 32; i++) + if (res[i] != min[i]) + err++; + + if (err) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr42542-3a.c b/gcc/testsuite/gcc.target/i386/pr42542-3a.c new file mode 100644 index 00000000000..754e59e8487 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr42542-3a.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2 -ftree-vectorize" } */ + +#include "pr42542-3.c" + +/* { dg-final { scan-assembler "pmaxub" } } */ +/* { dg-final { scan-assembler "pminub" } } */