Don't convert GTU to GT for V4SI and V2DI

gcc/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* config/i386/i386.c (ix86_expand_int_vcond): Don't convert
	GTU to GT for V4SI and V2DI.

	* config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
	(umin<mode>3): Removed.
	(uminv8hi3): New.
	(uminv4si3): Likewise.

gcc/testsuite/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* gcc.target/i386/pr42542-1.c: New.
	* gcc.target/i386/pr42542-1a.c: Likewise.
	* gcc.target/i386/pr42542-1b.c: Likewise.
	* gcc.target/i386/pr42542-2.c: Likewise.
	* gcc.target/i386/pr42542-2a.c: Likewise.
	* gcc.target/i386/pr42542-2b.c: Likewise.
	* gcc.target/i386/pr42542-3.c: Likewise.
	* gcc.target/i386/pr42542-3a.c: Likewise.

From-SVN: r155618
This commit is contained in:
H.J. Lu 2010-01-04 15:14:31 +00:00 committed by H.J. Lu
parent 7152f51efd
commit 22554cf937
12 changed files with 339 additions and 40 deletions

View File

@ -1,3 +1,14 @@
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* config/i386/i386.c (ix86_expand_int_vcond): Don't convert
GTU to GT for V4SI and V2DI.
* config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
(umin<mode>3): Removed.
(uminv8hi3): New.
(uminv4si3): Likewise.
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR lto/42581

View File

@ -1,6 +1,6 @@
/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This file is part of GCC.
@ -16252,37 +16252,6 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
case V4SImode:
case V2DImode:
{
rtx t1, t2, mask;
/* Perform a parallel modulo subtraction. */
t1 = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_subv4si3
: gen_subv2di3) (t1, cop0, cop1));
/* Extract the original sign bit of op0. */
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
true, false);
t2 = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_andv4si3
: gen_andv2di3) (t2, cop0, mask));
/* XOR it back into the result of the subtraction.
This results in the sign bit set iff we saw
unsigned underflow. */
x = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_xorv4si3
: gen_xorv2di3) (x, t1, t2));
code = GT;
}
break;
case V16QImode:
case V8HImode:
/* Perform a parallel unsigned saturating subtraction. */

View File

@ -1,5 +1,5 @@
;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007, 2008, 2009
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@ -6138,7 +6138,7 @@
[(set (match_operand:V4SI 0 "register_operand" "")
(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
"TARGET_SSE4_1 || TARGET_XOP"
{
if (TARGET_SSE4_1)
ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
@ -6195,14 +6195,39 @@
}
})
(define_expand "umin<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "")
(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
(match_operand:SSEMODE24 2 "register_operand" "")))]
(define_expand "uminv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "")
(umin:V8HI (match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")))]
"TARGET_SSE2"
{
if (TARGET_SSE4_1)
ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
ix86_fixup_binary_operands_no_copy (UMIN, V8HImode, operands);
else
{
rtx xops[6];
bool ok;
xops[0] = operands[0];
xops[1] = operands[2];
xops[2] = operands[1];
xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
}
})
(define_expand "uminv4si3"
[(set (match_operand:V4SI 0 "register_operand" "")
(umin:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE4_1 || TARGET_XOP"
{
if (TARGET_SSE4_1)
ix86_fixup_binary_operands_no_copy (UMIN, V4SImode, operands);
else
{
rtx xops[6];

View File

@ -1,3 +1,15 @@
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* gcc.target/i386/pr42542-1.c: New.
* gcc.target/i386/pr42542-1a.c: Likewise.
* gcc.target/i386/pr42542-1b.c: Likewise.
* gcc.target/i386/pr42542-2.c: Likewise.
* gcc.target/i386/pr42542-2a.c: Likewise.
* gcc.target/i386/pr42542-2b.c: Likewise.
* gcc.target/i386/pr42542-3.c: Likewise.
* gcc.target/i386/pr42542-3a.c: Likewise.
2009-01-04 Tobias Burnus <burnus@net-b.de>
PR fortran/41872

View File

@ -0,0 +1,77 @@
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* Input vectors.  In every position exactly one of v1[i] and v2[i] has
   bit 31 set, so (with 32-bit int, as on the i386 targets this test is
   written for) comparing the elements as signed values would reverse
   the outcome of every comparison.  */
unsigned int v1[] __attribute__ ((aligned(16))) =
{
0x80000000, 1, 0xa0000000, 2,
3, 0xd0000000, 0xf0000000, 0xe0000000
};
unsigned int v2[] __attribute__ ((aligned(16))) =
{
4, 0xb0000000, 5, 0xc0000000,
0xd0000000, 6, 7, 8
};
/* Expected element-wise unsigned maximum of v1 and v2.  */
unsigned int max[] =
{
0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
};
/* Expected element-wise unsigned minimum of v1 and v2.  */
unsigned int min[] =
{
4, 1, 5, 2,
3, 6, 7, 8
};
/* Output buffer.  find_max and find_min write only the first 8 of its
   16 elements, and only those are checked.  */
unsigned int res[16] __attribute__ ((aligned(16)));
extern void abort (void);
/* Store the element-wise unsigned maximum of v1 and v2 into res[0..7].
   NOTE(review): the loop and conditional-expression shape is presumably
   deliberate input for the vectorizer (the file is compiled with
   -ftree-vectorize); avoid restructuring it.  */
void
find_max (void)
{
int i;
for (i = 0; i < 8; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store the element-wise unsigned minimum of v1 and v2 into res[0..7].
   NOTE(review): the loop and conditional-expression shape is presumably
   deliberate input for the vectorizer (the file is compiled with
   -ftree-vectorize); avoid restructuring it.  */
void
find_min (void)
{
int i;
for (i = 0; i < 8; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body.  TEST expands to sse2_test unless a wrapper file defines it
   differently; presumably the harness in CHECK_H calls it.  Runs find_max
   and find_min in turn and aborts if any of the first 8 elements of res
   differs from the matching reference table.  */
static void
TEST (void)
{
  int mismatches = 0;
  int k;

  /* The unsigned maximum must agree with the max[] table.  */
  find_max ();
  for (k = 0; k != 8; ++k)
    mismatches += res[k] != max[k];

  /* Likewise the unsigned minimum with the min[] table.  */
  find_min ();
  for (k = 0; k != 8; ++k)
    mismatches += res[k] != min[k];

  if (mismatches != 0)
    abort ();
}

View File

@ -0,0 +1,8 @@
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-1.c"

View File

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-1.c"
/* { dg-final { scan-assembler "pmaxud" } } */
/* { dg-final { scan-assembler "pminud" } } */

View File

@ -0,0 +1,77 @@
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* Input vectors.  In every position exactly one of v1[i] and v2[i] has
   bit 15 set, so (with 16-bit short, as on the i386 targets this test is
   written for) comparing the elements as signed 16-bit values would
   reverse the outcome of every comparison.  */
unsigned short v1[] __attribute__ ((aligned(16))) =
{
0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
};
unsigned short v2[] __attribute__ ((aligned(16))) =
{
4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
};
/* Expected element-wise unsigned maximum of v1 and v2.  */
unsigned short max[] =
{
0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
};
/* Expected element-wise unsigned minimum of v1 and v2.  */
unsigned short min[] =
{
4, 40, 1, 10, 5, 50, 2, 20,
3, 30, 6, 65, 7, 75, 25, 30
};
/* Output buffer; find_max and find_min write all 16 elements.  */
unsigned short res[16] __attribute__ ((aligned(16)));
extern void abort (void);
/* Store the element-wise unsigned maximum of v1 and v2 into res[0..15].
   NOTE(review): the loop and conditional-expression shape is presumably
   deliberate input for the vectorizer (the file is compiled with
   -ftree-vectorize); avoid restructuring it.  */
void
find_max (void)
{
int i;
for (i = 0; i < 16; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store the element-wise unsigned minimum of v1 and v2 into res[0..15].
   NOTE(review): the loop and conditional-expression shape is presumably
   deliberate input for the vectorizer (the file is compiled with
   -ftree-vectorize); avoid restructuring it.  */
void
find_min (void)
{
int i;
for (i = 0; i < 16; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body.  TEST expands to sse2_test unless a wrapper file defines it
   differently; presumably the harness in CHECK_H calls it.  Runs find_max
   and find_min in turn and aborts if any of the 16 elements of res
   differs from the matching reference table.  */
static void
TEST (void)
{
  int mismatches = 0;
  int k;

  /* The unsigned maximum must agree with the max[] table.  */
  find_max ();
  for (k = 0; k != 16; ++k)
    mismatches += res[k] != max[k];

  /* Likewise the unsigned minimum with the min[] table.  */
  find_min ();
  for (k = 0; k != 16; ++k)
    mismatches += res[k] != min[k];

  if (mismatches != 0)
    abort ();
}

View File

@ -0,0 +1,8 @@
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-2.c"

View File

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-2.c"
/* { dg-final { scan-assembler "pmaxuw" } } */
/* { dg-final { scan-assembler "pminuw" } } */

View File

@ -0,0 +1,85 @@
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* Input vectors.  In every position exactly one of v1[i] and v2[i] has
   bit 7 set, so comparing the elements as signed 8-bit values would
   reverse the outcome of every comparison.  */
unsigned char v1[] __attribute__ ((aligned(16))) =
{
0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
};
unsigned char v2[] __attribute__ ((aligned(16))) =
{
4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
};
/* Expected element-wise unsigned maximum of v1 and v2.  */
unsigned char max[] =
{
0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
};
/* Expected element-wise unsigned minimum of v1 and v2.  */
unsigned char min[] =
{
4, 44, 40, 48, 1, 15, 10, 15,
5, 55, 50, 51, 2, 25, 20, 35,
3, 34, 30, 36, 6, 61, 65, 68,
7, 76, 75, 81, 25, 34, 30, 40
};
/* Output buffer; find_max and find_min write all 32 elements.  */
unsigned char res[32] __attribute__ ((aligned(16)));
extern void abort (void);
/* Store the element-wise unsigned maximum of v1 and v2 into res[0..31].
   NOTE(review): the loop and conditional-expression shape is presumably
   deliberate input for the vectorizer (the file is compiled with
   -ftree-vectorize); avoid restructuring it.  */
void
find_max (void)
{
int i;
for (i = 0; i < 32; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store the element-wise unsigned minimum of v1 and v2 into res[0..31].
   NOTE(review): the loop and conditional-expression shape is presumably
   deliberate input for the vectorizer (the file is compiled with
   -ftree-vectorize); avoid restructuring it.  */
void
find_min (void)
{
int i;
for (i = 0; i < 32; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body.  TEST expands to sse2_test unless a wrapper file defines it
   differently; presumably the harness in CHECK_H calls it.  Runs find_max
   and find_min in turn and aborts if any of the 32 elements of res
   differs from the matching reference table.  */
static void
TEST (void)
{
  int mismatches = 0;
  int k;

  /* The unsigned maximum must agree with the max[] table.  */
  find_max ();
  for (k = 0; k != 32; ++k)
    mismatches += res[k] != max[k];

  /* Likewise the unsigned minimum with the min[] table.  */
  find_min ();
  for (k = 0; k != 32; ++k)
    mismatches += res[k] != min[k];

  if (mismatches != 0)
    abort ();
}

View File

@ -0,0 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#include "pr42542-3.c"
/* { dg-final { scan-assembler "pmaxub" } } */
/* { dg-final { scan-assembler "pminub" } } */