Properly convert GTU to GT for V4SI and V2DI

gcc/

2010-01-07  H.J. Lu  <hongjiu.lu@intel.com>

	Backport from mainline
	2010-01-05  Paolo Bonzini  <bonzini@gnu.org>
		    H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to GT
	for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both
	operands to make them signed.

gcc/testsuite/

2010-01-07  H.J. Lu  <hongjiu.lu@intel.com>

	Backport from mainline
	2010-01-05  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* gcc.target/i386/pr42542-1.c: New.
	* gcc.target/i386/pr42542-1a.c: Likewise.
	* gcc.target/i386/pr42542-1b.c: Likewise.
	* gcc.target/i386/pr42542-2.c: Likewise.
	* gcc.target/i386/pr42542-2a.c: Likewise.
	* gcc.target/i386/pr42542-2b.c: Likewise.
	* gcc.target/i386/pr42542-3.c: Likewise.
	* gcc.target/i386/pr42542-3a.c: Likewise.

From-SVN: r155707
This commit is contained in:
H.J. Lu 2010-01-07 19:55:44 +00:00 committed by H.J. Lu
parent 2e3dce632f
commit 8e3f0db6c2
11 changed files with 330 additions and 28 deletions

View File

@ -1,3 +1,14 @@
2010-01-07 H.J. Lu <hongjiu.lu@intel.com>
Backport from mainline
2010-01-05 Paolo Bonzini <bonzini@gnu.org>
H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to GT
for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both
operands to make them signed.
2010-01-07 Uros Bizjak <ubizjak@gmail.com>
* ifcvt.c (if_convert): Output slim multiple dumps with TDF_SLIM.

View File

@ -15675,8 +15675,9 @@ ix86_expand_int_vcond (rtx operands[])
}
}
/* Unsigned parallel compare is not supported by the hardware. Play some
tricks to turn this into a signed comparison against 0. */
/* Unsigned parallel compare is not supported by the hardware.
Play some tricks to turn this into a signed comparison
against 0. */
if (code == GTU)
{
cop0 = force_reg (mode, cop0);
@ -15685,32 +15686,26 @@ ix86_expand_int_vcond (rtx operands[])
{
case V4SImode:
case V2DImode:
{
rtx t1, t2, mask;
{
rtx t1, t2, mask;
rtx (*gen_sub3) (rtx, rtx, rtx);
/* Perform a parallel modulo subtraction. */
t1 = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_subv4si3
: gen_subv2di3) (t1, cop0, cop1));
/* Subtract (-(INT MAX) - 1) from both operands to make
them signed. */
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
true, false);
gen_sub3 = (mode == V4SImode
? gen_subv4si3 : gen_subv2di3);
t1 = gen_reg_rtx (mode);
emit_insn (gen_sub3 (t1, cop0, mask));
/* Extract the original sign bit of op0. */
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
true, false);
t2 = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_andv4si3
: gen_andv2di3) (t2, cop0, mask));
t2 = gen_reg_rtx (mode);
emit_insn (gen_sub3 (t2, cop1, mask));
/* XOR it back into the result of the subtraction. This results
in the sign bit set iff we saw unsigned underflow. */
x = gen_reg_rtx (mode);
emit_insn ((mode == V4SImode
? gen_xorv4si3
: gen_xorv2di3) (x, t1, t2));
code = GT;
}
cop0 = t1;
cop1 = t2;
code = GT;
}
break;
case V16QImode:
@ -15720,6 +15715,8 @@ ix86_expand_int_vcond (rtx operands[])
emit_insn (gen_rtx_SET (VOIDmode, x,
gen_rtx_US_MINUS (mode, cop0, cop1)));
cop0 = x;
cop1 = CONST0_RTX (mode);
code = EQ;
negate = !negate;
break;
@ -15727,9 +15724,6 @@ ix86_expand_int_vcond (rtx operands[])
default:
gcc_unreachable ();
}
cop0 = x;
cop1 = CONST0_RTX (mode);
}
}

View File

@ -1,3 +1,18 @@
2010-01-07 H.J. Lu <hongjiu.lu@intel.com>
Backport from mainline
2010-01-05 H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* gcc.target/i386/pr42542-1.c: New.
* gcc.target/i386/pr42542-1a.c: Likewise.
* gcc.target/i386/pr42542-1b.c: Likewise.
* gcc.target/i386/pr42542-2.c: Likewise.
* gcc.target/i386/pr42542-2a.c: Likewise.
* gcc.target/i386/pr42542-2b.c: Likewise.
* gcc.target/i386/pr42542-3.c: Likewise.
* gcc.target/i386/pr42542-3a.c: Likewise.
2010-01-05 Eric Botcazou <ebotcazou@adacore.com>
* gcc.dg/tls/opt-15.c: New test.

View File

@ -0,0 +1,77 @@
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* First input vector (8 unsigned ints, 16-byte aligned).  It mixes
   values with the top bit set (0x80000000 and above) with small
   values, so a signed comparison would order some pairs differently
   from an unsigned one.  */
unsigned int v1[] __attribute__ ((aligned(16))) =
{
0x80000000, 1, 0xa0000000, 2,
3, 0xd0000000, 0xf0000000, 0xe0000000
};
/* Second input vector, paired element by element with v1.  */
unsigned int v2[] __attribute__ ((aligned(16))) =
{
4, 0xb0000000, 5, 0xc0000000,
0xd0000000, 6, 7, 8
};
/* Expected contents of res after find_max: the element-wise unsigned
   maximum of v1 and v2.  */
unsigned int max[] =
{
0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
};
/* Expected contents of res after find_min: the element-wise unsigned
   minimum of v1 and v2.  */
unsigned int min[] =
{
4, 1, 5, 2,
3, 6, 7, 8
};
/* Output buffer written by find_max and find_min.  */
unsigned int res[8] __attribute__ ((aligned(16)));
/* Called by the TEST routine when any result element is wrong.  */
extern void abort (void);
/* Store in res[i] the larger of v1[i] and v2[i] for each of the 8
   elements.  Both operands are unsigned int, so `<' is an unsigned
   comparison.
   NOTE(review): this loop is presumably the one the vectorizer is
   expected to transform (the file is built with -ftree-vectorize);
   keep the shape of the expression unchanged.  */
void
find_max (void)
{
int i;
for (i = 0; i < 8; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store in res[i] the smaller of v1[i] and v2[i] for each of the 8
   elements.  Both operands are unsigned int, so `>' is an unsigned
   comparison.
   NOTE(review): this loop is presumably the one the vectorizer is
   expected to transform (the file is built with -ftree-vectorize);
   keep the shape of the expression unchanged.  */
void
find_min (void)
{
int i;
for (i = 0; i < 8; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body, named through the TEST macro.  Runs find_max and then
   find_min, compares res against the expected max and min tables
   element by element, and aborts if any element differed.  */
static void
TEST (void)
{
  int mismatches = 0;
  int k;

  /* Check the element-wise maximum.  */
  find_max ();
  for (k = 0; k < 8; ++k)
    mismatches += (res[k] != max[k]);

  /* Check the element-wise minimum.  */
  find_min ();
  for (k = 0; k < 8; ++k)
    mismatches += (res[k] != min[k]);

  if (mismatches != 0)
    abort ();
}

View File

@ -0,0 +1,8 @@
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-1.c"

View File

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-1.c"
/* { dg-final { scan-assembler "pmaxud" } } */
/* { dg-final { scan-assembler "pminud" } } */

View File

@ -0,0 +1,77 @@
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* First input vector (16 unsigned shorts, 16-byte aligned).  It mixes
   values with the top bit set (0x8000 and above) with small values,
   so treating the elements as signed 16-bit quantities would order
   some pairs differently.  */
unsigned short v1[] __attribute__ ((aligned(16))) =
{
0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
};
/* Second input vector, paired element by element with v1.  */
unsigned short v2[] __attribute__ ((aligned(16))) =
{
4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
};
/* Expected contents of res after find_max: the element-wise maximum
   of v1 and v2.  */
unsigned short max[] =
{
0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
};
/* Expected contents of res after find_min: the element-wise minimum
   of v1 and v2.  */
unsigned short min[] =
{
4, 40, 1, 10, 5, 50, 2, 20,
3, 30, 6, 65, 7, 75, 25, 30
};
/* Output buffer written by find_max and find_min.  */
unsigned short res[16] __attribute__ ((aligned(16)));
/* Called by the TEST routine when any result element is wrong.  */
extern void abort (void);
/* Store in res[i] the larger of v1[i] and v2[i] for each of the 16
   elements.  The operands are unsigned short, so they are never
   negative when compared.
   NOTE(review): this loop is presumably the one the vectorizer is
   expected to transform (the file is built with -ftree-vectorize);
   keep the shape of the expression unchanged.  */
void
find_max (void)
{
int i;
for (i = 0; i < 16; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store in res[i] the smaller of v1[i] and v2[i] for each of the 16
   elements.  The operands are unsigned short, so they are never
   negative when compared.
   NOTE(review): this loop is presumably the one the vectorizer is
   expected to transform (the file is built with -ftree-vectorize);
   keep the shape of the expression unchanged.  */
void
find_min (void)
{
int i;
for (i = 0; i < 16; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body, named through the TEST macro.  Runs find_max and then
   find_min, compares res against the expected max and min tables
   element by element, and aborts if any element differed.  */
static void
TEST (void)
{
  int mismatches = 0;
  int k;

  /* Check the element-wise maximum.  */
  find_max ();
  for (k = 0; k < 16; ++k)
    mismatches += (res[k] != max[k]);

  /* Check the element-wise minimum.  */
  find_min ();
  for (k = 0; k < 16; ++k)
    mismatches += (res[k] != min[k]);

  if (mismatches != 0)
    abort ();
}

View File

@ -0,0 +1,8 @@
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-2.c"

View File

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "pr42542-2.c"
/* { dg-final { scan-assembler "pmaxuw" } } */
/* { dg-final { scan-assembler "pminuw" } } */

View File

@ -0,0 +1,85 @@
/* { dg-do run } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#ifndef TEST
#define TEST sse2_test
#endif
#include CHECK_H
/* First input vector (32 unsigned chars, 16-byte aligned).  It mixes
   values with the top bit set (0x80 and above) with small values, so
   treating the elements as signed 8-bit quantities would order some
   pairs differently.  */
unsigned char v1[] __attribute__ ((aligned(16))) =
{
0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
};
/* Second input vector, paired element by element with v1.  */
unsigned char v2[] __attribute__ ((aligned(16))) =
{
4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
};
/* Expected contents of res after find_max: the element-wise maximum
   of v1 and v2.  */
unsigned char max[] =
{
0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
};
/* Expected contents of res after find_min: the element-wise minimum
   of v1 and v2.  */
unsigned char min[] =
{
4, 44, 40, 48, 1, 15, 10, 15,
5, 55, 50, 51, 2, 25, 20, 35,
3, 34, 30, 36, 6, 61, 65, 68,
7, 76, 75, 81, 25, 34, 30, 40
};
/* Output buffer written by find_max and find_min.  */
unsigned char res[32] __attribute__ ((aligned(16)));
/* Called by the TEST routine when any result element is wrong.  */
extern void abort (void);
/* Store in res[i] the larger of v1[i] and v2[i] for each of the 32
   elements.  The operands are unsigned char, so they are never
   negative when compared.
   NOTE(review): this loop is presumably the one the vectorizer is
   expected to transform (the file is built with -ftree-vectorize);
   keep the shape of the expression unchanged.  */
void
find_max (void)
{
int i;
for (i = 0; i < 32; i++)
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
}
/* Store in res[i] the smaller of v1[i] and v2[i] for each of the 32
   elements.  The operands are unsigned char, so they are never
   negative when compared.
   NOTE(review): this loop is presumably the one the vectorizer is
   expected to transform (the file is built with -ftree-vectorize);
   keep the shape of the expression unchanged.  */
void
find_min (void)
{
int i;
for (i = 0; i < 32; i++)
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
}
/* Test body, named through the TEST macro.  Runs find_max and then
   find_min, compares res against the expected max and min tables
   element by element, and aborts if any element differed.  */
static void
TEST (void)
{
  int mismatches = 0;
  int k;

  /* Check the element-wise maximum.  */
  find_max ();
  for (k = 0; k < 32; ++k)
    mismatches += (res[k] != max[k]);

  /* Check the element-wise minimum.  */
  find_min ();
  for (k = 0; k < 32; ++k)
    mismatches += (res[k] != min[k]);

  if (mismatches != 0)
    abort ();
}

View File

@ -0,0 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
#include "pr42542-3.c"
/* { dg-final { scan-assembler "pmaxub" } } */
/* { dg-final { scan-assembler "pminub" } } */