Don't convert GTU to GT for V4SI and V2DI
gcc/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

    PR target/42542
    * config/i386/i386.c (ix86_expand_int_vcond): Don't convert
    GTU to GT for V4SI and V2DI.

    * config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
    (umin<mode>3): Removed.
    (uminv8hi3): New.
    (uminv4si3): Likewise.

gcc/testsuite/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

    PR target/42542
    * gcc.target/i386/pr42542-1.c: New.
    * gcc.target/i386/pr42542-1a.c: Likewise.
    * gcc.target/i386/pr42542-1b.c: Likewise.
    * gcc.target/i386/pr42542-2.c: Likewise.
    * gcc.target/i386/pr42542-2a.c: Likewise.
    * gcc.target/i386/pr42542-2b.c: Likewise.
    * gcc.target/i386/pr42542-3.c: Likewise.
    * gcc.target/i386/pr42542-3a.c: Likewise.

From-SVN: r155618
This commit is contained in:
parent
7152f51efd
commit
22554cf937
@ -1,3 +1,14 @@
|
||||
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/42542
|
||||
* config/i386/i386.c (ix86_expand_int_vcond): Don't convert
|
||||
GTU to GT for V4SI and V2DI.
|
||||
|
||||
* config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
|
||||
(umin<mode>3): Removed.
|
||||
(uminv8hi3): New.
|
||||
(uminv4si3): Likewise.
|
||||
|
||||
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR lto/42581
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Subroutines used for code generation on IA-32.
|
||||
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
||||
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
||||
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
||||
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
@ -16252,37 +16252,6 @@ ix86_expand_int_vcond (rtx operands[])
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V4SImode:
|
||||
case V2DImode:
|
||||
{
|
||||
rtx t1, t2, mask;
|
||||
|
||||
/* Perform a parallel modulo subtraction. */
|
||||
t1 = gen_reg_rtx (mode);
|
||||
emit_insn ((mode == V4SImode
|
||||
? gen_subv4si3
|
||||
: gen_subv2di3) (t1, cop0, cop1));
|
||||
|
||||
/* Extract the original sign bit of op0. */
|
||||
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
|
||||
true, false);
|
||||
t2 = gen_reg_rtx (mode);
|
||||
emit_insn ((mode == V4SImode
|
||||
? gen_andv4si3
|
||||
: gen_andv2di3) (t2, cop0, mask));
|
||||
|
||||
/* XOR it back into the result of the subtraction.
|
||||
This results in the sign bit set iff we saw
|
||||
unsigned underflow. */
|
||||
x = gen_reg_rtx (mode);
|
||||
emit_insn ((mode == V4SImode
|
||||
? gen_xorv4si3
|
||||
: gen_xorv2di3) (x, t1, t2));
|
||||
|
||||
code = GT;
|
||||
}
|
||||
break;
|
||||
|
||||
case V16QImode:
|
||||
case V8HImode:
|
||||
/* Perform a parallel unsigned saturating subtraction. */
|
||||
|
@ -1,5 +1,5 @@
|
||||
;; GCC machine description for SSE instructions
|
||||
;; Copyright (C) 2005, 2006, 2007, 2008, 2009
|
||||
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
|
||||
;; Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
@ -6138,7 +6138,7 @@
|
||||
[(set (match_operand:V4SI 0 "register_operand" "")
|
||||
(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
"TARGET_SSE4_1 || TARGET_XOP"
|
||||
{
|
||||
if (TARGET_SSE4_1)
|
||||
ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
|
||||
@ -6195,14 +6195,39 @@
|
||||
}
|
||||
})
|
||||
|
||||
(define_expand "umin<mode>3"
|
||||
[(set (match_operand:SSEMODE24 0 "register_operand" "")
|
||||
(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
|
||||
(match_operand:SSEMODE24 2 "register_operand" "")))]
|
||||
(define_expand "uminv8hi3"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "")
|
||||
(umin:V8HI (match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (TARGET_SSE4_1)
|
||||
ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
|
||||
ix86_fixup_binary_operands_no_copy (UMIN, V8HImode, operands);
|
||||
else
|
||||
{
|
||||
rtx xops[6];
|
||||
bool ok;
|
||||
|
||||
xops[0] = operands[0];
|
||||
xops[1] = operands[2];
|
||||
xops[2] = operands[1];
|
||||
xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
|
||||
xops[4] = operands[1];
|
||||
xops[5] = operands[2];
|
||||
ok = ix86_expand_int_vcond (xops);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_expand "uminv4si3"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "")
|
||||
(umin:V4SI (match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")))]
|
||||
"TARGET_SSE4_1 || TARGET_XOP"
|
||||
{
|
||||
if (TARGET_SSE4_1)
|
||||
ix86_fixup_binary_operands_no_copy (UMIN, V4SImode, operands);
|
||||
else
|
||||
{
|
||||
rtx xops[6];
|
||||
|
@ -1,3 +1,15 @@
|
||||
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/42542
|
||||
* gcc.target/i386/pr42542-1.c: New.
|
||||
* gcc.target/i386/pr42542-1a.c: Likewise.
|
||||
* gcc.target/i386/pr42542-1b.c: Likewise.
|
||||
* gcc.target/i386/pr42542-2.c: Likewise.
|
||||
* gcc.target/i386/pr42542-2a.c: Likewise.
|
||||
* gcc.target/i386/pr42542-2b.c: Likewise.
|
||||
* gcc.target/i386/pr42542-3.c: Likewise.
|
||||
* gcc.target/i386/pr42542-3a.c: Likewise.
|
||||
|
||||
2009-01-04 Tobias Burnus <burnus@net-b.de>
|
||||
|
||||
PR fortran/41872
|
||||
|
77
gcc/testsuite/gcc.target/i386/pr42542-1.c
Normal file
77
gcc/testsuite/gcc.target/i386/pr42542-1.c
Normal file
@ -0,0 +1,77 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse2_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
unsigned int v1[] __attribute__ ((aligned(16))) =
|
||||
{
|
||||
0x80000000, 1, 0xa0000000, 2,
|
||||
3, 0xd0000000, 0xf0000000, 0xe0000000
|
||||
};
|
||||
unsigned int v2[] __attribute__ ((aligned(16))) =
|
||||
{
|
||||
4, 0xb0000000, 5, 0xc0000000,
|
||||
0xd0000000, 6, 7, 8
|
||||
};
|
||||
|
||||
unsigned int max[] =
|
||||
{
|
||||
0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
|
||||
0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
|
||||
};
|
||||
|
||||
unsigned int min[] =
|
||||
{
|
||||
4, 1, 5, 2,
|
||||
3, 6, 7, 8
|
||||
};
|
||||
|
||||
unsigned int res[16] __attribute__ ((aligned(16)));
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
/* Store the element-wise maximum of the unsigned int arrays v1 and v2
   into res[0..7].  Written as a plain scalar loop; the dg-options of
   this test pass -ftree-vectorize.  */
void
|
||||
find_max (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
|
||||
}
|
||||
|
||||
/* Store the element-wise minimum of the unsigned int arrays v1 and v2
   into res[0..7].  Counterpart of find_max with the comparison
   reversed.  */
void
|
||||
find_min (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
|
||||
}
|
||||
|
||||
/* Test body; TEST expands to sse2_test unless the including file
   defines it otherwise.  Runs find_max and find_min in turn, compares
   the first 8 elements of res against the expected max[] and min[]
   tables, counts mismatches in err, and aborts if any were found.  */
static void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
int err = 0;
|
||||
|
||||
find_max ();
|
||||
for (i = 0; i < 8; i++)
|
||||
if (res[i] != max[i])
|
||||
err++;
|
||||
|
||||
find_min ();
|
||||
for (i = 0; i < 8; i++)
|
||||
if (res[i] != min[i])
|
||||
err++;
|
||||
|
||||
if (err)
|
||||
abort ();
|
||||
}
|
8
gcc/testsuite/gcc.target/i386/pr42542-1a.c
Normal file
8
gcc/testsuite/gcc.target/i386/pr42542-1a.c
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
|
||||
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#define TEST sse4_1_test
|
||||
|
||||
#include "pr42542-1.c"
|
10
gcc/testsuite/gcc.target/i386/pr42542-1b.c
Normal file
10
gcc/testsuite/gcc.target/i386/pr42542-1b.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
|
||||
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#define TEST sse4_1_test
|
||||
|
||||
#include "pr42542-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler "pmaxud" } } */
|
||||
/* { dg-final { scan-assembler "pminud" } } */
|
77
gcc/testsuite/gcc.target/i386/pr42542-2.c
Normal file
77
gcc/testsuite/gcc.target/i386/pr42542-2.c
Normal file
@ -0,0 +1,77 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse2_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
unsigned short v1[] __attribute__ ((aligned(16))) =
|
||||
{
|
||||
0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
|
||||
3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
|
||||
};
|
||||
unsigned short v2[] __attribute__ ((aligned(16))) =
|
||||
{
|
||||
4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
|
||||
0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
|
||||
};
|
||||
|
||||
unsigned short max[] =
|
||||
{
|
||||
0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
|
||||
0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
|
||||
};
|
||||
|
||||
unsigned short min[] =
|
||||
{
|
||||
4, 40, 1, 10, 5, 50, 2, 20,
|
||||
3, 30, 6, 65, 7, 75, 25, 30
|
||||
};
|
||||
|
||||
unsigned short res[16] __attribute__ ((aligned(16)));
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
/* Store the element-wise maximum of the unsigned short arrays v1 and
   v2 into res[0..15].  Written as a plain scalar loop; the dg-options
   of this test pass -ftree-vectorize.  */
void
|
||||
find_max (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
|
||||
}
|
||||
|
||||
/* Store the element-wise minimum of the unsigned short arrays v1 and
   v2 into res[0..15].  Counterpart of find_max with the comparison
   reversed.  */
void
|
||||
find_min (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
|
||||
}
|
||||
|
||||
/* Test body; TEST expands to sse2_test unless the including file
   defines it otherwise.  Runs find_max and find_min in turn, compares
   all 16 elements of res against the expected max[] and min[] tables,
   counts mismatches in err, and aborts if any were found.  */
static void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
int err = 0;
|
||||
|
||||
find_max ();
|
||||
for (i = 0; i < 16; i++)
|
||||
if (res[i] != max[i])
|
||||
err++;
|
||||
|
||||
find_min ();
|
||||
for (i = 0; i < 16; i++)
|
||||
if (res[i] != min[i])
|
||||
err++;
|
||||
|
||||
if (err)
|
||||
abort ();
|
||||
}
|
8
gcc/testsuite/gcc.target/i386/pr42542-2a.c
Normal file
8
gcc/testsuite/gcc.target/i386/pr42542-2a.c
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
|
||||
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#define TEST sse4_1_test
|
||||
|
||||
#include "pr42542-2.c"
|
10
gcc/testsuite/gcc.target/i386/pr42542-2b.c
Normal file
10
gcc/testsuite/gcc.target/i386/pr42542-2b.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
|
||||
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#define TEST sse4_1_test
|
||||
|
||||
#include "pr42542-2.c"
|
||||
|
||||
/* { dg-final { scan-assembler "pmaxuw" } } */
|
||||
/* { dg-final { scan-assembler "pminuw" } } */
|
85
gcc/testsuite/gcc.target/i386/pr42542-3.c
Normal file
85
gcc/testsuite/gcc.target/i386/pr42542-3.c
Normal file
@ -0,0 +1,85 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse2_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
unsigned char v1[] __attribute__ ((aligned(16))) =
|
||||
{
|
||||
0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
|
||||
0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
|
||||
3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
|
||||
0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
|
||||
};
|
||||
unsigned char v2[] __attribute__ ((aligned(16))) =
|
||||
{
|
||||
4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
|
||||
5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
|
||||
0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
|
||||
7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
|
||||
};
|
||||
|
||||
unsigned char max[] =
|
||||
{
|
||||
0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
|
||||
0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
|
||||
0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
|
||||
0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
|
||||
};
|
||||
|
||||
unsigned char min[] =
|
||||
{
|
||||
4, 44, 40, 48, 1, 15, 10, 15,
|
||||
5, 55, 50, 51, 2, 25, 20, 35,
|
||||
3, 34, 30, 36, 6, 61, 65, 68,
|
||||
7, 76, 75, 81, 25, 34, 30, 40
|
||||
};
|
||||
|
||||
unsigned char res[32] __attribute__ ((aligned(16)));
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
/* Store the element-wise maximum of the unsigned char arrays v1 and
   v2 into res[0..31].  Written as a plain scalar loop; the dg-options
   of this test pass -ftree-vectorize.  */
void
|
||||
find_max (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
|
||||
}
|
||||
|
||||
/* Store the element-wise minimum of the unsigned char arrays v1 and
   v2 into res[0..31].  Counterpart of find_max with the comparison
   reversed.  */
void
|
||||
find_min (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
|
||||
}
|
||||
|
||||
/* Test body; TEST expands to sse2_test unless the including file
   defines it otherwise.  Runs find_max and find_min in turn, compares
   all 32 elements of res against the expected max[] and min[] tables,
   counts mismatches in err, and aborts if any were found.  */
static void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
int err = 0;
|
||||
|
||||
find_max ();
|
||||
for (i = 0; i < 32; i++)
|
||||
if (res[i] != max[i])
|
||||
err++;
|
||||
|
||||
find_min ();
|
||||
for (i = 0; i < 32; i++)
|
||||
if (res[i] != min[i])
|
||||
err++;
|
||||
|
||||
if (err)
|
||||
abort ();
|
||||
}
|
7
gcc/testsuite/gcc.target/i386/pr42542-3a.c
Normal file
7
gcc/testsuite/gcc.target/i386/pr42542-3a.c
Normal file
@ -0,0 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
|
||||
|
||||
#include "pr42542-3.c"
|
||||
|
||||
/* { dg-final { scan-assembler "pmaxub" } } */
|
||||
/* { dg-final { scan-assembler "pminub" } } */
|
Loading…
Reference in New Issue
Block a user