i386.c (ix86_expand_vector_init_general): Optimize V8HImode for SSE2 and V16QImode for SSE4.1.
gcc/ 2008-05-15 H.J. Lu <hongjiu.lu@intel.com> * config/i386/i386.c (ix86_expand_vector_init_general): Optimize V8HImode for SSE2 and V16QImode for SSE4.1. gcc/testsuite/ 2008-05-15 H.J. Lu <hongjiu.lu@intel.com> * gcc.target/i386/m128-check.h: New. * gcc.target/i386/set-v16qi-1.h: Likewise. * gcc.target/i386/set-v16qi-2.h: Likewise. * gcc.target/i386/set-v8hi-1.h: Likewise. * gcc.target/i386/set-v8hi-2.h: Likewise. * gcc.target/i386/sse2-set-v16qi-1.c: Likewise. * gcc.target/i386/sse2-set-v16qi-2.c: Likewise. * gcc.target/i386/sse2-set-v8hi-1.c: Likewise. * gcc.target/i386/sse2-set-v8hi-2.c: Likewise. * gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise. * gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise. * gcc.target/i386/sse2-check.h: Include m128-check.h. Don't include <stdio.h>. * gcc.target/i386/sse4_1-check.h: Likewise. From-SVN: r135409
This commit is contained in:
parent
d93712d9ff
commit
d0208f9b64
@ -1,3 +1,8 @@
|
||||
2008-05-15 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* config/i386/i386.c (ix86_expand_vector_init_general): Optimize
|
||||
V8HImode for SSE2 and V16QImode for SSE4.1.
|
||||
|
||||
2008-05-15 Kenneth Zadeck <zadeck@naturalbridge.com>
|
||||
|
||||
* cgraph.h (compute_inline_parameters): Made public.
|
||||
|
@ -23892,7 +23892,142 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
|
||||
break;
|
||||
|
||||
case V8HImode:
|
||||
if (TARGET_SSE2)
|
||||
{
|
||||
rtx ops[4];
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE (ops); i++)
|
||||
{
|
||||
/* Extend the odd element from HImode to SImode using
|
||||
a paradoxical SUBREG. */
|
||||
op0 = gen_reg_rtx (SImode);
|
||||
emit_move_insn (op0, gen_lowpart (SImode,
|
||||
XVECEXP (vals, 0,
|
||||
i + i)));
|
||||
|
||||
/* Insert the SImode value as low element of V4SImode
|
||||
vector. */
|
||||
op1 = gen_reg_rtx (V4SImode);
|
||||
op0 = gen_rtx_VEC_MERGE (V4SImode,
|
||||
gen_rtx_VEC_DUPLICATE (V4SImode,
|
||||
op0),
|
||||
CONST0_RTX (V4SImode),
|
||||
const1_rtx);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
|
||||
|
||||
/* Cast the V4SImode vector back to a V8HImode vector. */
|
||||
op0 = gen_reg_rtx (mode);
|
||||
emit_move_insn (op0, gen_lowpart (mode, op1));
|
||||
|
||||
/* Load even HI elements into the second position. */
|
||||
emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0,
|
||||
i + i + 1),
|
||||
const1_rtx));
|
||||
|
||||
/* Cast V8HImode vector to V4SImode vector. */
|
||||
ops[i] = gen_reg_rtx (V4SImode);
|
||||
emit_move_insn (ops[i], gen_lowpart (V4SImode, op0));
|
||||
}
|
||||
|
||||
/* Interleave low V4SIs. */
|
||||
for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
|
||||
{
|
||||
op0 = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
|
||||
ops[i + 1]));
|
||||
|
||||
/* Cast V4SImode vectors to V2DImode vectors. */
|
||||
op1 = gen_reg_rtx (V2DImode);
|
||||
emit_move_insn (op1, gen_lowpart (V2DImode, op0));
|
||||
ops[j] = op1;
|
||||
}
|
||||
|
||||
/* Interleave low V2DIs. */
|
||||
op0 = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
|
||||
|
||||
/* Cast the V2DImode vector back to a V8HImode vector. */
|
||||
emit_insn (gen_rtx_SET (VOIDmode, target,
|
||||
gen_lowpart (mode, op0)));
|
||||
return;
|
||||
}
|
||||
|
||||
case V16QImode:
|
||||
if (TARGET_SSE4_1)
|
||||
{
|
||||
rtx ops[8];
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE (ops); i++)
|
||||
{
|
||||
/* Extend the odd element from QImode to SImode using
|
||||
a paradoxical SUBREG. */
|
||||
op0 = gen_reg_rtx (SImode);
|
||||
emit_move_insn (op0, gen_lowpart (SImode,
|
||||
XVECEXP (vals, 0,
|
||||
i + i)));
|
||||
|
||||
/* Insert the SImode value as low element of V4SImode
|
||||
vector. */
|
||||
op1 = gen_reg_rtx (V4SImode);
|
||||
op0 = gen_rtx_VEC_MERGE (V4SImode,
|
||||
gen_rtx_VEC_DUPLICATE (V4SImode,
|
||||
op0),
|
||||
CONST0_RTX (V4SImode),
|
||||
const1_rtx);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
|
||||
|
||||
/* Cast the V4SImode vector back to a V16QImode vector. */
|
||||
op0 = gen_reg_rtx (mode);
|
||||
emit_move_insn (op0, gen_lowpart (mode, op1));
|
||||
|
||||
/* Load even QI elements into the second position. */
|
||||
emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0,
|
||||
i + i + 1),
|
||||
const1_rtx));
|
||||
|
||||
/* Cast V16QImode vector to V8HImode vector. */
|
||||
ops[i] = gen_reg_rtx (V8HImode);
|
||||
emit_move_insn (ops[i], gen_lowpart (V8HImode, op0));
|
||||
}
|
||||
|
||||
/* Interleave low V8HIs. */
|
||||
for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
|
||||
{
|
||||
op0 = gen_reg_rtx (V8HImode);
|
||||
emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i],
|
||||
ops[i + 1]));
|
||||
|
||||
/* Cast V8HImode vector to V4SImode vector. */
|
||||
op1 = gen_reg_rtx (V4SImode);
|
||||
emit_move_insn (op1, gen_lowpart (V4SImode, op0));
|
||||
ops[j] = op1;
|
||||
}
|
||||
|
||||
/* Interleave low V4SIs. */
|
||||
for (i = j = 0; i < ARRAY_SIZE (ops) / 2; i += 2, j++)
|
||||
{
|
||||
op0 = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
|
||||
ops[i + 1]));
|
||||
|
||||
/* Cast V4SImode vectors to V2DImode vectors. */
|
||||
op1 = gen_reg_rtx (V2DImode);
|
||||
emit_move_insn (op1, gen_lowpart (V2DImode, op0));
|
||||
ops[j] = op1;
|
||||
}
|
||||
|
||||
/* Interleave low V2DIs. */
|
||||
op0 = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
|
||||
|
||||
/* Cast the V2DImode vector back to a V16QImode vector. */
|
||||
emit_insn (gen_rtx_SET (VOIDmode, target,
|
||||
gen_lowpart (mode, op0)));
|
||||
return;
|
||||
}
|
||||
|
||||
case V4HImode:
|
||||
case V8QImode:
|
||||
break;
|
||||
|
@ -1,3 +1,21 @@
|
||||
2008-05-15 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* gcc.target/i386/m128-check.h: New.
|
||||
* gcc.target/i386/set-v16qi-1.h: Likewise.
|
||||
* gcc.target/i386/set-v16qi-2.h: Likewise.
|
||||
* gcc.target/i386/set-v8hi-1.h: Likewise.
|
||||
* gcc.target/i386/set-v8hi-2.h: Likewise.
|
||||
* gcc.target/i386/sse2-set-v16qi-1.c: Likewise.
|
||||
* gcc.target/i386/sse2-set-v16qi-2.c: Likewise.
|
||||
* gcc.target/i386/sse2-set-v8hi-1.c: Likewise.
|
||||
* gcc.target/i386/sse2-set-v8hi-2.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise.
|
||||
* gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise.
|
||||
|
||||
* gcc.target/i386/sse2-check.h: Include m128-check.h. Don't
|
||||
include <stdio.h>.
|
||||
* gcc.target/i386/sse4_1-check.h: Likewise.
|
||||
|
||||
2008-05-15 Adam Nemet <anemet@caviumnetworks.com>
|
||||
|
||||
PR middle-end/36194
|
||||
|
69
gcc/testsuite/gcc.target/i386/m128-check.h
Normal file
69
gcc/testsuite/gcc.target/i386/m128-check.h
Normal file
@ -0,0 +1,69 @@
|
||||
/* Helpers shared by the i386 vector tests: unions that overlay a 128-bit
   SSE value with an array of its elements, and generated check_<union>
   functions that compare such a value against an expected array.  */

#ifndef M128_CHECK_H_INCLUDED
#define M128_CHECK_H_INCLUDED

#include <stdio.h>
#include <emmintrin.h>

/* __m128i viewed as 16 chars.  */
typedef union
{
  __m128i x;
  char a[16];
} union128i_b;

/* __m128i viewed as 8 shorts.  */
typedef union
{
  __m128i x;
  short a[8];
} union128i_w;

/* __m128i viewed as 4 ints.  */
typedef union
{
  __m128i x;
  int a[4];
} union128i_d;

/* __m128i viewed as 2 long longs.  */
typedef union
{
  __m128i x;
  long long a[2];
} union128i_q;

/* __m128 viewed as 4 floats.  */
typedef union
{
  __m128 x;
  float a[4];
} union128;

/* __m128d viewed as 2 doubles.  */
typedef union
{
  __m128d x;
  double a[2];
} union128d;

/* PRINTF prints only when the test is built with -DDEBUG; otherwise it
   expands to nothing.  */
#ifdef DEBUG
#define PRINTF printf
#else
#define PRINTF(...)
#endif

/* Define check_<UNION_TYPE> (u, v).  The generated function compares each
   element of U.a with the element of V at the same index and returns the
   number of mismatches (0 when all elements match).  Every mismatch is
   reported through PRINTF, formatting both values with FMT.  The index
   is unsigned so that it compares cleanly against the sizeof-based
   element count.  */
#define CHECK_EXP(UNION_TYPE, VALUE_TYPE, FMT)                  \
static int                                                      \
__attribute__((noinline, unused))                               \
check_##UNION_TYPE (UNION_TYPE u, const VALUE_TYPE *v)          \
{                                                               \
  unsigned int i;                                               \
  int err = 0;                                                  \
                                                                \
  for (i = 0; i < sizeof (u.a) / sizeof (u.a[0]); i++)          \
    if (u.a[i] != v[i])                                         \
      {                                                         \
        err++;                                                  \
        PRINTF ("%u: " FMT " != " FMT "\n",                     \
                i, v[i], u.a[i]);                               \
      }                                                         \
  return err;                                                   \
}

CHECK_EXP (union128i_b, char, "%d")
CHECK_EXP (union128i_w, short, "%d")
CHECK_EXP (union128i_d, int, "0x%x")
CHECK_EXP (union128i_q, long long, "0x%llx")
CHECK_EXP (union128, float, "%f")
CHECK_EXP (union128d, double, "%f")

#endif /* M128_CHECK_H_INCLUDED */
|
30
gcc/testsuite/gcc.target/i386/set-v16qi-1.h
Normal file
30
gcc/testsuite/gcc.target/i386/set-v16qi-1.h
Normal file
@ -0,0 +1,30 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
/* Build a 128-bit vector from the 16 chars at V.  The arguments are
   passed to _mm_set_epi8 from v[15] down to v[0].  Kept out of line so
   the vector construction is compiled as its own function.  */
static __m128i
__attribute__((noinline))
foo (char *v)
{
  __m128i packed;

  packed = _mm_set_epi8 (v[15], v[14], v[13], v[12],
                         v[11], v[10], v[9], v[8],
                         v[7], v[6], v[5], v[4],
                         v[3], v[2], v[1], v[0]);
  return packed;
}
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
char v[16] =
|
||||
{
|
||||
-3, 60, 48, 104, -90, 37, -48, 78,
|
||||
4, 33, 81, 4, -89, 17, 8, 68
|
||||
};
|
||||
union128i_b u;
|
||||
|
||||
u.x = foo (v);
|
||||
if (check_union128i_b (u, v))
|
||||
abort ();
|
||||
}
|
30
gcc/testsuite/gcc.target/i386/set-v16qi-2.h
Normal file
30
gcc/testsuite/gcc.target/i386/set-v16qi-2.h
Normal file
@ -0,0 +1,30 @@
|
||||
#include CHECK_H
|
||||
|
||||
/* Build a 128-bit vector from 16 separate char arguments, forwarding
   them to _mm_set_epi8 in the order received (x1 first, x16 last).
   Kept out of line so the vector construction is compiled as its own
   function.  */
static __m128i
__attribute__((noinline))
foo (char x1, char x2, char x3, char x4,
     char x5, char x6, char x7, char x8,
     char x9, char x10, char x11, char x12,
     char x13, char x14, char x15, char x16)
{
  __m128i packed;

  packed = _mm_set_epi8 (x1, x2, x3, x4, x5, x6, x7, x8,
                         x9, x10, x11, x12, x13, x14, x15, x16);
  return packed;
}
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
char v[16] =
|
||||
{
|
||||
-3, 60, 48, 104, -90, 37, -48, 78,
|
||||
4, 33, 81, 4, -89, 17, 8, 68
|
||||
};
|
||||
union128i_b u;
|
||||
|
||||
u.x = foo (v[15], v[14], v[13], v[12],
|
||||
v[11], v[10], v[9], v[8],
|
||||
v[7], v[6], v[5], v[4],
|
||||
v[3], v[2], v[1], v[0]);
|
||||
if (check_union128i_b (u, v))
|
||||
abort ();
|
||||
}
|
19
gcc/testsuite/gcc.target/i386/set-v8hi-1.h
Normal file
19
gcc/testsuite/gcc.target/i386/set-v8hi-1.h
Normal file
@ -0,0 +1,19 @@
|
||||
#include CHECK_H
|
||||
|
||||
/* Build a 128-bit vector from the 8 shorts at V.  The arguments are
   passed to _mm_set_epi16 from v[7] down to v[0].  Kept out of line so
   the vector construction is compiled as its own function.  */
static __m128i
__attribute__((noinline))
foo (short *v)
{
  __m128i packed;

  packed = _mm_set_epi16 (v[7], v[6], v[5], v[4],
                          v[3], v[2], v[1], v[0]);
  return packed;
}
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
short v[8] = { -3, 6000, 48, 104, -90, 34567, -1248, 34678 };
|
||||
union128i_w u;
|
||||
|
||||
u.x = foo (v);
|
||||
if (check_union128i_w (u, v))
|
||||
abort ();
|
||||
}
|
21
gcc/testsuite/gcc.target/i386/set-v8hi-2.h
Normal file
21
gcc/testsuite/gcc.target/i386/set-v8hi-2.h
Normal file
@ -0,0 +1,21 @@
|
||||
#include CHECK_H
|
||||
|
||||
/* Build a 128-bit vector from 8 separate short arguments, forwarding
   them to _mm_set_epi16 in the order received (x1 first, x8 last).
   Kept out of line so the vector construction is compiled as its own
   function.  */
__m128i
__attribute__((noinline))
foo (short x1, short x2, short x3, short x4,
     short x5, short x6, short x7, short x8)
{
  __m128i packed;

  packed = _mm_set_epi16 (x1, x2, x3, x4, x5, x6, x7, x8);
  return packed;
}
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
short v[8] = { -3, 2, 1, 9, 23, -173, -13, 69 };
|
||||
union128i_w u;
|
||||
|
||||
u.x = foo (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
|
||||
|
||||
if (check_union128i_w (u, v))
|
||||
abort ();
|
||||
}
|
@ -1,7 +1,6 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cpuid.h"
|
||||
#include "m128-check.h"
|
||||
|
||||
static void sse2_test (void);
|
||||
|
||||
|
7
gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c
Normal file
7
gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c
Normal file
@ -0,0 +1,7 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#define TEST sse2_test
|
||||
|
||||
#include "set-v16qi-1.h"
|
7
gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c
Normal file
7
gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c
Normal file
@ -0,0 +1,7 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#define TEST sse2_test
|
||||
|
||||
#include "set-v16qi-2.h"
|
7
gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c
Normal file
7
gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c
Normal file
@ -0,0 +1,7 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#define TEST sse2_test
|
||||
|
||||
#include "set-v8hi-1.h"
|
7
gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c
Normal file
7
gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c
Normal file
@ -0,0 +1,7 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
#define CHECK_H "sse2-check.h"
|
||||
#define TEST sse2_test
|
||||
|
||||
#include "set-v8hi-2.h"
|
@ -1,7 +1,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cpuid.h"
|
||||
#include "m128-check.h"
|
||||
|
||||
static void sse4_1_test (void);
|
||||
|
||||
|
8
gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c
Normal file
8
gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#define TEST sse4_1_test
|
||||
|
||||
#include "set-v16qi-1.h"
|
8
gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c
Normal file
8
gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c
Normal file
@ -0,0 +1,8 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#define TEST sse4_1_test
|
||||
|
||||
#include "set-v16qi-2.h"
|
Loading…
Reference in New Issue
Block a user