i386.c (ix86_expand_vector_init_general): Optimize V8HImode for SSE2 and V16QImode for SSE4.1.

gcc/

2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (ix86_expand_vector_init_general): Optimize
	V8HImode for SSE2 and V16QImode for SSE4.1.

gcc/testsuite/

2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>

	* gcc.target/i386/m128-check.h: New.
	* gcc.target/i386/set-v16qi-1.h: Likewise.
	* gcc.target/i386/set-v16qi-2.h: Likewise.
	* gcc.target/i386/set-v8hi-1.h: Likewise.
	* gcc.target/i386/set-v8hi-2.h: Likewise.
	* gcc.target/i386/sse2-set-v16qi-1.c: Likewise.
	* gcc.target/i386/sse2-set-v16qi-2.c: Likewise.
	* gcc.target/i386/sse2-set-v8hi-1.c: Likewise.
	* gcc.target/i386/sse2-set-v8hi-2.c: Likewise.
	* gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise.
	* gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise.

	* gcc.target/i386/sse2-check.h: Include m128-check.h. Don't
	include <stdio.h>.
	* gcc.target/i386/sse4_1-check.h: Likewise.

From-SVN: r135409
This commit is contained in:
H.J. Lu 2008-05-16 06:19:39 +00:00 committed by H.J. Lu
parent d93712d9ff
commit d0208f9b64
16 changed files with 373 additions and 3 deletions

View File

@ -1,3 +1,8 @@
2008-05-15 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_expand_vector_init_general): Optimize
V8HImode for SSE2 and V16QImode for SSE4.1.
2008-05-15 Kenneth Zadeck <zadeck@naturalbridge.com>
* cgraph.h (compute_inline_parameters): Made public.

View File

@ -23892,7 +23892,142 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
break;
case V8HImode:
if (TARGET_SSE2)
{
rtx ops[4];
unsigned int i, j;
for (i = 0; i < ARRAY_SIZE (ops); i++)
{
/* Extend the odd element from HImode to SImode using
a paradoxical SUBREG. */
op0 = gen_reg_rtx (SImode);
emit_move_insn (op0, gen_lowpart (SImode,
XVECEXP (vals, 0,
i + i)));
/* Insert the SImode value as low element of V4SImode
vector. */
op1 = gen_reg_rtx (V4SImode);
op0 = gen_rtx_VEC_MERGE (V4SImode,
gen_rtx_VEC_DUPLICATE (V4SImode,
op0),
CONST0_RTX (V4SImode),
const1_rtx);
emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
/* Cast the V4SImode vector back to a V8HImode vector. */
op0 = gen_reg_rtx (mode);
emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load even HI elements into the second position. */
emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0,
i + i + 1),
const1_rtx));
/* Cast V8HImode vector to V4SImode vector. */
ops[i] = gen_reg_rtx (V4SImode);
emit_move_insn (ops[i], gen_lowpart (V4SImode, op0));
}
/* Interleave low V4SIs. */
for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
{
op0 = gen_reg_rtx (V4SImode);
emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
ops[i + 1]));
/* Cast V4SImode vectors to V2DImode vectors. */
op1 = gen_reg_rtx (V2DImode);
emit_move_insn (op1, gen_lowpart (V2DImode, op0));
ops[j] = op1;
}
/* Interleave low V2DIs. */
op0 = gen_reg_rtx (V2DImode);
emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
/* Cast the V2DImode vector back to a V8HImode vector. */
emit_insn (gen_rtx_SET (VOIDmode, target,
gen_lowpart (mode, op0)));
return;
}
case V16QImode:
if (TARGET_SSE4_1)
{
rtx ops[8];
unsigned int i, j;
for (i = 0; i < ARRAY_SIZE (ops); i++)
{
/* Extend the odd element from QImode to SImode using
a paradoxical SUBREG. */
op0 = gen_reg_rtx (SImode);
emit_move_insn (op0, gen_lowpart (SImode,
XVECEXP (vals, 0,
i + i)));
/* Insert the SImode value as low element of V4SImode
vector. */
op1 = gen_reg_rtx (V4SImode);
op0 = gen_rtx_VEC_MERGE (V4SImode,
gen_rtx_VEC_DUPLICATE (V4SImode,
op0),
CONST0_RTX (V4SImode),
const1_rtx);
emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
/* Cast the V4SImode vector back to a V16QImode vector. */
op0 = gen_reg_rtx (mode);
emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load even QI elements into the second position. */
emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0,
i + i + 1),
const1_rtx));
/* Cast V16QImode vector to V8HImode vector. */
ops[i] = gen_reg_rtx (V8HImode);
emit_move_insn (ops[i], gen_lowpart (V8HImode, op0));
}
/* Interleave low V8HIs. */
for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
{
op0 = gen_reg_rtx (V8HImode);
emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i],
ops[i + 1]));
/* Cast V8HImode vector to V4SImode vector. */
op1 = gen_reg_rtx (V4SImode);
emit_move_insn (op1, gen_lowpart (V4SImode, op0));
ops[j] = op1;
}
/* Interleave low V4SIs. */
for (i = j = 0; i < ARRAY_SIZE (ops) / 2; i += 2, j++)
{
op0 = gen_reg_rtx (V4SImode);
emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
ops[i + 1]));
/* Cast V4SImode vectors to V2DImode vectors. */
op1 = gen_reg_rtx (V2DImode);
emit_move_insn (op1, gen_lowpart (V2DImode, op0));
ops[j] = op1;
}
/* Interleave low V2DIs. */
op0 = gen_reg_rtx (V2DImode);
emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
/* Cast the V2DImode vector back to a V16QImode vector. */
emit_insn (gen_rtx_SET (VOIDmode, target,
gen_lowpart (mode, op0)));
return;
}
case V4HImode:
case V8QImode:
break;

View File

@ -1,3 +1,21 @@
2008-05-15 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/m128-check.h: New.
* gcc.target/i386/set-v16qi-1.h: Likewise.
* gcc.target/i386/set-v16qi-2.h: Likewise.
* gcc.target/i386/set-v8hi-1.h: Likewise.
* gcc.target/i386/set-v8hi-2.h: Likewise.
* gcc.target/i386/sse2-set-v16qi-1.c: Likewise.
* gcc.target/i386/sse2-set-v16qi-2.c: Likewise.
* gcc.target/i386/sse2-set-v8hi-1.c: Likewise.
* gcc.target/i386/sse2-set-v8hi-2.c: Likewise.
* gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise.
* gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise.
* gcc.target/i386/sse2-check.h: Include m128-check.h. Don't
include <stdio.h>.
* gcc.target/i386/sse4_1-check.h: Likewise.
2008-05-15 Adam Nemet <anemet@caviumnetworks.com>
PR middle-end/36194

View File

@ -0,0 +1,69 @@
/* Shared helpers for the i386 SSE run-time tests: unions that overlay a
   128-bit vector with an array of its elements, and per-union checker
   functions that compare those elements against an expected array.  */

#ifndef M128_CHECK_H
#define M128_CHECK_H

#include <stdio.h>
#include <emmintrin.h>

/* __m128i overlaid with 16 char elements.  */
typedef union
{
  __m128i x;
  char a[16];
} union128i_b;

/* __m128i overlaid with 8 short elements.  */
typedef union
{
  __m128i x;
  short a[8];
} union128i_w;

/* __m128i overlaid with 4 int elements.  */
typedef union
{
  __m128i x;
  int a[4];
} union128i_d;

/* __m128i overlaid with 2 long long elements.  */
typedef union
{
  __m128i x;
  long long a[2];
} union128i_q;

/* __m128 overlaid with 4 float elements.  */
typedef union
{
  __m128 x;
  float a[4];
} union128;

/* __m128d overlaid with 2 double elements.  */
typedef union
{
  __m128d x;
  double a[2];
} union128d;

/* PRINTF reports mismatches only when DEBUG is defined; otherwise it
   expands to nothing.  */
#ifdef DEBUG
#define PRINTF printf
#else
#define PRINTF(...)
#endif

/* Define check_<UNION_TYPE> (u, v): compare every element of u.a with the
   corresponding element of v and return the number of mismatches (0 means
   all elements are equal).  v must point to at least as many elements as
   u.a holds.  FMT is the printf conversion used to print one element when
   DEBUG is defined.  The element count is converted to int once so the
   loop does not compare a signed index with an unsigned size.  */
#define CHECK_EXP(UNION_TYPE, VALUE_TYPE, FMT)		\
static int						\
__attribute__((noinline, unused))			\
check_##UNION_TYPE (UNION_TYPE u, const VALUE_TYPE *v)	\
{							\
  const int n = (int) (sizeof (u.a) / sizeof (u.a[0]));	\
  int i;						\
  int err = 0;						\
  for (i = 0; i < n; i++)				\
    if (u.a[i] != v[i])					\
      {							\
	err++;						\
	PRINTF ("%i: " FMT " != " FMT "\n",		\
		i, v[i], u.a[i]);			\
      }							\
  return err;						\
}

CHECK_EXP (union128i_b, char, "%d")
CHECK_EXP (union128i_w, short, "%d")
CHECK_EXP (union128i_d, int, "0x%x")
CHECK_EXP (union128i_q, long long, "0x%llx")
CHECK_EXP (union128, float, "%f")
CHECK_EXP (union128d, double, "%f")

#endif /* M128_CHECK_H */

View File

@ -0,0 +1,30 @@
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
#include CHECK_H
/* Build a 128-bit integer vector from the 16 chars at V with a single
   _mm_set_epi8 call, passing V[15] first and V[0] last.  The function is
   marked noinline, presumably so the vector is built from values that
   are not compile-time constants -- confirm against the test's intent.  */
static __m128i
__attribute__((noinline))
foo (char *v)
{
return _mm_set_epi8 (v[15], v[14], v[13], v[12],
v[11], v[10], v[9], v[8],
v[7], v[6], v[5], v[4],
v[3], v[2], v[1], v[0]);
}
/* Test body.  TEST is a macro that the including .c file defines to the
   function name it wants this body to have.  Builds a vector from V via
   foo and aborts if check_union128i_b reports any element of the result
   that differs from the corresponding element of V.  */
static void
TEST (void)
{
char v[16] =
{
-3, 60, 48, 104, -90, 37, -48, 78,
4, 33, 81, 4, -89, 17, 8, 68
};
union128i_b u;
u.x = foo (v);
if (check_union128i_b (u, v))
abort ();
}

View File

@ -0,0 +1,30 @@
#include CHECK_H
/* Build a 128-bit integer vector from 16 separate char arguments by
   forwarding them, in the same order, to a single _mm_set_epi8 call.
   Unlike the array-based variant, every element arrives here as its own
   function parameter.  */
static __m128i
__attribute__((noinline))
foo (char x1, char x2, char x3, char x4,
char x5, char x6, char x7, char x8,
char x9, char x10, char x11, char x12,
char x13, char x14, char x15, char x16)
{
return _mm_set_epi8 (x1, x2, x3, x4, x5, x6, x7, x8,
x9, x10, x11, x12, x13, x14, x15, x16);
}
/* Test body.  TEST is a macro that the including .c file defines to the
   function name it wants this body to have.  Passes V[15] down to V[0]
   to foo as individual arguments and aborts if check_union128i_b reports
   any element of the result that differs from the corresponding element
   of V.  */
static void
TEST (void)
{
char v[16] =
{
-3, 60, 48, 104, -90, 37, -48, 78,
4, 33, 81, 4, -89, 17, 8, 68
};
union128i_b u;
u.x = foo (v[15], v[14], v[13], v[12],
v[11], v[10], v[9], v[8],
v[7], v[6], v[5], v[4],
v[3], v[2], v[1], v[0]);
if (check_union128i_b (u, v))
abort ();
}

View File

@ -0,0 +1,19 @@
#include CHECK_H
/* Build a 128-bit integer vector from the 8 shorts at V with a single
   _mm_set_epi16 call, passing V[7] first and V[0] last.  */
static __m128i
__attribute__((noinline))
foo (short *v)
{
return _mm_set_epi16 (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
}
/* Test body.  TEST is a macro that the including .c file defines to the
   function name it wants this body to have.  Builds a vector from V via
   foo and aborts if check_union128i_w reports any element of the result
   that differs from the corresponding element of V.

   NOTE(review): 34567 and 34678 do not fit a 16-bit signed short, so
   (assuming short is 16 bits here) the stored values come from the
   implementation's int-to-short conversion.  Both foo and the checker
   read the already-converted array, so the comparison stays
   consistent.  */
static void
TEST (void)
{
short v[8] = { -3, 6000, 48, 104, -90, 34567, -1248, 34678 };
union128i_w u;
u.x = foo (v);
if (check_union128i_w (u, v))
abort ();
}

View File

@ -0,0 +1,21 @@
#include CHECK_H
/* Build a 128-bit integer vector from 8 separate short arguments by
   forwarding them, in the same order, to a single _mm_set_epi16 call.
   Every element arrives here as its own function parameter.  */
__m128i
__attribute__((noinline))
foo (short x1, short x2, short x3, short x4,
short x5, short x6, short x7, short x8)
{
return _mm_set_epi16 (x1, x2, x3, x4, x5, x6, x7, x8);
}
/* Test body.  TEST is a macro that the including .c file defines to the
   function name it wants this body to have.  Passes V[7] down to V[0]
   to foo as individual arguments and aborts if check_union128i_w reports
   any element of the result that differs from the corresponding element
   of V.  */
static void
TEST (void)
{
short v[8] = { -3, 2, 1, 9, 23, -173, -13, 69 };
union128i_w u;
u.x = foo (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
if (check_union128i_w (u, v))
abort ();
}

View File

@ -1,7 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
#include "cpuid.h"
#include "m128-check.h"
static void sse2_test (void);

View File

@ -0,0 +1,7 @@
/* { dg-do run } */
/* { dg-options "-O2 -msse2" } */
/* Instantiate the shared set-v16qi-1.h test body for SSE2: CHECK_H names
   the harness header that body includes, and TEST names the function it
   defines.  */
#define CHECK_H "sse2-check.h"
#define TEST sse2_test
#include "set-v16qi-1.h"

View File

@ -0,0 +1,7 @@
/* { dg-do run } */
/* { dg-options "-O2 -msse2" } */
/* Instantiate the shared set-v16qi-2.h test body for SSE2: CHECK_H names
   the harness header that body includes, and TEST names the function it
   defines.  */
#define CHECK_H "sse2-check.h"
#define TEST sse2_test
#include "set-v16qi-2.h"

View File

@ -0,0 +1,7 @@
/* { dg-do run } */
/* { dg-options "-O2 -msse2" } */
/* Instantiate the shared set-v8hi-1.h test body for SSE2: CHECK_H names
   the harness header that body includes, and TEST names the function it
   defines.  */
#define CHECK_H "sse2-check.h"
#define TEST sse2_test
#include "set-v8hi-1.h"

View File

@ -0,0 +1,7 @@
/* { dg-do run } */
/* { dg-options "-O2 -msse2" } */
/* Instantiate the shared set-v8hi-2.h test body for SSE2: CHECK_H names
   the harness header that body includes, and TEST names the function it
   defines.  */
#define CHECK_H "sse2-check.h"
#define TEST sse2_test
#include "set-v8hi-2.h"

View File

@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "cpuid.h"
#include "m128-check.h"
static void sse4_1_test (void);

View File

@ -0,0 +1,8 @@
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
/* Instantiate the shared set-v16qi-1.h test body for SSE4.1: CHECK_H
   names the harness header that body includes, and TEST names the
   function it defines.  */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "set-v16qi-1.h"

View File

@ -0,0 +1,8 @@
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
/* Instantiate the shared set-v16qi-2.h test body for SSE4.1: CHECK_H
   names the harness header that body includes, and TEST names the
   function it defines.  */
#define CHECK_H "sse4_1-check.h"
#define TEST sse4_1_test
#include "set-v16qi-2.h"