i386-protos.h (ix86_expand_sse4_unpack): New.

2007-05-26  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.

	* config/i386/i386.c (ix86_expand_sse4_unpack): New.

	* config/i386/sse.md (vec_unpacku_hi_v16qi): Call
	ix86_expand_sse4_unpack if SSE4.1 is enabled.
	(vec_unpacks_hi_v16qi): Likewise.
	(vec_unpacku_lo_v16qi): Likewise.
	(vec_unpacks_lo_v16qi): Likewise.
	(vec_unpacku_hi_v8hi): Likewise.
	(vec_unpacks_hi_v8hi): Likewise.
	(vec_unpacku_lo_v8hi): Likewise.
	(vec_unpacks_lo_v8hi): Likewise.
	(vec_unpacku_hi_v4si): Likewise.
	(vec_unpacks_hi_v4si): Likewise.
	(vec_unpacku_lo_v4si): Likewise.
	(vec_unpacks_lo_v4si): Likewise.

From-SVN: r125093
This commit is contained in:
H.J. Lu 2007-05-26 14:34:21 +00:00 committed by H.J. Lu
parent a0cb58b2cd
commit e5ac0b9bce
4 changed files with 118 additions and 12 deletions

View File

@ -1,3 +1,23 @@
2007-05-26 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.
* config/i386/i386.c (ix86_expand_sse4_unpack): New.
* config/i386/sse.md (vec_unpacku_hi_v16qi): Call
ix86_expand_sse4_unpack if SSE4.1 is enabled.
(vec_unpacks_hi_v16qi): Likewise.
(vec_unpacku_lo_v16qi): Likewise.
(vec_unpacks_lo_v16qi): Likewise.
(vec_unpacku_hi_v8hi): Likewise.
(vec_unpacks_hi_v8hi): Likewise.
(vec_unpacku_lo_v8hi): Likewise.
(vec_unpacks_lo_v8hi): Likewise.
(vec_unpacku_hi_v4si): Likewise.
(vec_unpacks_hi_v4si): Likewise.
(vec_unpacku_lo_v4si): Likewise.
(vec_unpacks_lo_v4si): Likewise.
2007-05-26 Kazu Hirata <kazu@codesourcery.com>
* c-typeck.c, config/arm/arm.c, config/darwin.c,

View File

@ -112,6 +112,7 @@ extern int ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);

View File

@ -12843,6 +12843,55 @@ ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
emit_insn (unpack (dest, operands[1], se));
}
/* SSE4.1 counterpart of ix86_expand_sse_unpack: the same unpack task,
   but carried out with SSE4.1 instructions.  OPERANDS[0] receives the
   result and OPERANDS[1] supplies the input vector.  UNSIGNED_P picks
   the zero_extend generator over the plain extend one; HIGH_P asks for
   the higher 8 bytes of the input to be unpacked instead of the lower
   8 bytes.  */

void
ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode src_mode = GET_MODE (operands[1]);
  rtx (*gen_extend) (rtx, rtx);
  rtx input = operands[1];

  /* Select the extension generator that matches the input vector mode.
     Any mode other than the three handled here is a caller bug.  */
  switch (src_mode)
    {
    case V16QImode:
      gen_extend = (unsigned_p
                    ? gen_sse4_1_zero_extendv8qiv8hi2
                    : gen_sse4_1_extendv8qiv8hi2);
      break;

    case V8HImode:
      gen_extend = (unsigned_p
                    ? gen_sse4_1_zero_extendv4hiv4si2
                    : gen_sse4_1_extendv4hiv4si2);
      break;

    case V4SImode:
      gen_extend = (unsigned_p
                    ? gen_sse4_1_zero_extendv2siv2di2
                    : gen_sse4_1_extendv2siv2di2);
      break;

    default:
      gcc_unreachable ();
    }

  if (high_p)
    {
      /* Move the higher 8 bytes down into the lower 8 bytes of a fresh
         pseudo by shifting the whole vector, viewed as TImode, right by
         64 bits.  The original input register is left untouched.  */
      input = gen_reg_rtx (src_mode);
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, input),
                                   gen_lowpart (TImode, operands[1]),
                                   GEN_INT (64)));
    }

  emit_insn (gen_extend (operands[0], input));
}
/* Expand conditional increment or decrement using adc/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */

View File

@ -4722,7 +4722,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, true, true);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, true, true);
else
ix86_expand_sse_unpack (operands, true, true);
DONE;
})
@ -4731,7 +4734,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, false, true);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, false, true);
else
ix86_expand_sse_unpack (operands, false, true);
DONE;
})
@ -4740,7 +4746,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, true, false);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, true, false);
else
ix86_expand_sse_unpack (operands, true, false);
DONE;
})
@ -4749,7 +4758,10 @@
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, false, false);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, false, false);
else
ix86_expand_sse_unpack (operands, false, false);
DONE;
})
@ -4758,7 +4770,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, true, true);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, true, true);
else
ix86_expand_sse_unpack (operands, true, true);
DONE;
})
@ -4767,7 +4782,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, false, true);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, false, true);
else
ix86_expand_sse_unpack (operands, false, true);
DONE;
})
@ -4776,7 +4794,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, true, false);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, true, false);
else
ix86_expand_sse_unpack (operands, true, false);
DONE;
})
@ -4785,7 +4806,10 @@
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, false, false);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, false, false);
else
ix86_expand_sse_unpack (operands, false, false);
DONE;
})
@ -4794,7 +4818,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, true, true);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, true, true);
else
ix86_expand_sse_unpack (operands, true, true);
DONE;
})
@ -4803,7 +4830,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, false, true);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, false, true);
else
ix86_expand_sse_unpack (operands, false, true);
DONE;
})
@ -4812,7 +4842,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, true, false);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, true, false);
else
ix86_expand_sse_unpack (operands, true, false);
DONE;
})
@ -4821,7 +4854,10 @@
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
ix86_expand_sse_unpack (operands, false, false);
if (TARGET_SSE4_1)
ix86_expand_sse4_unpack (operands, false, false);
else
ix86_expand_sse_unpack (operands, false, false);
DONE;
})