i386.c (avx_vperm2f128_parallel): New.

* config/i386/i386.c (avx_vperm2f128_parallel): New.
        * config/i386/i386-protos.h: Declare it.
        * config/i386/predicates.md (avx_vperm2f128_v8sf_operand,
        avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New.
        * config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander.
        (*avx_vperm2f128<mode>_full): Renamed from avx_vperm2f128<mode>3.
        (*avx_vperm2f128<mode>_nozero): New.

From-SVN: r154832
This commit is contained in:
Richard Henderson 2009-11-30 09:36:07 -08:00 committed by Richard Henderson
parent 784e5ae13a
commit ca659f6ed9
5 changed files with 135 additions and 1 deletions

View File

@ -1,3 +1,13 @@
2009-11-30 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (avx_vperm2f128_parallel): New.
* config/i386/i386-protos.h: Declare it.
* config/i386/predicates.md (avx_vperm2f128_v8sf_operand,
avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New.
* config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander.
(*avx_vperm2f128<mode>_full): Renamed from avx_vperm2f128<mode>3.
(*avx_vperm2f128<mode>_nozero): New.
2009-11-30 Richard Henderson <rth@redhat.com>
* config/i386/i386-builtin-types.def (V4DF_FTYPE_V4DF_V4DF_V4DI): New.

View File

@ -48,6 +48,7 @@ extern bool x86_extended_reg_mentioned_p (rtx);
extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
/* Defined in i386.c.  Returns 0 when PAR is not a vperm2f128 selector
   for MODE, otherwise the instruction's imm8 plus one.  */
extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode);
extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);

View File

@ -24646,6 +24646,58 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
/* Make sure success has a non-zero value by adding one. */
return mask + 1;
}
/* Worker for the avx_vperm2f128_*_operand predicates; the expansion
   code also calls it to convert a selector parallel back into the
   instruction immediate.

   PAR is the PARALLEL of selector indices and MODE the vector mode of
   the result.  Yields 0 when PAR does not describe a vperm2f128
   permute, and the imm8 plus one when it does.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned nelt = GET_MODE_NUNITS (mode);
  unsigned half = nelt / 2;
  unsigned char sel[8];
  unsigned imm = 0;
  unsigned k, lane;

  /* The selector must name exactly one source element per result
     element.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Each selector entry has to be a CONST_INT that indexes into the
     2 * nelt elements of the concatenated sources.  Stash the values
     in a plain array so the later checks need no rtl accessors.  */
  for (k = 0; k < nelt; ++k)
    {
      rtx elt = XVECEXP (par, 0, k);
      unsigned HOST_WIDE_INT val;

      if (!CONST_INT_P (elt))
        return 0;

      val = INTVAL (elt);
      if (val >= 2 * nelt)
        return 0;

      sel[k] = val;
    }

  /* Inside each half of the result the indices must be consecutive,
     i.e. each result half copies one contiguous run of elements.  */
  for (lane = 0; lane < 2; ++lane)
    {
      unsigned lo = lane * half;
      unsigned hi = lane ? nelt : half;

      for (k = lo; k < hi - 1; ++k)
        if (sel[k] + 1 != sel[k + 1])
          return 0;
    }

  /* Each run must begin on a half-vector boundary.  The number of the
     selected half goes into the low nibble of the immediate for the
     first result half and into the high nibble for the second.  */
  for (lane = 0; lane < 2; ++lane)
    {
      unsigned start = sel[lane * half];

      if (start % half != 0)
        return 0;

      imm |= (start / half) << (lane * 4);
    }

  /* Bias by one so that a valid immediate of zero is still reported
     as success.  */
  return imm + 1;
}
/* Store OPERAND to the memory after reload is completed. This means

View File

@ -1227,3 +1227,17 @@
;; Accept OP when it is a parallel for which avx_vpermilp_parallel
;; reports a match (non-zero result) in V2DFmode.
(define_predicate "avx_vpermilp_v2df_operand"
(and (match_code "parallel")
(match_test "avx_vpermilp_parallel (op, V2DFmode)")))
;; Return 1 if OP is a parallel for a vperm2f128 permute.
;; There is one predicate per 256-bit mode (V8SF, V8SI, V4DF).  Each
;; defers to avx_vperm2f128_parallel, which returns 0 for no match and
;; the imm8 plus one for a match, so any non-zero result is accepted.
(define_predicate "avx_vperm2f128_v8sf_operand"
(and (match_code "parallel")
(match_test "avx_vperm2f128_parallel (op, V8SFmode)")))
(define_predicate "avx_vperm2f128_v8si_operand"
(and (match_code "parallel")
(match_test "avx_vperm2f128_parallel (op, V8SImode)")))
(define_predicate "avx_vperm2f128_v4df_operand"
(and (match_code "parallel")
(match_test "avx_vperm2f128_parallel (op, V4DFmode)")))

View File

@ -11917,7 +11917,44 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
(define_insn "avx_vperm2f128<mode>3"
;; Expand a vperm2f128 permute.  Operands 1 and 2 are the two 256-bit
;; sources and operand 3 is the imm8 selector.  When neither zeroing
;; bit of the imm8 (bit 3 or bit 7) is set, the permute is emitted as a
;; vec_select from the vec_concat of the two sources.  Otherwise the
;; preparation code falls through and the unspec template below is
;; emitted unchanged.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
           (match_operand:SI 3 "const_0_to_255_operand" "")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  /* The immediate is operand 3.  Operand 2 is the second vector
     source, not a CONST_INT, so INTVAL must not be applied to it.  */
  int mask = INTVAL (operands[3]);

  if ((mask & 0x88) == 0)
    {
      rtx perm[<ssescalarnum>], t1, t2;
      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;

      /* Bits 1:0 pick which of the four source halves feeds the low
         half of the result; indices count through operand 1 first and
         then operand 2, matching the vec_concat built below.  */
      base = (mask & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i] = GEN_INT (base + i);

      /* Bits 5:4 do the same for the high half of the result.  */
      base = ((mask >> 4) & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i + nelt2] = GEN_INT (base + i);

      t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
                               operands[1], operands[2]);
      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
      emit_insn (t2);
      DONE;
    }
})
;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
;; means that in order to represent this properly in rtl we'd have to
;; nest *another* vec_concat with a zero operand and do the select from
;; a 4x wide vector. That doesn't seem very nice.
(define_insn "*avx_vperm2f128<mode>_full"
[(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
(unspec:AVX256MODE2P
[(match_operand:AVX256MODE2P 1 "register_operand" "x")
@ -11932,6 +11969,26 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
;; vperm2f128 expressed as a vec_select from the concatenation of
;; operands 1 and 2.  Operand 3 is the selector parallel, which must
;; satisfy the mode-specific avx_vperm2f128_<mode>_operand predicate.
;; The immediate rebuilt from such a parallel only has bits 1:0 and 5:4
;; set, so the lane-zeroing bits 3 and 7 are never produced here.
(define_insn "*avx_vperm2f128<mode>_nozero"
[(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
(vec_select:AVX256MODE2P
(vec_concat:<ssedoublesizemode>
(match_operand:AVX256MODE2P 1 "register_operand" "x")
(match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
(match_parallel 3 "avx_vperm2f128_<mode>_operand"
[(match_operand 4 "const_int_operand" "")])))]
"TARGET_AVX"
{
/* avx_vperm2f128_parallel returns the imm8 plus one on a match, so
   subtract one to recover the immediate, then substitute it for the
   parallel so that %3 prints as a constant.  */
int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
operands[3] = GEN_INT (mask);
return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
[(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
(vec_concat:AVXMODEF4P