i386.c (avx_vperm2f128_parallel): New.
* config/i386/i386.c (avx_vperm2f128_parallel): New. * config/i386/i386-protos.h: Declare it. * config/i386/predicates.md (avx_vperm2f128_v8sf_operand, avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New. * config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander. (*avx_vperm2f128<mode>_full): Renamed from avx_vperm2f128<mode>3. (*avx_vperm2f128<mode>_nozero): New. From-SVN: r154832
This commit is contained in:
parent
784e5ae13a
commit
ca659f6ed9
@ -1,3 +1,13 @@
|
||||
2009-11-30 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386.c (avx_vperm2f128_parallel): New.
|
||||
* config/i386/i386-protos.h: Declare it.
|
||||
* config/i386/predicates.md (avx_vperm2f128_v8sf_operand,
|
||||
avx_vperm2f128_v8si_operand, avx_vperm2f128_v4df_operand): New.
|
||||
* config/i386/sse.md (avx_vperm2f128<mode>3): Change to expander.
|
||||
(*avx_vperm2f128<mode>_full): Renamed from avx_vperm2f128<mode>3.
|
||||
(*avx_vperm2f128<mode>_nozero): New.
|
||||
|
||||
2009-11-30 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386-builtin-types.def (V4DF_FTYPE_V4DF_V4DF_V4DI): New.
|
||||
|
@ -48,6 +48,7 @@ extern bool x86_extended_reg_mentioned_p (rtx);
|
||||
extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
|
||||
|
||||
extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
|
||||
extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode);
|
||||
|
||||
extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
|
||||
extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
|
||||
|
@ -24646,6 +24646,58 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
|
||||
/* Make sure success has a non-zero value by adding one. */
|
||||
return mask + 1;
|
||||
}
|
||||
|
||||
/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
|
||||
the expansion functions to turn the parallel back into a mask.
|
||||
The return value is 0 for no match and the imm8+1 for a match. */
|
||||
|
||||
int
|
||||
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
|
||||
{
|
||||
unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
|
||||
unsigned mask = 0;
|
||||
unsigned char ipar[8];
|
||||
|
||||
if (XVECLEN (par, 0) != (int) nelt)
|
||||
return 0;
|
||||
|
||||
/* Validate that all of the elements are constants, and not totally
|
||||
out of range. Copy the data into an integral array to make the
|
||||
subsequent checks easier. */
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
rtx er = XVECEXP (par, 0, i);
|
||||
unsigned HOST_WIDE_INT ei;
|
||||
|
||||
if (!CONST_INT_P (er))
|
||||
return 0;
|
||||
ei = INTVAL (er);
|
||||
if (ei >= 2 * nelt)
|
||||
return 0;
|
||||
ipar[i] = ei;
|
||||
}
|
||||
|
||||
/* Validate that the halves of the permute are halves. */
|
||||
for (i = 0; i < nelt2 - 1; ++i)
|
||||
if (ipar[i] + 1 != ipar[i + 1])
|
||||
return 0;
|
||||
for (i = nelt2; i < nelt - 1; ++i)
|
||||
if (ipar[i] + 1 != ipar[i + 1])
|
||||
return 0;
|
||||
|
||||
/* Reconstruct the mask. */
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
unsigned e = ipar[i * nelt2];
|
||||
if (e % nelt2)
|
||||
return 0;
|
||||
e /= nelt2;
|
||||
mask |= e << (i * 4);
|
||||
}
|
||||
|
||||
/* Make sure success has a non-zero value by adding one. */
|
||||
return mask + 1;
|
||||
}
|
||||
|
||||
|
||||
/* Store OPERAND to the memory after reload is completed. This means
|
||||
|
@ -1227,3 +1227,17 @@
|
||||
(define_predicate "avx_vpermilp_v2df_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vpermilp_parallel (op, V2DFmode)")))
|
||||
|
||||
;; Return 1 if OP is a parallel for a vperm2f128 permute.
;; One predicate is defined per vector mode (V8SF, V8SI, V4DF); each
;; accepts a parallel for which avx_vperm2f128_parallel returns a
;; non-zero value for that mode.
|
||||
|
||||
(define_predicate "avx_vperm2f128_v8sf_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vperm2f128_parallel (op, V8SFmode)")))
|
||||
|
||||
(define_predicate "avx_vperm2f128_v8si_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vperm2f128_parallel (op, V8SImode)")))
|
||||
|
||||
(define_predicate "avx_vperm2f128_v4df_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vperm2f128_parallel (op, V4DFmode)")))
|
||||
|
@ -11917,7 +11917,44 @@
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "avx_vperm2f128<mode>3"
|
||||
;; Expand a vperm2f128 of operands 1 and 2 under control of the imm8 in
;; operand 3.  When neither zeroing bit (0x88) is set in the immediate,
;; the permute is emitted as a vec_select from the vec_concat of the two
;; inputs.  Otherwise the preparation code does nothing and the unspec
;; template of this expander is emitted.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
           (match_operand:SI 3 "const_0_to_255_operand" "")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  /* The selector immediate is operand 3.  Operand 2 is the second
     vector input and is not a CONST_INT, so it must not be passed
     to INTVAL.  */
  int mask = INTVAL (operands[3]);
  if ((mask & 0x88) == 0)
    {
      rtx perm[<ssescalarnum>], t1, t2;
      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;

      /* Bits 1:0 choose the half-vector that fills the low half of the
         result.  */
      base = (mask & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i] = GEN_INT (base + i);

      /* Bits 5:4 choose the half-vector that fills the high half.  */
      base = ((mask >> 4) & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i + nelt2] = GEN_INT (base + i);

      /* Build (set op0 (vec_select (vec_concat op1 op2) (parallel ...))).  */
      t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
                               operands[1], operands[2]);
      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
      emit_insn (t2);
      DONE;
    }
})
|
||||
|
||||
;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
|
||||
;; means that in order to represent this properly in rtl we'd have to
|
||||
;; nest *another* vec_concat with a zero operand and do the select from
|
||||
;; a 4x wide vector. That doesn't seem very nice.
|
||||
(define_insn "*avx_vperm2f128<mode>_full"
|
||||
[(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
|
||||
(unspec:AVX256MODE2P
|
||||
[(match_operand:AVX256MODE2P 1 "register_operand" "x")
|
||||
@ -11932,6 +11969,26 @@
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "V8SF")])
|
||||
|
||||
;; vperm2f128 expressed as a vec_select from the vec_concat of operands
;; 1 and 2, with the selection given by the parallel in operand 3.  The
;; output code calls avx_vperm2f128_parallel to turn that parallel back
;; into the immediate (subtracting the +1 bias of its return value) and
;; replaces operand 3 with it before printing.
(define_insn "*avx_vperm2f128<mode>_nozero"
|
||||
[(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
|
||||
(vec_select:AVX256MODE2P
|
||||
(vec_concat:<ssedoublesizemode>
|
||||
(match_operand:AVX256MODE2P 1 "register_operand" "x")
|
||||
(match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
|
||||
(match_parallel 3 "avx_vperm2f128_<mode>_operand"
|
||||
[(match_operand 4 "const_int_operand" "")])))]
|
||||
"TARGET_AVX"
|
||||
{
|
||||
int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
|
||||
operands[3] = GEN_INT (mask);
|
||||
return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
|
||||
}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "V8SF")])
|
||||
|
||||
(define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
|
||||
[(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
|
||||
(vec_concat:AVXMODEF4P
|
||||
|
Loading…
Reference in New Issue
Block a user