sse.md (vec_interleave_high<mode>, [...]): Add AVX2 expanders for VI_256 modes.
* config/i386/sse.md (vec_interleave_high<mode>, vec_interleave_low<mode>): Add AVX2 expanders for VI_256 modes. * config/i386/i386.c (expand_vec_perm_interleave3): New function. (ix86_expand_vec_perm_builtin_1): Call it. From-SVN: r179995
This commit is contained in:
parent
a08147527b
commit
2e2accf8ae
@ -1,3 +1,11 @@
|
||||
2011-10-14 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/sse.md (vec_interleave_high<mode>,
|
||||
vec_interleave_low<mode>): Add AVX2 expanders for VI_256
|
||||
modes.
|
||||
* config/i386/i386.c (expand_vec_perm_interleave3): New function.
|
||||
(ix86_expand_vec_perm_builtin_1): Call it.
|
||||
|
||||
2011-10-14 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
Fix thinko from r179765
|
||||
|
@ -35474,6 +35474,82 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
|
||||
a two vector permutation using 2 intra-lane interleave insns
|
||||
and cross-lane shuffle for 32-byte vectors. */
|
||||
|
||||
static bool
|
||||
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned i, nelt;
|
||||
rtx (*gen) (rtx, rtx, rtx);
|
||||
|
||||
if (d->op0 == d->op1)
|
||||
return false;
|
||||
if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
|
||||
;
|
||||
else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
|
||||
;
|
||||
else
|
||||
return false;
|
||||
|
||||
nelt = d->nelt;
|
||||
if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
|
||||
return false;
|
||||
for (i = 0; i < nelt; i += 2)
|
||||
if (d->perm[i] != d->perm[0] + i / 2
|
||||
|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
|
||||
return false;
|
||||
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
switch (d->vmode)
|
||||
{
|
||||
case V32QImode:
|
||||
if (d->perm[0])
|
||||
gen = gen_vec_interleave_highv32qi;
|
||||
else
|
||||
gen = gen_vec_interleave_lowv32qi;
|
||||
break;
|
||||
case V16HImode:
|
||||
if (d->perm[0])
|
||||
gen = gen_vec_interleave_highv16hi;
|
||||
else
|
||||
gen = gen_vec_interleave_lowv16hi;
|
||||
break;
|
||||
case V8SImode:
|
||||
if (d->perm[0])
|
||||
gen = gen_vec_interleave_highv8si;
|
||||
else
|
||||
gen = gen_vec_interleave_lowv8si;
|
||||
break;
|
||||
case V4DImode:
|
||||
if (d->perm[0])
|
||||
gen = gen_vec_interleave_highv4di;
|
||||
else
|
||||
gen = gen_vec_interleave_lowv4di;
|
||||
break;
|
||||
case V8SFmode:
|
||||
if (d->perm[0])
|
||||
gen = gen_vec_interleave_highv8sf;
|
||||
else
|
||||
gen = gen_vec_interleave_lowv8sf;
|
||||
break;
|
||||
case V4DFmode:
|
||||
if (d->perm[0])
|
||||
gen = gen_vec_interleave_highv4df;
|
||||
else
|
||||
gen = gen_vec_interleave_lowv4df;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
emit_insn (gen (d->target, d->op0, d->op1));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
|
||||
permutation with two pshufb insns and an ior. We should have already
|
||||
failed all two instruction sequences. */
|
||||
@ -35972,6 +36048,9 @@ ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
|
||||
if (expand_vec_perm_pshufb2 (d))
|
||||
return true;
|
||||
|
||||
if (expand_vec_perm_interleave3 (d))
|
||||
return true;
|
||||
|
||||
/* Try sequences of four instructions. */
|
||||
|
||||
if (expand_vec_perm_vpshufb2_vpermq (d))
|
||||
|
@ -6848,6 +6848,38 @@
|
||||
(set_attr "prefix" "orig,vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; AVX2 expander for the high interleave of two VI_256 vectors.
;; Both avx2_interleave_low<mode> and avx2_interleave_high<mode> are
;; emitted into fresh temporaries, and avx2_permv2ti (operating on the
;; V4DImode views) assembles operand 0 from the two temporaries.
;; NOTE(review): selector 1 + (3 << 4) presumably picks the upper
;; 128-bit lane of each temporary -- confirm against the VPERM2I128
;; immediate encoding.
(define_expand "vec_interleave_high<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
 "TARGET_AVX2"
{
  rtx lane_lo = gen_reg_rtx (<MODE>mode);
  rtx lane_hi = gen_reg_rtx (<MODE>mode);
  rtx sel = GEN_INT (1 + (3 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (lane_lo, operands[1],
                                            operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (lane_hi, operands[1],
                                             operands[2]));
  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
                                gen_lowpart (V4DImode, lane_lo),
                                gen_lowpart (V4DImode, lane_hi),
                                sel));
  DONE;
})
|
||||
|
||||
;; AVX2 expander for the low interleave of two VI_256 vectors.
;; Both avx2_interleave_low<mode> and avx2_interleave_high<mode> are
;; emitted into fresh temporaries, and avx2_permv2ti (operating on the
;; V4DImode views) assembles operand 0 from the two temporaries.
;; NOTE(review): selector 0 + (2 << 4) presumably picks the lower
;; 128-bit lane of each temporary -- confirm against the VPERM2I128
;; immediate encoding.
(define_expand "vec_interleave_low<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
 "TARGET_AVX2"
{
  rtx lane_lo = gen_reg_rtx (<MODE>mode);
  rtx lane_hi = gen_reg_rtx (<MODE>mode);
  rtx sel = GEN_INT (0 + (2 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (lane_lo, operands[1],
                                            operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (lane_hi, operands[1],
                                             operands[2]));
  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
                                gen_lowpart (V4DImode, lane_lo),
                                gen_lowpart (V4DImode, lane_hi),
                                sel));
  DONE;
})
|
||||
|
||||
;; Modes handled by pinsr patterns.
|
||||
(define_mode_iterator PINSR_MODE
|
||||
[(V16QI "TARGET_SSE4_1") V8HI
|
||||
|
Loading…
x
Reference in New Issue
Block a user