i386.c (expand_vec_perm_palignr): Handle 256-bit vectors for TARGET_AVX2.
* config/i386/i386.c (expand_vec_perm_palignr): Handle 256-bit vectors for TARGET_AVX2. From-SVN: r215778
This commit is contained in:
parent
0aa789a731
commit
7cf4833e8d
@@ -1,5 +1,8 @@
|
||||
2014-10-01 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/i386.c (expand_vec_perm_palignr): Handle
|
||||
256-bit vectors for TARGET_AVX2.
|
||||
|
||||
* config/i386/i386.c (expand_vec_perm_vperm2f128): Canonicalize
|
||||
dfirst permutation.
|
||||
|
||||
|
@@ -43297,44 +43297,75 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
|
||||
rtx shift, target;
|
||||
struct expand_vec_perm_d dcopy;
|
||||
|
||||
/* Even with AVX, palignr only operates on 128-bit vectors. */
|
||||
if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
||||
/* Even with AVX, palignr only operates on 128-bit vectors,
|
||||
in AVX2 palignr operates on both 128-bit lanes. */
|
||||
if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
||||
&& (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
|
||||
return false;
|
||||
|
||||
min = nelt, max = 0;
|
||||
min = 2 * nelt, max = 0;
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
unsigned e = d->perm[i];
|
||||
if (GET_MODE_SIZE (d->vmode) == 32)
|
||||
e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
|
||||
if (e < min)
|
||||
min = e;
|
||||
if (e > max)
|
||||
max = e;
|
||||
}
|
||||
if (min == 0 || max - min >= nelt)
|
||||
if (min == 0
|
||||
|| max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
|
||||
return false;
|
||||
|
||||
/* Given that we have SSSE3, we know we'll be able to implement the
|
||||
single operand permutation after the palignr with pshufb. */
|
||||
if (d->testing_p)
|
||||
single operand permutation after the palignr with pshufb for
|
||||
128-bit vectors. */
|
||||
if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16)
|
||||
return true;
|
||||
|
||||
dcopy = *d;
|
||||
shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
|
||||
target = gen_reg_rtx (TImode);
|
||||
emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
|
||||
gen_lowpart (TImode, d->op0), shift));
|
||||
|
||||
dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
|
||||
dcopy.one_operand_p = true;
|
||||
|
||||
in_order = true;
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
unsigned e = dcopy.perm[i] - min;
|
||||
unsigned e = dcopy.perm[i];
|
||||
if (GET_MODE_SIZE (d->vmode) == 32
|
||||
&& e >= nelt
|
||||
&& (e & (nelt / 2 - 1)) < min)
|
||||
e = e - min - (nelt / 2);
|
||||
else
|
||||
e = e - min;
|
||||
if (e != i)
|
||||
in_order = false;
|
||||
dcopy.perm[i] = e;
|
||||
}
|
||||
dcopy.one_operand_p = true;
|
||||
|
||||
/* For AVX2, test whether we can permute the result in one instruction. */
|
||||
if (d->testing_p)
|
||||
{
|
||||
if (in_order)
|
||||
return true;
|
||||
dcopy.op1 = dcopy.op0;
|
||||
return expand_vec_perm_1 (&dcopy);
|
||||
}
|
||||
|
||||
shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
|
||||
if (GET_MODE_SIZE (d->vmode) == 16)
|
||||
{
|
||||
target = gen_reg_rtx (TImode);
|
||||
emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
|
||||
gen_lowpart (TImode, d->op0), shift));
|
||||
}
|
||||
else
|
||||
{
|
||||
target = gen_reg_rtx (V2TImode);
|
||||
emit_insn (gen_avx2_palignrv2ti (target, gen_lowpart (V2TImode, d->op1),
|
||||
gen_lowpart (V2TImode, d->op0), shift));
|
||||
}
|
||||
|
||||
dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
|
||||
|
||||
/* Test for the degenerate case where the alignment by itself
|
||||
produces the desired permutation. */
|
||||
@@ -43345,7 +43376,7 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
|
||||
}
|
||||
|
||||
ok = expand_vec_perm_1 (&dcopy);
|
||||
gcc_assert (ok);
|
||||
gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user