re PR target/52607 (v4df __builtin_shuffle with {0,2,1,3} or {1,3,0,2})

2012-05-14  Marc Glisse  <marc.glisse@inria.fr>

	PR target/52607
	* config/i386/i386.c (ix86_expand_vec_perm_const): Move code to ...
	(canonicalize_perm): ... new function.
	(expand_vec_perm_2vperm2f128_vshuf): New function.
	(ix86_expand_vec_perm_const_1): Call it.

From-SVN: r187479
This commit is contained in:
Marc Glisse 2012-05-14 22:19:30 +02:00 committed by Marc Glisse
parent 80b91c0b39
commit 6015a67d39
2 changed files with 111 additions and 33 deletions

View File

@ -1,3 +1,11 @@
2012-05-14 Marc Glisse <marc.glisse@inria.fr>
PR target/52607
* config/i386/i386.c (ix86_expand_vec_perm_const): Move code to ...
(canonicalize_perm): ... new function.
(expand_vec_perm_2vperm2f128_vshuf): New function.
(ix86_expand_vec_perm_const_1): Call it.
2012-05-14 Andrew Pinski <apinski@cavium.com>
H.J. Lu <hongjiu.lu@intel.com>
Jakub Jelinek <jakub@redhat.com>

View File

@ -33343,6 +33343,7 @@ struct expand_vec_perm_d
bool testing_p;
};
/* Forward declarations, so that these helpers can be called ahead of
   their definitions.  Each takes the permutation descriptor D and
   returns a bool.  */
static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
@ -37400,6 +37401,57 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
permutation using two vperm2f128, followed by a vshufpd insn blending
the two vectors together. */
static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
struct expand_vec_perm_d dfirst, dsecond, dthird;
bool ok;
if (!TARGET_AVX || (d->vmode != V4DFmode))
return false;
if (d->testing_p)
return true;
dfirst = *d;
dsecond = *d;
dthird = *d;
dfirst.perm[0] = (d->perm[0] & ~1);
dfirst.perm[1] = (d->perm[0] & ~1) + 1;
dfirst.perm[2] = (d->perm[2] & ~1);
dfirst.perm[3] = (d->perm[2] & ~1) + 1;
dsecond.perm[0] = (d->perm[1] & ~1);
dsecond.perm[1] = (d->perm[1] & ~1) + 1;
dsecond.perm[2] = (d->perm[3] & ~1);
dsecond.perm[3] = (d->perm[3] & ~1) + 1;
dthird.perm[0] = (d->perm[0] % 2);
dthird.perm[1] = (d->perm[1] % 2) + 4;
dthird.perm[2] = (d->perm[2] % 2) + 2;
dthird.perm[3] = (d->perm[3] % 2) + 6;
dfirst.target = gen_reg_rtx (dfirst.vmode);
dsecond.target = gen_reg_rtx (dsecond.vmode);
dthird.op0 = dfirst.target;
dthird.op1 = dsecond.target;
dthird.one_operand_p = false;
canonicalize_perm (&dfirst);
canonicalize_perm (&dsecond);
ok = expand_vec_perm_1 (&dfirst)
&& expand_vec_perm_1 (&dsecond)
&& expand_vec_perm_1 (&dthird);
gcc_assert (ok);
return true;
}
/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
permutation with two pshufb insns and an ior. We should have already
failed all two instruction sequences. */
@ -38049,6 +38101,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Try sequences of three instructions. */
if (expand_vec_perm_2vperm2f128_vshuf (d))
return true;
if (expand_vec_perm_pshufb2 (d))
return true;
@ -38086,12 +38141,56 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return false;
}
/* Normalize D when its permutation effectively reads a single input.

   Selector entries below NELT refer to op0, the others to op1.  If
   only one input is referenced, or both are referenced but op0 and
   op1 are the same rtx, D is rewritten as a one-operand permutation:
   one_operand_p is set, op0 and op1 are made identical, and the
   selector is reduced into the range [0, NELT) when op1 was
   referenced.  Otherwise only one_operand_p is cleared.

   Returns true if the selector references both operands, even when
   they turned out to be equal and D was folded.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int nelt = d->nelt;
  int idx;
  bool uses_op0 = false;
  bool uses_op1 = false;

  for (idx = 0; idx < nelt; ++idx)
    {
      if (d->perm[idx] < nelt)
	uses_op0 = true;
      else
	uses_op1 = true;
    }

  d->one_operand_p = true;

  /* A selector that references neither input cannot happen.  */
  if (!uses_op0 && !uses_op1)
    gcc_unreachable ();

  /* Two distinct inputs, both used: a real two-operand permutation.  */
  if (uses_op0 && uses_op1 && !rtx_equal_p (d->op0, d->op1))
    {
      d->one_operand_p = false;
      return true;
    }

  if (uses_op1)
    {
      /* Either only op1 is referenced, or both are referenced but they
	 are identical.  Fold the selector into the single input vector
	 to allow easier matching of the permutation.  */
      for (idx = 0; idx < nelt; ++idx)
	d->perm[idx] &= nelt - 1;
      d->op0 = d->op1;
    }
  else
    /* Only op0 is referenced.  */
    d->op1 = d->op0;

  return uses_op0 && uses_op1;
}
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
int i, nelt, which;
int i, nelt;
bool two_args;
rtx sel;
d.target = operands[0];
@ -38108,44 +38207,15 @@ ix86_expand_vec_perm_const (rtx operands[4])
gcc_assert (XVECLEN (sel, 0) == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
for (i = which = 0; i < nelt; ++i)
for (i = 0; i < nelt; ++i)
{
rtx e = XVECEXP (sel, 0, i);
int ei = INTVAL (e) & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
d.perm[i] = ei;
perm[i] = ei;
}
d.one_operand_p = true;
switch (which)
{
default:
gcc_unreachable();
case 3:
if (!rtx_equal_p (d.op0, d.op1))
{
d.one_operand_p = false;
break;
}
/* The elements of PERM do not suggest that only the first operand
is used, but both operands are identical. Allow easier matching
of the permutation by folding the permutation into the single
input vector. */
/* FALLTHRU */
case 2:
for (i = 0; i < nelt; ++i)
d.perm[i] &= nelt - 1;
d.op0 = d.op1;
break;
case 1:
d.op1 = d.op0;
break;
}
two_args = canonicalize_perm (&d);
if (ix86_expand_vec_perm_const_1 (&d))
return true;
@ -38154,7 +38224,7 @@ ix86_expand_vec_perm_const (rtx operands[4])
same, the above tried to expand with one_operand_p and flattened selector.
If that didn't work, retry without one_operand_p; we succeeded with that
during testing. */
if (which == 3 && d.one_operand_p)
if (two_args && d.one_operand_p)
{
d.one_operand_p = false;
memcpy (d.perm, perm, sizeof (perm));