re PR target/52607 (v4df __builtin_shuffle with {0,2,1,3} or {1,3,0,2})
2012-05-14 Marc Glisse <marc.glisse@inria.fr> PR target/52607 * config/i386/i386.c (ix86_expand_vec_perm_const): Move code to ... (canonicalize_perm): ... new function. (expand_vec_perm_2vperm2f128_vshuf): New function. (ix86_expand_vec_perm_const_1): Call it. From-SVN: r187479
This commit is contained in:
parent
80b91c0b39
commit
6015a67d39
|
@ -1,3 +1,11 @@
|
||||||
|
2012-05-14 Marc Glisse <marc.glisse@inria.fr>
|
||||||
|
|
||||||
|
PR target/52607
|
||||||
|
* config/i386/i386.c (ix86_expand_vec_perm_const): Move code to ...
|
||||||
|
(canonicalize_perm): ... new function.
|
||||||
|
(expand_vec_perm_2vperm2f128_vshuf): New function.
|
||||||
|
(ix86_expand_vec_perm_const_1): Call it.
|
||||||
|
|
||||||
2012-05-14 Andrew Pinski <apinski@cavium.com>
|
2012-05-14 Andrew Pinski <apinski@cavium.com>
|
||||||
H.J. Lu <hongjiu.lu@intel.com>
|
H.J. Lu <hongjiu.lu@intel.com>
|
||||||
Jakub Jelinek <jakub@redhat.com>
|
Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
|
@ -33343,6 +33343,7 @@ struct expand_vec_perm_d
|
||||||
bool testing_p;
|
bool testing_p;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static bool canonicalize_perm (struct expand_vec_perm_d *d);
|
||||||
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
|
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
|
||||||
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
|
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
|
||||||
|
|
||||||
|
@ -37400,6 +37401,57 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
|
||||||
|
permutation using two vperm2f128, followed by a vshufpd insn blending
|
||||||
|
the two vectors together. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
struct expand_vec_perm_d dfirst, dsecond, dthird;
|
||||||
|
bool ok;
|
||||||
|
|
||||||
|
if (!TARGET_AVX || (d->vmode != V4DFmode))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (d->testing_p)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
dfirst = *d;
|
||||||
|
dsecond = *d;
|
||||||
|
dthird = *d;
|
||||||
|
|
||||||
|
dfirst.perm[0] = (d->perm[0] & ~1);
|
||||||
|
dfirst.perm[1] = (d->perm[0] & ~1) + 1;
|
||||||
|
dfirst.perm[2] = (d->perm[2] & ~1);
|
||||||
|
dfirst.perm[3] = (d->perm[2] & ~1) + 1;
|
||||||
|
dsecond.perm[0] = (d->perm[1] & ~1);
|
||||||
|
dsecond.perm[1] = (d->perm[1] & ~1) + 1;
|
||||||
|
dsecond.perm[2] = (d->perm[3] & ~1);
|
||||||
|
dsecond.perm[3] = (d->perm[3] & ~1) + 1;
|
||||||
|
dthird.perm[0] = (d->perm[0] % 2);
|
||||||
|
dthird.perm[1] = (d->perm[1] % 2) + 4;
|
||||||
|
dthird.perm[2] = (d->perm[2] % 2) + 2;
|
||||||
|
dthird.perm[3] = (d->perm[3] % 2) + 6;
|
||||||
|
|
||||||
|
dfirst.target = gen_reg_rtx (dfirst.vmode);
|
||||||
|
dsecond.target = gen_reg_rtx (dsecond.vmode);
|
||||||
|
dthird.op0 = dfirst.target;
|
||||||
|
dthird.op1 = dsecond.target;
|
||||||
|
dthird.one_operand_p = false;
|
||||||
|
|
||||||
|
canonicalize_perm (&dfirst);
|
||||||
|
canonicalize_perm (&dsecond);
|
||||||
|
|
||||||
|
ok = expand_vec_perm_1 (&dfirst)
|
||||||
|
&& expand_vec_perm_1 (&dsecond)
|
||||||
|
&& expand_vec_perm_1 (&dthird);
|
||||||
|
|
||||||
|
gcc_assert (ok);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
|
/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
|
||||||
permutation with two pshufb insns and an ior. We should have already
|
permutation with two pshufb insns and an ior. We should have already
|
||||||
failed all two instruction sequences. */
|
failed all two instruction sequences. */
|
||||||
|
@ -38049,6 +38101,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
/* Try sequences of three instructions. */
|
/* Try sequences of three instructions. */
|
||||||
|
|
||||||
|
if (expand_vec_perm_2vperm2f128_vshuf (d))
|
||||||
|
return true;
|
||||||
|
|
||||||
if (expand_vec_perm_pshufb2 (d))
|
if (expand_vec_perm_pshufb2 (d))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
@ -38086,12 +38141,56 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If a permutation only uses one operand, make it clear. Returns true
|
||||||
|
if the permutation references both operands. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
canonicalize_perm (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
int i, which, nelt = d->nelt;
|
||||||
|
|
||||||
|
for (i = which = 0; i < nelt; ++i)
|
||||||
|
which |= (d->perm[i] < nelt ? 1 : 2);
|
||||||
|
|
||||||
|
d->one_operand_p = true;
|
||||||
|
switch (which)
|
||||||
|
{
|
||||||
|
default:
|
||||||
|
gcc_unreachable();
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
if (!rtx_equal_p (d->op0, d->op1))
|
||||||
|
{
|
||||||
|
d->one_operand_p = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* The elements of PERM do not suggest that only the first operand
|
||||||
|
is used, but both operands are identical. Allow easier matching
|
||||||
|
of the permutation by folding the permutation into the single
|
||||||
|
input vector. */
|
||||||
|
/* FALLTHRU */
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
d->perm[i] &= nelt - 1;
|
||||||
|
d->op0 = d->op1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
d->op1 = d->op0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (which == 3);
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ix86_expand_vec_perm_const (rtx operands[4])
|
ix86_expand_vec_perm_const (rtx operands[4])
|
||||||
{
|
{
|
||||||
struct expand_vec_perm_d d;
|
struct expand_vec_perm_d d;
|
||||||
unsigned char perm[MAX_VECT_LEN];
|
unsigned char perm[MAX_VECT_LEN];
|
||||||
int i, nelt, which;
|
int i, nelt;
|
||||||
|
bool two_args;
|
||||||
rtx sel;
|
rtx sel;
|
||||||
|
|
||||||
d.target = operands[0];
|
d.target = operands[0];
|
||||||
|
@ -38108,44 +38207,15 @@ ix86_expand_vec_perm_const (rtx operands[4])
|
||||||
gcc_assert (XVECLEN (sel, 0) == nelt);
|
gcc_assert (XVECLEN (sel, 0) == nelt);
|
||||||
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
|
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
|
||||||
|
|
||||||
for (i = which = 0; i < nelt; ++i)
|
for (i = 0; i < nelt; ++i)
|
||||||
{
|
{
|
||||||
rtx e = XVECEXP (sel, 0, i);
|
rtx e = XVECEXP (sel, 0, i);
|
||||||
int ei = INTVAL (e) & (2 * nelt - 1);
|
int ei = INTVAL (e) & (2 * nelt - 1);
|
||||||
|
|
||||||
which |= (ei < nelt ? 1 : 2);
|
|
||||||
d.perm[i] = ei;
|
d.perm[i] = ei;
|
||||||
perm[i] = ei;
|
perm[i] = ei;
|
||||||
}
|
}
|
||||||
|
|
||||||
d.one_operand_p = true;
|
two_args = canonicalize_perm (&d);
|
||||||
switch (which)
|
|
||||||
{
|
|
||||||
default:
|
|
||||||
gcc_unreachable();
|
|
||||||
|
|
||||||
case 3:
|
|
||||||
if (!rtx_equal_p (d.op0, d.op1))
|
|
||||||
{
|
|
||||||
d.one_operand_p = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* The elements of PERM do not suggest that only the first operand
|
|
||||||
is used, but both operands are identical. Allow easier matching
|
|
||||||
of the permutation by folding the permutation into the single
|
|
||||||
input vector. */
|
|
||||||
/* FALLTHRU */
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
for (i = 0; i < nelt; ++i)
|
|
||||||
d.perm[i] &= nelt - 1;
|
|
||||||
d.op0 = d.op1;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
d.op1 = d.op0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ix86_expand_vec_perm_const_1 (&d))
|
if (ix86_expand_vec_perm_const_1 (&d))
|
||||||
return true;
|
return true;
|
||||||
|
@ -38154,7 +38224,7 @@ ix86_expand_vec_perm_const (rtx operands[4])
|
||||||
same, the above tried to expand with one_operand_p and flattened selector.
|
same, the above tried to expand with one_operand_p and flattened selector.
|
||||||
If that didn't work, retry without one_operand_p; we succeeded with that
|
If that didn't work, retry without one_operand_p; we succeeded with that
|
||||||
during testing. */
|
during testing. */
|
||||||
if (which == 3 && d.one_operand_p)
|
if (two_args && d.one_operand_p)
|
||||||
{
|
{
|
||||||
d.one_operand_p = false;
|
d.one_operand_p = false;
|
||||||
memcpy (d.perm, perm, sizeof (perm));
|
memcpy (d.perm, perm, sizeof (perm));
|
||||||
|
|
Loading…
Reference in New Issue