re PR target/91560 (Try harder for AVX non-AVX2 cross-lane permutations)
PR target/91560 * config/i386/i386-expand.c (expand_vec_perm_movs, expand_vec_perm_blend, expand_vec_perm_vpermil, expand_vec_perm_pshufb, expand_vec_perm_1, expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_palignr, expand_vec_perm_interleave2, expand_vec_perm_vpermq_perm_1, expand_vec_perm_vperm2f128, expand_vec_perm_interleave3, expand_vec_perm_vperm2f128_vblend, expand_vec_perm_2vperm2f128_vshuf, expand_vec_perm_even_odd, expand_vec_perm_broadcast): Adjust function comments - replace ix86_expand_vec_perm_builtin_1 with ix86_expand_vec_perm_const_1. (expand_vec_perm2_vperm2f128_vblend): New function. (ix86_expand_vec_perm_const_1): New forward declaration. Call expand_vec_perm2_vperm2f128_vblend as last resort. (canonicalize_perm): Formatting fix. * gcc.dg/torture/vshuf-8.inc: Add two further permutations. From-SVN: r275027
This commit is contained in:
parent
1d9cd701ec
commit
4bf4c103ee
@ -1,5 +1,21 @@
|
||||
2019-08-29 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/91560
|
||||
* config/i386/i386-expand.c (expand_vec_perm_movs,
|
||||
expand_vec_perm_blend, expand_vec_perm_vpermil,
|
||||
expand_vec_perm_pshufb, expand_vec_perm_1,
|
||||
expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_palignr,
|
||||
expand_vec_perm_interleave2, expand_vec_perm_vpermq_perm_1,
|
||||
expand_vec_perm_vperm2f128, expand_vec_perm_interleave3,
|
||||
expand_vec_perm_vperm2f128_vblend, expand_vec_perm_2vperm2f128_vshuf,
|
||||
expand_vec_perm_even_odd, expand_vec_perm_broadcast): Adjust function
|
||||
comments - replace ix86_expand_vec_perm_builtin_1 with
|
||||
ix86_expand_vec_perm_const_1.
|
||||
(expand_vec_perm2_vperm2f128_vblend): New function.
|
||||
(ix86_expand_vec_perm_const_1): New forward declaration. Call
|
||||
expand_vec_perm2_vperm2f128_vblend as last resort.
|
||||
(canonicalize_perm): Formatting fix.
|
||||
|
||||
PR tree-optimization/91351
|
||||
* tree-cfg.c (generate_range_test): Use range_check_type instead of
|
||||
unsigned_type_for.
|
||||
|
@ -16372,7 +16372,7 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
|
||||
using movss or movsd. */
|
||||
static bool
|
||||
expand_vec_perm_movs (struct expand_vec_perm_d *d)
|
||||
@ -16408,7 +16408,7 @@ expand_vec_perm_movs (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
|
||||
in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
|
||||
|
||||
static bool
|
||||
@ -16633,7 +16633,7 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
|
||||
in terms of the variable form of vpermilps.
|
||||
|
||||
Note that we will have already failed the immediate input vpermilps,
|
||||
@ -16709,7 +16709,7 @@ valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
|
||||
in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
|
||||
|
||||
static bool
|
||||
@ -17026,7 +17026,7 @@ ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
|
||||
|
||||
static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to instantiate D
|
||||
in a single instruction. */
|
||||
|
||||
static bool
|
||||
@ -17216,7 +17216,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
|
||||
in terms of a pair of pshuflw + pshufhw instructions. */
|
||||
|
||||
static bool
|
||||
@ -17257,7 +17257,7 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
|
||||
the permutation using the SSSE3 palignr instruction. This succeeds
|
||||
when all of the elements in PERM fit within one vector and we merely
|
||||
need to shift them down so that a single vector permutation has a
|
||||
@ -17474,7 +17474,7 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
|
||||
|
||||
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
|
||||
a two vector permutation into a single vector permutation by using
|
||||
an interleave operation to merge the vectors. */
|
||||
|
||||
@ -17752,7 +17752,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
|
||||
a single vector cross-lane permutation into vpermq followed
|
||||
by any of the single insn permutations. */
|
||||
|
||||
@ -17833,7 +17833,7 @@ expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
|
||||
|
||||
static bool canonicalize_perm (struct expand_vec_perm_d *d);
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
|
||||
a vector permutation using two instructions, vperm2f128 resp.
|
||||
vperm2i128 followed by any single in-lane permutation. */
|
||||
|
||||
@ -17950,7 +17950,7 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
|
||||
a two vector permutation using 2 intra-lane interleave insns
|
||||
and cross-lane shuffle for 32-byte vectors. */
|
||||
|
||||
@ -18026,7 +18026,7 @@ expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
|
||||
a single vector permutation using a single intra-lane vector
|
||||
permutation, vperm2f128 swapping the lanes and vblend* insn blending
|
||||
the non-swapped and swapped vectors together. */
|
||||
@ -18094,7 +18094,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
|
||||
permutation using two vperm2f128, followed by a vshufpd insn blending
|
||||
the two vectors together. */
|
||||
|
||||
@ -18145,6 +18145,106 @@ expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *);
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
|
||||
a two vector permutation using two intra-lane vector
|
||||
permutations, vperm2f128 swapping the lanes and vblend* insn blending
|
||||
the non-swapped and swapped vectors together. */
|
||||
|
||||
/* Outline: the elements of D are split into two groups.  DFIRST gets the
   elements whose source index already lies in the same half (lane) as the
   result position; DSECOND gets the remaining ones, stored at the mirrored
   position in the other half.  Both sub-permutations are expanded through
   ix86_expand_vec_perm_const_1, the DSECOND result has its halves swapped
   (DTHIRD), and a blend with immediate MSK merges the two.
   Returns true on success (or, when D->testing_p, if the sequence would
   be possible); returns false if this strategy does not apply or one of
   the sub-permutations cannot be expanded.  */
static bool
|
||||
expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d)
|
||||
{
|
||||
struct expand_vec_perm_d dfirst, dsecond, dthird;
|
||||
unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2, which1 = 0, which2 = 0;
|
||||
rtx_insn *seq1, *seq2;
|
||||
bool ok;
|
||||
rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
|
||||
|
||||
/* Only attempted for AVX without AVX2, V8SF/V4DF modes, and genuine
   two-operand permutations.  */
if (!TARGET_AVX
|
||||
|| TARGET_AVX2
|
||||
|| (d->vmode != V8SFmode && d->vmode != V4DFmode)
|
||||
|| d->one_operand_p)
|
||||
return false;
|
||||
|
||||
/* Start both sub-permutations as copies of D with every selector marked
   0xff, meaning "not assigned yet" (checked again further below).  */
dfirst = *d;
|
||||
dsecond = *d;
|
||||
for (i = 0; i < nelt; i++)
|
||||
{
|
||||
dfirst.perm[i] = 0xff;
|
||||
dsecond.perm[i] = 0xff;
|
||||
}
|
||||
/* Distribute the selectors.  WHICH1/WHICH2 accumulate bit 1 when a group
   uses an index below NELT and bit 2 when it uses an index of NELT or
   above; MSK gets bit I set for every result position served by DSECOND.  */
for (i = 0, msk = 0; i < nelt; i++)
|
||||
{
|
||||
/* J is position I moved into the half selected by the NELT2 bit of the
   source index, i.e. J == I iff source and destination halves agree.  */
j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
|
||||
if (j == i)
|
||||
{
|
||||
dfirst.perm[j] = d->perm[i];
|
||||
which1 |= (d->perm[i] < nelt ? 1 : 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
dsecond.perm[j] = d->perm[i];
|
||||
which2 |= (d->perm[i] < nelt ? 1 : 2);
|
||||
msk |= (1U << i);
|
||||
}
|
||||
}
|
||||
/* Give up when every element fell into the same group (MSK is empty or
   has all NELT bits set).  */
if (msk == 0 || msk == (1U << nelt) - 1)
|
||||
return false;
|
||||
|
||||
/* Fresh result registers are only created when actually generating code.  */
if (!d->testing_p)
|
||||
{
|
||||
dfirst.target = gen_reg_rtx (dfirst.vmode);
|
||||
dsecond.target = gen_reg_rtx (dsecond.vmode);
|
||||
}
|
||||
|
||||
/* Fill the still-unassigned slots with position I itself, offset by NELT
   when the group referenced only indexes of NELT or above (which == 2).  */
for (i = 0; i < nelt; i++)
|
||||
{
|
||||
if (dfirst.perm[i] == 0xff)
|
||||
dfirst.perm[i] = (which1 == 2 ? i + nelt : i);
|
||||
if (dsecond.perm[i] == 0xff)
|
||||
dsecond.perm[i] = (which2 == 2 ? i + nelt : i);
|
||||
}
|
||||
/* Expand DFIRST inside its own sequence; the insns are kept in SEQ1 and
   emitted below only once both sub-permutations have succeeded.  */
canonicalize_perm (&dfirst);
|
||||
start_sequence ();
|
||||
ok = ix86_expand_vec_perm_const_1 (&dfirst);
|
||||
seq1 = get_insns ();
|
||||
end_sequence ();
|
||||
|
||||
if (!ok)
|
||||
return false;
|
||||
|
||||
/* Same for DSECOND, collected in SEQ2.  */
canonicalize_perm (&dsecond);
|
||||
start_sequence ();
|
||||
ok = ix86_expand_vec_perm_const_1 (&dsecond);
|
||||
seq2 = get_insns ();
|
||||
end_sequence ();
|
||||
|
||||
if (!ok)
|
||||
return false;
|
||||
|
||||
/* When only testing, report success without emitting anything.  */
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
emit_insn (seq1);
|
||||
emit_insn (seq2);
|
||||
|
||||
/* DTHIRD is a one-operand permutation of DSECOND's result that swaps the
   two halves (selector I ^ NELT2), putting the elements that were stored
   at the mirrored position J back at their final position.  */
dthird = *d;
|
||||
dthird.op0 = dsecond.target;
|
||||
dthird.op1 = dsecond.target;
|
||||
dthird.one_operand_p = true;
|
||||
dthird.target = gen_reg_rtx (dthird.vmode);
|
||||
for (i = 0; i < nelt; i++)
|
||||
dthird.perm[i] = i ^ nelt2;
|
||||
|
||||
/* The half swap is expected to always be expandable as a single insn.  */
ok = expand_vec_perm_1 (&dthird);
|
||||
gcc_assert (ok);
|
||||
|
||||
/* Merge the two partial results with a blend using MSK as immediate.
   NOTE(review): presumably set bits select the DTHIRD operand, matching
   how MSK was built above -- confirm against the blend patterns.  */
blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
|
||||
emit_insn (blend (d->target, dfirst.target, dthird.target, GEN_INT (msk)));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
|
||||
permutation with two pshufb insns and an ior. We should have already
|
||||
failed all two instruction sequences. */
|
||||
@ -18534,7 +18634,7 @@ expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
|
||||
and extract-odd permutations. */
|
||||
|
||||
static bool
|
||||
@ -18743,7 +18843,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
|
||||
extract-even and extract-odd permutations. */
|
||||
|
||||
static bool
|
||||
@ -18762,7 +18862,7 @@ expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
|
||||
return expand_vec_perm_even_odd_1 (d, odd);
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
|
||||
permutations. We assume that expand_vec_perm_1 has already failed. */
|
||||
|
||||
static bool
|
||||
@ -18841,7 +18941,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
|
||||
}
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
|
||||
/* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
|
||||
broadcast permutations. */
|
||||
|
||||
static bool
|
||||
@ -19137,6 +19237,10 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Even longer, including recursion to ix86_expand_vec_perm_const_1. */
|
||||
if (expand_vec_perm2_vperm2f128_vblend (d))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -19149,7 +19253,7 @@ canonicalize_perm (struct expand_vec_perm_d *d)
|
||||
int i, which, nelt = d->nelt;
|
||||
|
||||
for (i = which = 0; i < nelt; ++i)
|
||||
which |= (d->perm[i] < nelt ? 1 : 2);
|
||||
which |= (d->perm[i] < nelt ? 1 : 2);
|
||||
|
||||
d->one_operand_p = true;
|
||||
switch (which)
|
||||
|
@ -1,5 +1,8 @@
|
||||
2019-08-29 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/91560
|
||||
* gcc.dg/torture/vshuf-8.inc: Add two further permutations.
|
||||
|
||||
PR tree-optimization/91351
|
||||
* g++.dg/opt/pr91351.C: New test.
|
||||
|
||||
|
@ -25,7 +25,9 @@ T (21, 4, 12, 5, 13, 6, 14, 7, 15) \
|
||||
T (22, 1, 2, 3, 4, 5, 6, 7, 0) \
|
||||
T (23, 6, 5, 4, 3, 2, 1, 0, 7) \
|
||||
T (24, 0, 1, 2, 3, 8, 9, 10, 11) \
|
||||
T (25, 0, 1, 2, 3, 12, 13, 14, 15)
|
||||
T (25, 0, 1, 2, 3, 12, 13, 14, 15) \
|
||||
T (26, 0, 1, 8, 9, 10, 11, 12, 13) \
|
||||
T (27, 0, 8, 9, 10, 11, 12, 13, 14)
|
||||
#define EXPTESTS \
|
||||
T (116, 9, 3, 9, 4, 7, 0, 0, 6) \
|
||||
T (117, 4, 14, 12, 8, 9, 6, 0, 10) \
|
||||
|
Loading…
Reference in New Issue
Block a user