re PR target/62128 (Use vpalignr for AVX2 rotation)

PR target/62128
	* config/i386/i386.c (expand_vec_perm_palignr): If the op1, op0 order
	of palignr arguments can't be used because min is 0 or max - min
	is too high, also try the op0, op1 order of palignr arguments.

	* gcc.dg/torture/vshuf-16.inc (TESTS): Add 2 new permutations.
	* gcc.dg/torture/vshuf-32.inc (TESTS): Add 5 new permutations.

From-SVN: r215834
This commit is contained in:
Jakub Jelinek 2014-10-03 09:27:50 +02:00 committed by Jakub Jelinek
parent 4081ada2c7
commit 661fb5ddb8
5 changed files with 60 additions and 11 deletions

View File

@ -1,3 +1,10 @@
2014-10-03 Jakub Jelinek <jakub@redhat.com>
PR target/62128
* config/i386/i386.c (expand_vec_perm_palignr): If op1, op0 order
of palignr arguments can't be used due to min 0 or max - min
too high, try also op0, op1 order of palignr arguments.
2014-10-02 Jan Hubicka <hubicka@ucw.cz> 2014-10-02 Jan Hubicka <hubicka@ucw.cz>
* cgraph.h (ipa_polymorphic_call_context): * cgraph.h (ipa_polymorphic_call_context):

View File

@ -43298,8 +43298,8 @@ static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
{ {
unsigned i, nelt = d->nelt; unsigned i, nelt = d->nelt;
unsigned min, max; unsigned min, max, minswap, maxswap;
bool in_order, ok; bool in_order, ok, swap = false;
rtx shift, target; rtx shift, target;
struct expand_vec_perm_d dcopy; struct expand_vec_perm_d dcopy;
@ -43309,20 +43309,40 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
&& (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32)) && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
return false; return false;
min = 2 * nelt, max = 0; min = 2 * nelt;
max = 0;
minswap = 2 * nelt;
maxswap = 0;
for (i = 0; i < nelt; ++i) for (i = 0; i < nelt; ++i)
{ {
unsigned e = d->perm[i]; unsigned e = d->perm[i];
unsigned eswap = d->perm[i] ^ nelt;
if (GET_MODE_SIZE (d->vmode) == 32) if (GET_MODE_SIZE (d->vmode) == 32)
e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1); {
e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
eswap = e ^ (nelt / 2);
}
if (e < min) if (e < min)
min = e; min = e;
if (e > max) if (e > max)
max = e; max = e;
if (eswap < minswap)
minswap = eswap;
if (eswap > maxswap)
maxswap = eswap;
} }
if (min == 0 if (min == 0
|| max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)) || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
return false; {
if (d->one_operand_p
|| minswap == 0
|| maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
? nelt / 2 : nelt))
return false;
swap = true;
min = minswap;
max = maxswap;
}
/* Given that we have SSSE3, we know we'll be able to implement the /* Given that we have SSSE3, we know we'll be able to implement the
single operand permutation after the palignr with pshufb for single operand permutation after the palignr with pshufb for
@ -43332,6 +43352,13 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
return true; return true;
dcopy = *d; dcopy = *d;
if (swap)
{
dcopy.op0 = d->op1;
dcopy.op1 = d->op0;
for (i = 0; i < nelt; ++i)
dcopy.perm[i] ^= nelt;
}
in_order = true; in_order = true;
for (i = 0; i < nelt; ++i) for (i = 0; i < nelt; ++i)
@ -43365,14 +43392,16 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
if (GET_MODE_SIZE (d->vmode) == 16) if (GET_MODE_SIZE (d->vmode) == 16)
{ {
target = gen_reg_rtx (TImode); target = gen_reg_rtx (TImode);
emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1), emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
gen_lowpart (TImode, d->op0), shift)); gen_lowpart (TImode, dcopy.op0), shift));
} }
else else
{ {
target = gen_reg_rtx (V2TImode); target = gen_reg_rtx (V2TImode);
emit_insn (gen_avx2_palignrv2ti (target, gen_lowpart (V2TImode, d->op1), emit_insn (gen_avx2_palignrv2ti (target,
gen_lowpart (V2TImode, d->op0), shift)); gen_lowpart (V2TImode, dcopy.op1),
gen_lowpart (V2TImode, dcopy.op0),
shift));
} }
dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);

View File

@ -1,3 +1,9 @@
2014-10-03 Jakub Jelinek <jakub@redhat.com>
PR target/62128
* gcc.dg/torture/vshuf-16.inc (TESTS): Add 2 new permutations.
* gcc.dg/torture/vshuf-32.inc (TESTS): Add 5 new permutations.
2014-10-02 Edward Smith-Rowland <3dw4rd@verizon.net> 2014-10-02 Edward Smith-Rowland <3dw4rd@verizon.net>
* g++.dg/cpp1y/feat-cxx98-neg.C: Fix double negative thinko and * g++.dg/cpp1y/feat-cxx98-neg.C: Fix double negative thinko and

View File

@ -23,7 +23,9 @@ T (19, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) \
T (20, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) \ T (20, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) \
T (21, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) \ T (21, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) \
T (22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0) \ T (22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0) \
T (23, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15) T (23, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15) \
T (24, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5) \
T (25, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)
#define EXPTESTS \ #define EXPTESTS \
T (116, 28, 13, 27, 11, 21, 1, 5, 22, 29, 14, 15, 6, 3, 10, 16, 30) \ T (116, 28, 13, 27, 11, 21, 1, 5, 22, 29, 14, 15, 6, 3, 10, 16, 30) \
T (117, 22, 26, 1, 13, 29, 3, 18, 18, 11, 21, 12, 28, 19, 5, 7, 4) \ T (117, 22, 26, 1, 13, 29, 3, 18, 18, 11, 21, 12, 28, 19, 5, 7, 4) \

View File

@ -23,7 +23,12 @@ T (19, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 1
T (20, 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47) \ T (20, 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47) \
T (21, 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55, 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63) \ T (21, 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55, 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63) \
T (22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0) \ T (22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0) \
T (23, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31) T (23, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31) \
T (24, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29) \
T (25, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42) \
T (26, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52) \
T (27, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53) \
T (28, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 16, 17, 18, 19, 20, 21)
#define EXPTESTS \ #define EXPTESTS \
T (116, 13, 38, 47, 3, 17, 8, 38, 20, 59, 61, 39, 26, 7, 49, 63, 43, 57, 16, 40, 19, 4, 32, 27, 7, 52, 19, 46, 55, 36, 41, 48, 6) \ T (116, 13, 38, 47, 3, 17, 8, 38, 20, 59, 61, 39, 26, 7, 49, 63, 43, 57, 16, 40, 19, 4, 32, 27, 7, 52, 19, 46, 55, 36, 41, 48, 6) \
T (117, 39, 35, 59, 20, 56, 18, 58, 63, 57, 14, 2, 16, 5, 61, 35, 4, 53, 9, 52, 51, 27, 33, 61, 12, 3, 35, 36, 40, 37, 7, 45, 42) \ T (117, 39, 35, 59, 20, 56, 18, 58, 63, 57, 14, 2, 16, 5, 61, 35, 4, 53, 9, 52, 51, 27, 33, 61, 12, 3, 35, 36, 40, 37, 7, 45, 42) \