[ARM] PR68532 Fix up vzip recognition for big endian
gcc/ChangeLog:

2016-02-09  Charles Baylis  <charles.baylis@linaro.org>

	PR target/68532
	* config/arm/arm.c (arm_evpc_neon_vzip): Allow for big endian lane
	order.
	* config/arm/arm_neon.h (vzipq_s8): Adjust shuffle patterns for big
	endian.
	(vzipq_s16): Likewise.
	(vzipq_s32): Likewise.
	(vzipq_f32): Likewise.
	(vzipq_u8): Likewise.
	(vzipq_u16): Likewise.
	(vzipq_u32): Likewise.
	(vzipq_p8): Likewise.
	(vzipq_p16): Likewise.

From-SVN: r233252
This commit is contained in:
parent
4b79ac23c6
commit
1149b83cbd
|
@ -1,3 +1,19 @@
|
||||||
|
2016-02-09 Charles Baylis <charles.baylis@linaro.org>
|
||||||
|
|
||||||
|
PR target/68532
|
||||||
|
* config/arm/arm.c (arm_evpc_neon_vzip): Allow for big endian lane
|
||||||
|
order.
|
||||||
|
* config/arm/arm_neon.h (vzipq_s8): Adjust shuffle patterns for big
|
||||||
|
endian.
|
||||||
|
(vzipq_s16): Likewise.
|
||||||
|
(vzipq_s32): Likewise.
|
||||||
|
(vzipq_f32): Likewise.
|
||||||
|
(vzipq_u8): Likewise.
|
||||||
|
(vzipq_u16): Likewise.
|
||||||
|
(vzipq_u32): Likewise.
|
||||||
|
(vzipq_p8): Likewise.
|
||||||
|
(vzipq_p16): Likewise.
|
||||||
|
|
||||||
2016-02-09 Charles Baylis <charles.baylis@linaro.org>
|
2016-02-09 Charles Baylis <charles.baylis@linaro.org>
|
||||||
|
|
||||||
PR target/68532
|
PR target/68532
|
||||||
|
|
|
@ -28313,15 +28313,20 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
|
||||||
unsigned int i, high, mask, nelt = d->nelt;
|
unsigned int i, high, mask, nelt = d->nelt;
|
||||||
rtx out0, out1, in0, in1;
|
rtx out0, out1, in0, in1;
|
||||||
rtx (*gen)(rtx, rtx, rtx, rtx);
|
rtx (*gen)(rtx, rtx, rtx, rtx);
|
||||||
|
int first_elem;
|
||||||
|
bool is_swapped;
|
||||||
|
|
||||||
if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
|
if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* Note that these are little-endian tests. Adjust for big-endian later. */
|
is_swapped = BYTES_BIG_ENDIAN;
|
||||||
|
|
||||||
|
first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
|
||||||
|
|
||||||
high = nelt / 2;
|
high = nelt / 2;
|
||||||
if (d->perm[0] == high)
|
if (first_elem == neon_endian_lane_map (d->vmode, high))
|
||||||
;
|
;
|
||||||
else if (d->perm[0] == 0)
|
else if (first_elem == neon_endian_lane_map (d->vmode, 0))
|
||||||
high = 0;
|
high = 0;
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
@ -28329,11 +28334,15 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
for (i = 0; i < nelt / 2; i++)
|
for (i = 0; i < nelt / 2; i++)
|
||||||
{
|
{
|
||||||
unsigned elt = (i + high) & mask;
|
unsigned elt =
|
||||||
if (d->perm[i * 2] != elt)
|
neon_pair_endian_lane_map (d->vmode, i + high) & mask;
|
||||||
|
if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
|
||||||
|
!= elt)
|
||||||
return false;
|
return false;
|
||||||
elt = (elt + nelt) & mask;
|
elt =
|
||||||
if (d->perm[i * 2 + 1] != elt)
|
neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
|
||||||
|
if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
|
||||||
|
!= elt)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28357,11 +28366,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
in0 = d->op0;
|
in0 = d->op0;
|
||||||
in1 = d->op1;
|
in1 = d->op1;
|
||||||
if (BYTES_BIG_ENDIAN)
|
if (is_swapped)
|
||||||
{
|
std::swap (in0, in1);
|
||||||
std::swap (in0, in1);
|
|
||||||
high = !high;
|
|
||||||
}
|
|
||||||
|
|
||||||
out0 = d->target;
|
out0 = d->target;
|
||||||
out1 = gen_reg_rtx (d->vmode);
|
out1 = gen_reg_rtx (d->vmode);
|
||||||
|
|
|
@ -8453,9 +8453,9 @@ vzipq_s8 (int8x16_t __a, int8x16_t __b)
|
||||||
int8x16x2_t __rv;
|
int8x16x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
|
{ 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
|
{ 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
|
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
|
||||||
|
@ -8471,9 +8471,9 @@ vzipq_s16 (int16x8_t __a, int16x8_t __b)
|
||||||
int16x8x2_t __rv;
|
int16x8x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 12, 4, 13, 5, 14, 6, 15, 7 });
|
{ 10, 2, 11, 3, 8, 0, 9, 1 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 8, 0, 9, 1, 10, 2, 11, 3 });
|
{ 14, 6, 15, 7, 12, 4, 13, 5 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 0, 8, 1, 9, 2, 10, 3, 11 });
|
{ 0, 8, 1, 9, 2, 10, 3, 11 });
|
||||||
|
@ -8488,8 +8488,8 @@ vzipq_s32 (int32x4_t __a, int32x4_t __b)
|
||||||
{
|
{
|
||||||
int32x4x2_t __rv;
|
int32x4x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
|
||||||
|
@ -8502,8 +8502,8 @@ vzipq_f32 (float32x4_t __a, float32x4_t __b)
|
||||||
{
|
{
|
||||||
float32x4x2_t __rv;
|
float32x4x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
|
||||||
|
@ -8517,9 +8517,9 @@ vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
|
||||||
uint8x16x2_t __rv;
|
uint8x16x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
|
{ 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
|
{ 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
|
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
|
||||||
|
@ -8535,9 +8535,9 @@ vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
|
||||||
uint16x8x2_t __rv;
|
uint16x8x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 12, 4, 13, 5, 14, 6, 15, 7 });
|
{ 10, 2, 11, 3, 8, 0, 9, 1 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 8, 0, 9, 1, 10, 2, 11, 3 });
|
{ 14, 6, 15, 7, 12, 4, 13, 5 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 0, 8, 1, 9, 2, 10, 3, 11 });
|
{ 0, 8, 1, 9, 2, 10, 3, 11 });
|
||||||
|
@ -8552,8 +8552,8 @@ vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
|
||||||
{
|
{
|
||||||
uint32x4x2_t __rv;
|
uint32x4x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
|
||||||
|
@ -8567,9 +8567,9 @@ vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
|
||||||
poly8x16x2_t __rv;
|
poly8x16x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
|
{ 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
|
{ 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
|
||||||
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
|
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
|
||||||
|
@ -8585,9 +8585,9 @@ vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
|
||||||
poly16x8x2_t __rv;
|
poly16x8x2_t __rv;
|
||||||
#ifdef __ARM_BIG_ENDIAN
|
#ifdef __ARM_BIG_ENDIAN
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 12, 4, 13, 5, 14, 6, 15, 7 });
|
{ 10, 2, 11, 3, 8, 0, 9, 1 });
|
||||||
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 8, 0, 9, 1, 10, 2, 11, 3 });
|
{ 14, 6, 15, 7, 12, 4, 13, 5 });
|
||||||
#else
|
#else
|
||||||
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
|
||||||
{ 0, 8, 1, 9, 2, 10, 3, 11 });
|
{ 0, 8, 1, 9, 2, 10, 3, 11 });
|
||||||
|
|
Loading…
Reference in New Issue