i386.c (expand_vec_perm_movs): New method matching movs patterns.

* config/i386/i386.c (expand_vec_perm_movs): New method matching movs
	patterns.
	(expand_vec_perm_1): Try the new method.

	* gcc.target/i386/sse2-movs.c: New test.

From-SVN: r263549
This commit is contained in:
Allan Sandfeld Jensen 2018-08-15 10:33:12 +06:00 committed by Jeff Law
parent 4720f4afba
commit 107192f73e
6 changed files with 77 additions and 2 deletions

View File

@ -1,3 +1,9 @@
2018-08-14 Allan Sandfeld Jensen <allan.jensen@qt.io>
* config/i386/i386.c (expand_vec_perm_movs): New method matching movs
patterns.
(expand_vec_perm_1): Try the new method.
2018-08-14 Ilya Leoshkevich <iii@linux.ibm.com>
PR target/86547

View File

@ -113,7 +113,7 @@ _mm_setzero_pd (void)
/* Build a vector whose element 0 comes from __B and whose element 1
   comes from __A: in the two-input __builtin_shuffle below, selector 2
   names element 0 of the second vector and selector 1 names element 1
   of the first vector.
   NOTE(review): two return statements are visible because this listing
   appears to be a diff with its +/- markers stripped; presumably the
   __builtin_ia32_movsd line is the removed one and the
   __builtin_shuffle line is its replacement -- confirm against the
   actual header.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
return __extension__ (__m128d) __builtin_shuffle((__v2df)__A, (__v2df)__B, (__v2di){2, 1});
}
/* Load two DPFP values from P. The address must be 16-byte aligned. */

View File

@ -46145,6 +46145,43 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
using movss or movsd. */
static bool
expand_vec_perm_movs (struct expand_vec_perm_d *d)
{
machine_mode vmode = d->vmode;
unsigned i, nelt = d->nelt;
rtx x;
if (d->one_operand_p)
return false;
if (TARGET_SSE2 && (vmode == V2DFmode || vmode == V4SFmode))
;
else
return false;
/* Only the first element is changed. */
if (d->perm[0] != nelt && d->perm[0] != 0)
return false;
for (i = 1; i < nelt; ++i)
if (d->perm[i] != i + nelt - d->perm[0])
return false;
if (d->testing_p)
return true;
if (d->perm[0] == nelt)
x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1));
else
x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1));
emit_insn (gen_rtx_SET (d->target, x));
return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
@ -46887,6 +46924,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
}
}
/* Try movss/movsd instructions. */
if (expand_vec_perm_movs (d))
return true;
/* Finally, try the fully general two operand permute. */
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
d->testing_p))

View File

@ -1011,7 +1011,10 @@ _mm_storer_ps (float *__P, __m128 __A)
/* Build a vector whose element 0 comes from __B and whose elements 1-3
   come from __A: in the two-input __builtin_shuffle below, selector 4
   names element 0 of the second vector and selectors 1, 2, 3 name the
   matching elements of the first vector.
   NOTE(review): two return statements are visible because this listing
   appears to be a diff with its +/- markers stripped; presumably the
   __builtin_ia32_movss line is the removed one and the
   __builtin_shuffle statement is its replacement -- confirm against
   the actual header.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
return (__m128) __builtin_shuffle ((__v4sf)__A, (__v4sf)__B,
__extension__
(__attribute__((__vector_size__ (16))) int)
{4,1,2,3});
}
/* Extracts one of the four words of A. The selector N must be immediate. */

View File

@ -1,3 +1,7 @@
2018-08-14 Allan Sandfeld Jensen <allan.jensen@qt.io>
* gcc.target/i386/sse2-movs.c: New test.
2018-08-14 Martin Sebor <msebor@redhat.com>
PR tree-optimization/86650

View File

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-assembler "movss" } } */
/* { dg-final { scan-assembler "movsd" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
/* { dg-final { scan-assembler-not "shufpd" } } */
/* 16-byte vectors of four floats and of two doubles.  */
typedef float v4sf __attribute__ ((vector_size (16)));
typedef double v2df __attribute__ ((vector_size (16)));

/* Return A with its lowest float replaced by the lowest float of B.  */
v4sf
movss (v4sf a, v4sf b)
{
  v4sf merged = { b[0], a[1], a[2], a[3] };
  return merged;
}

/* Return A with its lowest double replaced by the lowest double of B.  */
v2df
movsd (v2df a, v2df b)
{
  v2df merged = { b[0], a[1] };
  return merged;
}