re PR target/90991 (_mm_loadu_ps intrinsic translates to vmovaps in combination with _mm512_insertf32x4)

PR target/90991
	* config/i386/sse.md
	(*<extract_type>_vinsert<shuffletype><extract_suf>_0): Use vmovupd,
	vmovups, vmovdqu, vmovdqu32 or vmovdqu64 instead of the aligned
	insns if operands[2] is misaligned_operand.

	* gcc.target/i386/avx512dq-pr90991-1.c: New test.

From-SVN: r272674
This commit is contained in:
Jakub Jelinek 2019-06-26 10:26:18 +02:00 committed by Jakub Jelinek
parent fdfbed383e
commit d55c1ffd49
4 changed files with 80 additions and 6 deletions

View File

@ -1,3 +1,11 @@
2019-06-26 Jakub Jelinek <jakub@redhat.com>
PR target/90991
* config/i386/sse.md
(*<extract_type>_vinsert<shuffletype><extract_suf>_0): Use vmovupd,
vmovups, vmovdqu, vmovdqu32 or vmovdqu64 instead of the aligned
insns if operands[2] is misaligned_operand.
2019-06-26 Li Jia He <helijia@linux.ibm.com>
* config/rs6000/rs6000.h (TARGET_MADDLD): Remove the restriction of

View File

@ -13747,15 +13747,29 @@
switch (<MODE>mode)
{
case E_V8DFmode:
return "vmovapd\t{%2, %x0|%x0, %2}";
if (misaligned_operand (operands[2], <ssequartermode>mode))
return "vmovupd\t{%2, %x0|%x0, %2}";
else
return "vmovapd\t{%2, %x0|%x0, %2}";
case E_V16SFmode:
return "vmovaps\t{%2, %x0|%x0, %2}";
if (misaligned_operand (operands[2], <ssequartermode>mode))
return "vmovups\t{%2, %x0|%x0, %2}";
else
return "vmovaps\t{%2, %x0|%x0, %2}";
case E_V8DImode:
return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
: "vmovdqa\t{%2, %x0|%x0, %2}";
if (misaligned_operand (operands[2], <ssequartermode>mode))
return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
: "vmovdqu\t{%2, %x0|%x0, %2}";
else
return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
: "vmovdqa\t{%2, %x0|%x0, %2}";
case E_V16SImode:
return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
: "vmovdqa\t{%2, %x0|%x0, %2}";
if (misaligned_operand (operands[2], <ssequartermode>mode))
return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
: "vmovdqu\t{%2, %x0|%x0, %2}";
else
return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
: "vmovdqa\t{%2, %x0|%x0, %2}";
default:
gcc_unreachable ();
}

View File

@ -1,3 +1,8 @@
2019-06-26 Jakub Jelinek <jakub@redhat.com>
PR target/90991
* gcc.target/i386/avx512dq-pr90991-1.c: New test.
2019-06-26 Li Jia He <helijia@linux.ibm.com>
* gcc.target/powerpc/maddld-1.c: New testcase.

View File

@ -0,0 +1,47 @@
/* PR target/90991 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512dq -masm=att" } */
/* { dg-final { scan-assembler-times "vmovaps\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
/* { dg-final { scan-assembler-times "vmovups\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
/* { dg-final { scan-assembler-times "vmovupd\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
#include <x86intrin.h>
__m512
f1 (void *a)
{
return _mm512_insertf32x4 (_mm512_set1_ps (0.0f), _mm_load_ps (a), 0);
}
__m512d
f2 (void *a)
{
return _mm512_insertf64x2 (_mm512_set1_pd (0.0), _mm_load_pd (a), 0);
}
__m512i
f3 (void *a)
{
return _mm512_inserti32x4 (_mm512_set1_epi32 (0), _mm_load_si128 (a), 0);
}
__m512
f4 (void *a)
{
return _mm512_insertf32x4 (_mm512_set1_ps (0.0f), _mm_loadu_ps (a), 0);
}
__m512d
f5 (void *a)
{
return _mm512_insertf64x2 (_mm512_set1_pd (0.0), _mm_loadu_pd (a), 0);
}
__m512i
f6 (void *a)
{
return _mm512_inserti32x4 (_mm512_set1_epi32 (0), _mm_loadu_si128 (a), 0);
}