re PR target/59539 (Missed optimisation: VEX-prefixed operations don't need aligned data)

PR target/59539
	* config/i386/sse.md
	(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>,
	<sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders,
	prefix existing define_insn names with *.

	* gcc.target/i386/pr59539-1.c: New test.
	* gcc.target/i386/pr59539-2.c: New test.

From-SVN: r206090
This commit is contained in:
Jakub Jelinek 2013-12-18 17:50:06 +01:00 committed by Jakub Jelinek
parent 69aeb34f52
commit 90be6e465c
5 changed files with 91 additions and 2 deletions

View File

@ -1,3 +1,11 @@
2013-12-18 Jakub Jelinek <jakub@redhat.com>
PR target/59539
* config/i386/sse.md
(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>,
<sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders,
prefix existing define_insn names with *.
2013-12-18 Eric Botcazou <ebotcazou@adacore.com>
* config/arm/arm.c (arm_expand_epilogue_apcs_frame): Fix thinko.

View File

@ -912,7 +912,28 @@
DONE;
})
(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
;; Expander for the unaligned vector load pattern over the VF mode iterator
;; (VF is defined outside this hunk; presumably the floating-point vector
;; modes -- confirm against the iterator definition).
;; Operand 0 is the destination register, operand 1 the register-or-memory
;; source.  The preparation code either emits a plain move and finishes with
;; DONE, or falls through so that the UNSPEC_LOADU template below is emitted
;; (expected to be matched by the "*"-prefixed define_insn that follows).
(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
[(set (match_operand:VF 0 "register_operand")
(unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
UNSPEC_LOADU))]
"TARGET_SSE && <mask_mode512bit_condition>"
{
/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
just fine if misaligned_operand is true, and without the UNSPEC it can
be combined with arithmetic instructions. If misaligned_operand is
false, still emit UNSPEC_LOADU insn to honor user's request for
misaligned load. */
if (TARGET_AVX
&& misaligned_operand (operands[1], <MODE>mode)
/* FIXME: Revisit after AVX512F merge is completed. */
&& !<mask_applied>)
{
/* Emit an ordinary SET of operand 0 from operand 1, without the UNSPEC
wrapper, and skip the expander's own template.  */
emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
DONE;
}
/* Otherwise fall through: the UNSPEC_LOADU template is emitted.  */
})
(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
[(set (match_operand:VF 0 "register_operand" "=v")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "vm")]
@ -999,7 +1020,29 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
;; Expander for the unaligned vector load pattern over the
;; VI_UNALIGNED_LOADSTORE mode iterator (defined outside this hunk;
;; presumably the integer vector modes -- confirm against the iterator
;; definition).
;; Operand 0 is the destination register, operand 1 the register-or-memory
;; source.  The preparation code either emits a plain move and finishes with
;; DONE, or falls through so that the UNSPEC_LOADU template below is emitted
;; (expected to be matched by the "*"-prefixed define_insn that follows).
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
[(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
(unspec:VI_UNALIGNED_LOADSTORE
[(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
UNSPEC_LOADU))]
"TARGET_SSE2 && <mask_mode512bit_condition>"
{
/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
just fine if misaligned_operand is true, and without the UNSPEC it can
be combined with arithmetic instructions. If misaligned_operand is
false, still emit UNSPEC_LOADU insn to honor user's request for
misaligned load. */
if (TARGET_AVX
&& misaligned_operand (operands[1], <MODE>mode)
/* FIXME: Revisit after AVX512F merge is completed. */
&& !<mask_applied>)
{
/* Emit an ordinary SET of operand 0 from operand 1, without the UNSPEC
wrapper, and skip the expander's own template.  */
emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
DONE;
}
/* Otherwise fall through: the UNSPEC_LOADU template is emitted.  */
})
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
[(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
(unspec:VI_UNALIGNED_LOADSTORE
[(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]

View File

@ -1,3 +1,9 @@
2013-12-18 Jakub Jelinek <jakub@redhat.com>
PR target/59539
* gcc.target/i386/pr59539-1.c: New test.
* gcc.target/i386/pr59539-2.c: New test.
2013-12-18 Nick Clifton <nickc@redhat.com>
* gcc.dg/pr32912-2.c: Fix for 16-bit targets.

View File

@ -0,0 +1,16 @@
/* PR target/59539 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
#include <immintrin.h>
/* Load the 128-bit integer vectors at P1 and P2 with the unaligned-load
   intrinsic, compare them for equality in 16-bit lanes, and return the
   comparison result collapsed to an integer mask.

   The source form is the test input: the dg-final directive after this
   function expects exactly one unaligned vector move in the assembly
   output, so presumably the other load is meant to be folded into the
   compare instruction as a memory operand.  */
int
foo (void *p1, void *p2)
{
/* Both loads are explicitly unaligned; P1 and P2 carry no alignment
   guarantee.  */
__m128i d1 = _mm_loadu_si128 ((__m128i *) p1);
__m128i d2 = _mm_loadu_si128 ((__m128i *) p2);
/* Lane-wise equality on 16-bit elements.  */
__m128i result = _mm_cmpeq_epi16 (d1, d2);
/* Reduce the vector comparison result to a scalar mask.  */
return _mm_movemask_epi8 (result);
}
/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */

View File

@ -0,0 +1,16 @@
/* PR target/59539 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx2" } */
#include <immintrin.h>
/* Load the 256-bit integer vectors at P1 and P2 with the unaligned-load
   intrinsic, compare them for equality in 16-bit lanes, and return the
   comparison result collapsed to an integer mask.

   The source form is the test input: the dg-final directive after this
   function expects exactly one unaligned vector move in the assembly
   output, so presumably the other load is meant to be folded into the
   compare instruction as a memory operand.  */
int
foo (void *p1, void *p2)
{
/* Both loads are explicitly unaligned; P1 and P2 carry no alignment
   guarantee.  */
__m256i d1 = _mm256_loadu_si256 ((__m256i *) p1);
__m256i d2 = _mm256_loadu_si256 ((__m256i *) p2);
/* Lane-wise equality on 16-bit elements.  */
__m256i result = _mm256_cmpeq_epi16 (d1, d2);
/* Reduce the vector comparison result to a scalar mask.  */
return _mm256_movemask_epi8 (result);
}
/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */