re PR target/59539 (Missed optimisation: VEX-prefixed operations don't need aligned data)

	PR target/59539
	* config/i386/sse.md
	(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>,
	<sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders,
	prefix existing define_insn names with *.

	* gcc.target/i386/pr59539-1.c: New test.
	* gcc.target/i386/pr59539-2.c: New test.

From-SVN: r206090
2013-12-18 17:50:06 +01:00 · 2013-12-18 17:50:06 +01:00 · 90be6e465c
commit 90be6e465c
parent 69aeb34f52
5 changed files with 91 additions and 2 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2013-12-18  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/59539
+	* config/i386/sse.md
+	(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>,
+	<sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders,
+	prefix existing define_insn names with *.
+
 2013-12-18  Eric Botcazou  <ebotcazou@adacore.com>

 	* config/arm/arm.c (arm_expand_epilogue_apcs_frame): Fix thinko.
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -912,7 +912,28 @@
  DONE;
 })

-(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
+(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
+  [(set (match_operand:VF 0 "register_operand")
+	(unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_SSE && <mask_mode512bit_condition>"
+{
+  /* For AVX, the normal *mov<mode>_internal pattern handles unaligned loads
+     just fine when misaligned_operand is true, and without the UNSPEC the
+     load can be combined with arithmetic instructions.  If misaligned_operand
+     is false, still emit the UNSPEC_LOADU insn to honor the user's request
+     for a misaligned load.  */
+  if (TARGET_AVX
+      && misaligned_operand (operands[1], <MODE>mode)
+      /* FIXME: Revisit after AVX512F merge is completed.  */
+      && !<mask_applied>)
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
 	(unspec:VF
 	  [(match_operand:VF 1 "nonimmediate_operand" "vm")]
@@ -999,7 +1020,29 @@
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

-(define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
+	(unspec:VI_UNALIGNED_LOADSTORE
+	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_SSE2 && <mask_mode512bit_condition>"
+{
+  /* For AVX, the normal *mov<mode>_internal pattern handles unaligned loads
+     just fine when misaligned_operand is true, and without the UNSPEC the
+     load can be combined with arithmetic instructions.  If misaligned_operand
+     is false, still emit the UNSPEC_LOADU insn to honor the user's request
+     for a misaligned load.  */
+  if (TARGET_AVX
+      && misaligned_operand (operands[1], <MODE>mode)
+      /* FIXME: Revisit after AVX512F merge is completed.  */
+      && !<mask_applied>)
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
 	(unspec:VI_UNALIGNED_LOADSTORE
 	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2013-12-18  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/59539
+	* gcc.target/i386/pr59539-1.c: New test.
+	* gcc.target/i386/pr59539-2.c: New test.
+
 2013-12-18  Nick Clifton  <nickc@redhat.com>

 	* gcc.dg/pr32912-2.c: Fix for 16-bit targets.
--- a/gcc/testsuite/gcc.target/i386/pr59539-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr59539-1.c
@@ -0,0 +1,16 @@
+/* PR target/59539: two unaligned __m128i loads, only one vmovdqu expected.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+#include <immintrin.h>
+
+int
+foo (void *p1, void *p2)
+{
+  __m128i d1 = _mm_loadu_si128 ((__m128i *) p1);
+  __m128i d2 = _mm_loadu_si128 ((__m128i *) p2);
+  __m128i result = _mm_cmpeq_epi16 (d1, d2);
+  return _mm_movemask_epi8 (result);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */
--- a/gcc/testsuite/gcc.target/i386/pr59539-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr59539-2.c
@@ -0,0 +1,16 @@
+/* PR target/59539: two unaligned __m256i loads, only one vmovdqu expected.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+
+#include <immintrin.h>
+
+int
+foo (void *p1, void *p2)
+{
+  __m256i d1 = _mm256_loadu_si256 ((__m256i *) p1);
+  __m256i d2 = _mm256_loadu_si256 ((__m256i *) p2);
+  __m256i result = _mm256_cmpeq_epi16 (d1, d2);
+  return _mm256_movemask_epi8 (result);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */