[PR103302] skip multi-word pre-move clobber during lra
If we emit clobbers before multi-word moves during LRA, we get confused if a copy ends up with its input or output replaced with the other: the clobber then kills the previous set, and it gets deleted. This patch avoids emitting such clobbers when lra_in_progress.

for gcc/ChangeLog

	PR target/103302
	* expr.c (emit_move_multi_word): Skip clobber during lra.

for gcc/testsuite/ChangeLog

	PR target/103302
	* gcc.target/riscv/pr103302.c: New.
This commit is contained in:
parent
2bff91f3b4
commit
50e8b0c9bc
|
@ -3929,7 +3929,7 @@ emit_move_multi_word (machine_mode mode, rtx x, rtx y)
|
|||
hard regs shouldn't appear here except as return values.
|
||||
We never want to emit such a clobber after reload. */
|
||||
if (x != y
|
||||
&& ! (reload_in_progress || reload_completed)
|
||||
&& ! (lra_in_progress || reload_in_progress || reload_completed)
|
||||
&& need_clobber != 0)
|
||||
emit_clobber (x);
|
||||
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
/* { dg-do run } */
/* { dg-options "-Og -fharden-compares -fno-tree-dce -fno-tree-fre " } */

/* Reduced testcase for PR target/103302: -fharden-compares together with
   multi-word vector moves exposed a clobber-related bug during LRA.
   NOTE(review): this looks machine-reduced; the exact types and sizes
   matter, do not "clean them up".  */

typedef unsigned char u8;
typedef unsigned char __attribute__((__vector_size__ (32))) v256u8;
typedef unsigned short __attribute__((__vector_size__ (32))) v256u16;
typedef unsigned short __attribute__((__vector_size__ (64))) v512u16;
typedef unsigned int u32;
/* NOTE(review): despite the "v512" name this is a 4-byte vector —
   presumably a reduction artifact; keep as-is, the name is only cosmetic.  */
typedef unsigned int __attribute__((__vector_size__ (4))) v512u32;
typedef unsigned long long __attribute__((__vector_size__ (32))) v256u64;
typedef unsigned long long __attribute__((__vector_size__ (64))) v512u64;
typedef unsigned __int128 __attribute__((__vector_size__ (32))) v256u128;
typedef unsigned __int128 __attribute__((__vector_size__ (64))) v512u128;

/* Global sink: the store in foo0 keeps the 64-byte vector move alive.  */
v512u16 g;
||||
/* Trigger function for PR103302.  The statement sequence is the reduced
   reproducer; -fno-tree-dce/-fno-tree-fre in the dg-options keep the
   otherwise-dead expressions alive so they reach RTL expansion.
   NOTE(review): do not simplify or reorder — the exact sequence of
   multi-word vector moves is what provoked the LRA clobber bug.  */
void
foo0 (u8 u8_0, v256u16 v256u16_0, v512u16 v512u16_0, u32 u32_0, v512u32,
      v256u64 v256u64_0, v512u64 v512u64_0, v256u128 v256u128_0,
      v512u128 v512u128_0)
{
  /* Result deliberately discarded; the comparison itself is the point
     (hardened by -fharden-compares).  */
  u32_0 <= (v512u128) (v512u128_0 != u8_0);
  /* v512u64_1 is never read; kept alive by -fno-tree-dce.  */
  v512u64 v512u64_1 =
    __builtin_shufflevector (v256u64_0, v512u64_0, 7, 8, 0, 9, 5, 0, 3, 1);
  g = v512u16_0;
  /* Another discarded multi-word vector expression.  */
  (v256u8) v256u16_0 + (v256u8) v256u128_0;
}
|
||||
|
||||
/* Drive foo0 with zero-initialized vector arguments.  The test passes if
   it compiles (no ICE / wrong-code from the LRA clobber) and runs to
   completion; no result is checked.  */
int
main (void)
{
  foo0 (40, (v256u16)
	{
	}, (v512u16)
	{
	}, 0, (v512u32)
	{
	}, (v256u64)
	{
	}, (v512u64)
	{
	}, (v256u128)
	{
	}, (v512u128)
	{
	});
}
|
Loading…
Reference in New Issue