PR 69577: Invalid RA of destination subregs

In PR 69577 we have:

      A: (set (reg:V2TI X) ...)
      B: (set (subreg:TI (reg:V2TI X) 0) ...)

X gets allocated to an AVX register, as usual for V2TI.  The problem is
that the movti for B doesn't then preserve the other half of X, even
though the subreg semantics are supposed to guarantee that.

If instead the same value had been set by:

      A': (set (subreg:TI (reg:V2TI X) 16) ...)
      B: (set (subreg:TI (reg:V2TI X) 0) ...)

the subreg in A' would have prevented the use of AVX registers for X,
since you can't directly access the high part.

IMO these are really the same thing.  An alternative way to view it
is that the original sequence is equivalent to:

      A: (set (reg:V2TI X) ...)
      B1: (set (subreg:TI (reg:V2TI X) 0) ...)
      B2: (set (subreg:TI (reg:V2TI X) 16) (subreg:TI (reg:V2TI X) 16))

in which B2 is a no-op and therefore implicit.  The handling ought
to be the same regardless of whether there is an rtl insn that
explicitly assigns to (subreg:TI (reg:V2TI X) 16).

This patch implements that idea.  Hopefully the comments explain
what's going on.

Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabihf.

gcc/
	PR rtl-optimization/69577
	* reginfo.c (record_subregs_of_mode): Add a partial_def parameter.
	(find_subregs_of_mode): Update accordingly.  Iterate over partial
	definitions.

gcc/testsuite/
	PR rtl-optimization/69577
	* gcc.target/i386/pr69577.c: New test.

From-SVN: r233143
This commit is contained in:
Richard Sandiford 2016-02-04 15:01:15 +00:00 committed by Richard Sandiford
parent 1d10863481
commit 2692b5c857
4 changed files with 83 additions and 5 deletions

View File

@ -1,3 +1,10 @@
2016-02-04 Richard Sandiford <richard.sandiford@arm.com>
PR rtl-optimization/69577
* reginfo.c (record_subregs_of_mode): Add a partial_def parameter.
(find_subregs_of_mode): Update accordingly. Iterate over partial
definitions.
2016-02-04 Alan Lawrence <alan.lawrence@arm.com>
* config/arm/arm-protos.h (neon_reinterpret): Remove.

View File

@ -1244,8 +1244,16 @@ simplifiable_subregs (const subreg_shape &shape)
static HARD_REG_SET **valid_mode_changes;
static obstack valid_mode_changes_obstack;
/* Restrict the choice of register for SUBREG_REG (SUBREG) based
on information about SUBREG.
If PARTIAL_DEF, SUBREG is a partial definition of a multipart inner
register and we want to ensure that the other parts of the inner
register are correctly preserved. If !PARTIAL_DEF we need to
ensure that SUBREG itself can be formed. */
static void
record_subregs_of_mode (rtx subreg)
record_subregs_of_mode (rtx subreg, bool partial_def)
{
unsigned int regno;
@ -1256,15 +1264,41 @@ record_subregs_of_mode (rtx subreg)
if (regno < FIRST_PSEUDO_REGISTER)
return;
subreg_shape shape (shape_of_subreg (subreg));
if (partial_def)
{
/* The number of independently-accessible SHAPE.outer_mode values
in SHAPE.inner_mode is GET_MODE_SIZE (SHAPE.inner_mode) / SIZE.
We need to check that the assignment will preserve all the other
SIZE-byte chunks in the inner register besides the one that
includes SUBREG.
In practice it is enough to check whether an equivalent
SHAPE.inner_mode value in an adjacent SIZE-byte chunk can be formed.
If the underlying registers are small enough, both subregs will
be valid. If the underlying registers are too large, one of the
subregs will be invalid.
This relies on the fact that we've already been passed
SUBREG with PARTIAL_DEF set to false. */
unsigned int size = MAX (REGMODE_NATURAL_SIZE (shape.inner_mode),
GET_MODE_SIZE (shape.outer_mode));
gcc_checking_assert (size < GET_MODE_SIZE (shape.inner_mode));
if (shape.offset >= size)
shape.offset -= size;
else
shape.offset += size;
}
if (valid_mode_changes[regno])
AND_HARD_REG_SET (*valid_mode_changes[regno],
simplifiable_subregs (shape_of_subreg (subreg)));
simplifiable_subregs (shape));
else
{
valid_mode_changes[regno]
= XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET);
COPY_HARD_REG_SET (*valid_mode_changes[regno],
simplifiable_subregs (shape_of_subreg (subreg)));
simplifiable_subregs (shape));
}
}
@ -1277,7 +1311,7 @@ find_subregs_of_mode (rtx x)
int i;
if (code == SUBREG)
record_subregs_of_mode (x);
record_subregs_of_mode (x, false);
/* Time for some deep diving. */
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
@ -1305,7 +1339,14 @@ init_subregs_of_mode (void)
FOR_EACH_BB_FN (bb, cfun)
FOR_BB_INSNS (bb, insn)
if (NONDEBUG_INSN_P (insn))
find_subregs_of_mode (PATTERN (insn));
{
find_subregs_of_mode (PATTERN (insn));
df_ref def;
FOR_EACH_INSN_DEF (def, insn)
if (DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL)
&& df_read_modify_subreg_p (DF_REF_REG (def)))
record_subregs_of_mode (DF_REF_REG (def), true);
}
}
const HARD_REG_SET *

View File

@ -1,3 +1,8 @@
2016-02-04 Richard Sandiford <richard.sandiford@arm.com>
PR rtl-optimization/69577
* gcc.target/i386/pr69577.c: New test.
2016-02-04 Martin Liska <mliska@suse.cz>
* g++.dg/asan/pr69276.C: New test.

View File

@ -0,0 +1,25 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-require-effective-target int128 } */
/* { dg-options "-O -fno-forward-propagate -fno-split-wide-types -mavx" } */
typedef unsigned int u32;
typedef unsigned __int128 u128;
typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32)));
u128 __attribute__ ((noinline, noclone))
foo (u32 u32_0, v32u128 v32u128_0)
{
v32u128_0[0] >>= u32_0;
v32u128_0 += (v32u128) {u32_0, 0};
return u32_0 + v32u128_0[0] + v32u128_0[1];
}
int
main()
{
u128 x = foo (1, (v32u128) {1, 4});
if (x != 6)
__builtin_abort ();
return 0;
}