df: Record all definitions in DF_LR_BB_INFO->def [PR98863]

df_lr_bb_local_compute has:

      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	/* If the def is to only part of the reg, it does
	   not kill the other defs that reach here.  */
	if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))

However, as noted in the comment in the patch and below, almost all
partial definitions have an associated use.  This means that the
confluence function:

  IN = (OUT & ~DEF) | USE

is unaffected by whether partial definitions are in DEF or not.

Even though the choice doesn't matter for the LR problem itself,
it's IMO much more convenient for consumers if DEF contains all the
definitions in the block.  The only pre-RTL-SSA code that tries to
consume DEF directly is shrink-wrap.c, which already has to work
around the incompleteness of the information:

	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */

I hit the same problem when trying to fix the RTL-SSA part of PR98863.

This patch treats partial definitions as both a def and a use,
just like the df_ref records almost always do.

To show that partial definitions almost always have uses:

  DF_REF_CONDITIONAL:

    Added by:

      case COND_EXEC:
	df_defs_record (collection_rec, COND_EXEC_CODE (x),
			bb, insn_info, DF_REF_CONDITIONAL);
	break;

    Later, df_get_conditional_uses creates uses for all DF_REF_CONDITIONAL
    definitions.

  DF_REF_PARTIAL:

    In total, there are 4 locations at which we add partial definitions.

    Case 1:

      if (GET_CODE (dst) == STRICT_LOW_PART)
	{
	  flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL | DF_REF_STRICT_LOW_PART;

	  loc = &XEXP (dst, 0);
	  dst = *loc;
	}

    Corresponding use:

      case STRICT_LOW_PART:
	{
	  rtx *temp = &XEXP (dst, 0);
	  /* A strict_low_part uses the whole REG and not just the
	   SUBREG.  */
	  dst = XEXP (dst, 0);
	  df_uses_record (collection_rec,
			  (GET_CODE (dst) == SUBREG) ? &SUBREG_REG (dst) : temp,
			  DF_REF_REG_USE, bb, insn_info,
			  DF_REF_READ_WRITE | DF_REF_STRICT_LOW_PART);
	}
	break;

    Case 2:

      if (GET_CODE (dst) == ZERO_EXTRACT)
	{
	  flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL | DF_REF_ZERO_EXTRACT;

	  loc = &XEXP (dst, 0);
	  dst = *loc;
	}

    Corresponding use:

      case ZERO_EXTRACT:
	{
	  df_uses_record (collection_rec, &XEXP (dst, 1),
			  DF_REF_REG_USE, bb, insn_info, flags);
	  df_uses_record (collection_rec, &XEXP (dst, 2),
			  DF_REF_REG_USE, bb, insn_info, flags);
	  if (GET_CODE (XEXP (dst,0)) == MEM)
	    df_uses_record (collection_rec, &XEXP (dst, 0),
			    DF_REF_REG_USE, bb, insn_info,
			    flags);
	  else
	    df_uses_record (collection_rec, &XEXP (dst, 0),
			    DF_REF_REG_USE, bb, insn_info,
			    DF_REF_READ_WRITE | DF_REF_ZERO_EXTRACT);
----------------------------^^^^^^^^^^^^^^^^^
	}
	break;

    Case 3:

      else if (GET_CODE (dst) == SUBREG && REG_P (SUBREG_REG (dst)))
	{
	  if (read_modify_subreg_p (dst))
	    flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL;

	  flags |= DF_REF_SUBREG;

	  df_ref_record (DF_REF_REGULAR, collection_rec,
			 dst, loc, bb, insn_info, DF_REF_REG_DEF, flags);
	}

    Corresponding use:

      case SUBREG:
	if (read_modify_subreg_p (dst))
	  {
	    df_uses_record (collection_rec, &SUBREG_REG (dst),
			    DF_REF_REG_USE, bb, insn_info,
			    flags | DF_REF_READ_WRITE | DF_REF_SUBREG);
	    break;
	  }

    Case 4:

      /*  If this is a multiword hardreg, we create some extra
	  datastructures that will enable us to easily build REG_DEAD
	  and REG_UNUSED notes.  */
      if (collection_rec
	  && (endregno != regno + 1) && insn_info)
	{
	  /* Sets to a subreg of a multiword register are partial.
	     Sets to a non-subreg of a multiword register are not.  */
	  if (GET_CODE (reg) == SUBREG)
	    ref_flags |= DF_REF_PARTIAL;
	  ref_flags |= DF_REF_MW_HARDREG;

    Corresponding use:

      None.  However, this case should be rare to non-existent on most
      targets, and the current handling seems suspect.  See the comment
      in the patch for more details.

gcc/
	* df-problems.c (df_lr_bb_local_compute): Treat partial definitions
	as read-modify operations.

gcc/testsuite/
	* gcc.dg/rtl/aarch64/multi-subreg-1.c: New test.
This commit is contained in:
Richard Sandiford 2021-02-12 15:54:48 +00:00
parent b7210405ed
commit f60226fd72
2 changed files with 66 additions and 7 deletions

View File

@ -842,14 +842,54 @@ df_lr_bb_local_compute (unsigned int bb_index)
df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
FOR_EACH_INSN_INFO_DEF (def, insn_info)
/* If the def is to only part of the reg, it does
not kill the other defs that reach here. */
if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
{
unsigned int dregno = DF_REF_REGNO (def);
bitmap_set_bit (&bb_info->def, dregno);
{
/* If the definition is to only part of the register, it will
usually have a corresponding use. For example, stores to one
word of a multiword register R have both a use and a partial
definition of R.
In those cases, the LR confluence function:
IN = (OUT & ~DEF) | USE
is unaffected by whether we count the partial definition or not.
However, it's more convenient for consumers if DEF contains
*all* the registers defined in a block.
The only current case in which we record a partial definition
without a corresponding use is if the destination is the
multi-register subreg of a hard register. An artificial
example of this is:
(set (subreg:TI (reg:V8HI x0) 0) (const_int -1))
on AArch64. This is described as a DF_REF_PARTIAL
definition of x0 and x1 with no corresponding uses.
In previous versions of GCC, the instruction had no
effect on LR (that is, LR acted as though the instruction
didn't exist).
It seems suspect that this case is treated differently.
Either the instruction should be a full definition of x0 and x1,
or the definition should be treated in the same way as other
partial definitions, such as strict_lowparts or subregs that
satisfy read_modify_subreg_p.
Fortunately, multi-register subregs of hard registers should
be rare. They should be folded into a plain REG if the target
allows that (as AArch64 does for example above).
Here we treat the cases alike by forcing a use even in the rare
case that no DF_REF_REG_USE is recorded. That is, we model all
partial definitions as both a use and a definition of the
register. */
unsigned int dregno = DF_REF_REGNO (def);
bitmap_set_bit (&bb_info->def, dregno);
if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
bitmap_set_bit (&bb_info->use, dregno);
else
bitmap_clear_bit (&bb_info->use, dregno);
}
}
FOR_EACH_INSN_INFO_USE (use, insn_info)
/* Add use to set of uses in this BB. */

View File

@ -0,0 +1,19 @@
/* { dg-additional-options "-O -fdump-rtl-cse1-all" } */
__int128 __RTL (startwith ("vregs")) foo (void)
{
(function "foo"
(insn-chain
(block 2
(edge-from entry (flags "FALLTHRU"))
(cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
(cnote 2 NOTE_INSN_FUNCTION_BEG)
(cinsn 3 (set (subreg:TI (reg:V8HI x0) 0) (const_int -1)))
(edge-to exit (flags "FALLTHRU"))
)
)
(crtl (return_rtx (reg/i:TI x0)))
)
}
/* { dg-final { scan-rtl-dump {(?n)lr *def.*\[x0\].*\[x1\]} cse1 } } */