ira: Consider modelling caller-save allocations as loop spills

If an allocno A in an inner loop L spans a call, a parent allocno AP
can choose to handle a call-clobbered/caller-saved hard register R
in one of two ways:

(1) save R before each call in L and restore R after each call
(2) spill R to memory throughout L

(2) can be cheaper than (1) in some cases, particularly if L does
not reference A.
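
For instance (a hand-written sketch, not taken from the patch or its
testsuite), a pseudo that is live across a call-heavy inner loop but
unused inside it is exactly the case where (2) wins:

    void g (void);

    /* "x" is live across the whole loop but never referenced inside it.
       Under (1), keeping x in a call-clobbered register costs one save
       and one restore around every call to g, i.e. a cost that scales
       with the iteration count.  Under (2), x is stored to memory once
       before the loop and reloaded once after it.  */
    int
    f (int x, int n)
    {
      for (int i = 0; i < n; ++i)
        g ();
      return x;
    }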

Before the patch we always did (1).  The patch adds support for
picking (2) instead, when it seems cheaper.  It builds on the
earlier support for not propagating conflicts to parent allocnos.

gcc/
	PR rtl-optimization/98782
	* ira-int.h (ira_caller_save_cost): New function.
	(ira_caller_save_loop_spill_p): Likewise.
	* ira-build.c (ira_propagate_hard_reg_costs): Test whether it is
	cheaper to spill a call-clobbered register throughout a loop rather
	than spill it around each individual call.  If so, treat all
	call-clobbered registers as conflicts and...
	(propagate_allocno_info): ...do not propagate call information
	from the child to the parent.
	* ira-color.c (move_spill_restore): Update accordingly.
	* ira-costs.c (ira_tune_allocno_costs): Use ira_caller_save_cost.

gcc/testsuite/
	* gcc.target/aarch64/reg-alloc-3.c: New test.
Author: Richard Sandiford
Date:   2022-01-10 14:47:08 +00:00
Parent: 8e7a23728f
Commit: 01f3e6a40e
5 changed files with 129 additions and 18 deletions

gcc/ira-build.c

@@ -2000,6 +2000,8 @@ ira_propagate_hard_reg_costs (ira_allocno_t parent_a, ira_allocno_t a,
 			      int spill_cost)
 {
   HARD_REG_SET conflicts = ira_total_conflict_hard_regs (a);
+  if (ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+    conflicts |= ira_need_caller_save_regs (a);
   conflicts &= ~ira_total_conflict_hard_regs (parent_a);
 
   auto costs = ALLOCNO_HARD_REG_COSTS (a);
@@ -2069,15 +2071,18 @@ propagate_allocno_info (void)
 	  if (!ira_subloop_allocnos_can_differ_p (parent_a))
 	    merge_hard_reg_conflicts (a, parent_a, true);
-	  ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
-	  ALLOCNO_CALLS_CROSSED_NUM (parent_a)
-	    += ALLOCNO_CALLS_CROSSED_NUM (a);
-	  ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
-	    += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a);
-	  ALLOCNO_CROSSED_CALLS_ABIS (parent_a)
-	    |= ALLOCNO_CROSSED_CALLS_ABIS (a);
-	  ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a)
-	    |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a);
+	  if (!ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+	    {
+	      ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
+	      ALLOCNO_CALLS_CROSSED_NUM (parent_a)
+		+= ALLOCNO_CALLS_CROSSED_NUM (a);
+	      ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
+		+= ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a);
+	      ALLOCNO_CROSSED_CALLS_ABIS (parent_a)
+		|= ALLOCNO_CROSSED_CALLS_ABIS (a);
+	      ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a)
+		|= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a);
+	    }
 	  ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a)
 	    += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
 	  aclass = ALLOCNO_CLASS (a);

gcc/ira-color.c

@@ -3597,11 +3597,16 @@ move_spill_restore (void)
 			 propagate_allocno_info will have propagated
 			 the cost of spilling HARD_REGNO in SUBLOOP_NODE.
 			 (ira_subloop_allocnos_can_differ_p must be true
-			 in that case.)  Otherwise, SPILL_COST acted as
-			 a cap on the propagated register cost, in cases
-			 where the allocations can differ.  */
+			 in that case.)  If HARD_REGNO is a caller-saved
+			 register, we might have modelled it in the same way.
+
+			 Otherwise, SPILL_COST acted as a cap on the propagated
+			 register cost, in cases where the allocations can differ.  */
 		      auto conflicts = ira_total_conflict_hard_regs (subloop_allocno);
-		      if (TEST_HARD_REG_BIT (conflicts, hard_regno))
+		      if (TEST_HARD_REG_BIT (conflicts, hard_regno)
+			  || (ira_need_caller_save_p (subloop_allocno, hard_regno)
+			      && ira_caller_save_loop_spill_p (a, subloop_allocno,
+							       spill_cost)))
 			reg_cost = spill_cost;
 		      else if (ira_subloop_allocnos_can_differ_p (a))
 			reg_cost = MIN (reg_cost, spill_cost);

gcc/ira-costs.c

@@ -2308,7 +2308,7 @@ ira_tune_allocno_costs (void)
 {
   int j, n, regno;
   int cost, min_cost, *reg_costs;
-  enum reg_class aclass, rclass;
+  enum reg_class aclass;
   machine_mode mode;
   ira_allocno_t a;
   ira_allocno_iterator ai;
@@ -2347,12 +2347,9 @@ ira_tune_allocno_costs (void)
 	      }
 	    if (skip_p)
 	      continue;
-	    rclass = REGNO_REG_CLASS (regno);
 	    cost = 0;
 	    if (ira_need_caller_save_p (a, regno))
-	      cost += (ALLOCNO_CALL_FREQ (a)
-		       * (ira_memory_move_cost[mode][rclass][0]
-			  + ira_memory_move_cost[mode][rclass][1]));
+	      cost += ira_caller_save_cost (a);
 #ifdef IRA_HARD_REGNO_ADD_COST_MULTIPLIER
 	    cost += ((ira_memory_move_cost[mode][rclass][0]
 		      + ira_memory_move_cost[mode][rclass][1])

gcc/ira-int.h

@@ -1660,4 +1660,43 @@ ira_total_conflict_hard_regs (ira_allocno_t a)
   return conflicts;
 }
+
+/* Return the cost of saving a caller-saved register before each call
+   in A's live range and restoring the same register after each call.  */
+inline int
+ira_caller_save_cost (ira_allocno_t a)
+{
+  auto mode = ALLOCNO_MODE (a);
+  auto rclass = ALLOCNO_CLASS (a);
+  return (ALLOCNO_CALL_FREQ (a)
+	  * (ira_memory_move_cost[mode][rclass][0]
+	     + ira_memory_move_cost[mode][rclass][1]));
+}
+
+/* A and SUBLOOP_A are allocnos for the same pseudo register, with A's
+   loop immediately enclosing SUBLOOP_A's loop.  If we allocate to A a
+   hard register R that is clobbered by a call in SUBLOOP_A, decide
+   which of the following approaches should be used for handling the
+   conflict:
+
+   (1) Spill R on entry to SUBLOOP_A's loop, assign memory to SUBLOOP_A,
+       and restore R on exit from SUBLOOP_A's loop.
+
+   (2) Spill R before each necessary call in SUBLOOP_A's live range and
+       restore R after each such call.
+
+   Return true if (1) is better than (2).  SPILL_COST is the cost of
+   doing (1).  */
+inline bool
+ira_caller_save_loop_spill_p (ira_allocno_t a, ira_allocno_t subloop_a,
+			      int spill_cost)
+{
+  if (!ira_subloop_allocnos_can_differ_p (a))
+    return false;
+
+  /* Calculate the cost of saving a call-clobbered register
+     before each call and restoring it afterwards.  */
+  int call_cost = ira_caller_save_cost (subloop_a);
+  return call_cost && call_cost >= spill_cost;
+}
+
 #endif /* GCC_IRA_INT_H */
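
As a rough worked example of the comparison made by
ira_caller_save_loop_spill_p (a standalone sketch with made-up numbers;
only the final formula mirrors the code above):

    #include <stdio.h>

    int
    main (void)
    {
      /* ira_caller_save_cost: one store before and one load after each
         call, weighted by how often the calls execute.  All values here
         are hypothetical.  */
      int call_freq = 900;   /* stand-in for ALLOCNO_CALL_FREQ */
      int store_cost = 4;    /* stand-in for ira_memory_move_cost[mode][rclass][0] */
      int load_cost = 4;     /* stand-in for ira_memory_move_cost[mode][rclass][1] */
      int call_cost = call_freq * (store_cost + load_cost);   /* 7200 */

      /* Cost of instead spilling the register throughout the subloop,
         passed in by the callers as SPILL_COST.  */
      int spill_cost = 80;

      /* Same test as the return statement above: prefer the loop spill
         when per-call saves and restores are at least as expensive.  */
      printf ("loop spill preferred: %s\n",
              call_cost && call_cost >= spill_cost ? "yes" : "no");
      return 0;
    }

With these numbers the per-call approach is 90 times more expensive,
so the loop spill is chosen.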

gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c

@@ -0,0 +1,65 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define PROB 0.1
+
+struct L
+{
+  int data;
+  volatile struct L *next;
+  volatile struct L *inner;
+};
+
+void ext();
+
+/* The thing we're testing here is that the !head->inner path of the outer loop
+   body has no stack accesses.  It's possible that we'll need to update this
+   pattern for unrelated code changes, but the test should be XFAILed rather
+   than changed if any new stack accesses creep into the !head->inner path.  */
+
+/*
+** foo:
+** ...
+** ldr (w[0-9]+), \[(x[0-9]+)\]
+** add (w[0-9]+), (?:\3, \1|\1, \3)
+** ldr (x[0-9]+), \[\2, #?16\]
+** str \3, \[\2\]
+** ldr \2, \[\2, #?8\]
+** cbn?z \4, .*
+** ...
+** ret
+*/
+void
+foo (volatile struct L *head, int inc, double *ptr)
+{
+  double d = *ptr;
+  while (head)
+    {
+      /* Clobber all call-preserved GPRs, so that the loop has to use
+	 call-clobbered GPRs if it is to avoid spilling.  */
+      asm volatile ("" :::
+		    "x19", "x20", "x21", "x22", "x23",
+		    "x24", "x25", "x26", "x27", "x28");
+      inc = head->data + inc;
+      volatile struct L *inner = head->inner;
+      head->data = inc;
+      head = head->next;
+      if (__builtin_expect_with_probability (inner != 0, 0, PROB))
+	for (int i = 0; i < 1000; ++i)
+	  {
+	    ext ();
+	    /* Hack to create high register pressure, so that IRA doesn't
+	       collapse this loop into the parent loop.  */
+	    d += 1;
+	    asm volatile ("// foo" :::
+			  "d0", "d1", "d2", "d3",
+			  "d4", "d5", "d6", "d7",
+			  "d8", "d9", "d10", "d11",
+			  "d12", "d13", "d14", "d15",
+			  "d16", "d17", "d18", "d19",
+			  "d20", "d21", "d22", "d23",
+			  "d24", "d25", "d26", "d27",
+			  "d28", "d29", "d30", "d31");
+	  }
+    }
+  *ptr = d;
+}
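
For reference, the usual way to run just this test from a GCC build
tree (assuming an aarch64 build; exact paths and flags vary by setup)
is:

    make check-gcc RUNTESTFLAGS="aarch64.exp=reg-alloc-3.c"

check-function-bodies then compares the assembly generated for foo
against the "**" pattern in the comment above, and the lp64 guard
skips the match on ILP32 targets.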