RISC-V: Inhibit FP <--> int register moves via tune param
Under extreme register pressure, the compiler can use FP <--> int
moves as a cheap alternative to spilling to memory.
This was seen with the SPEC2017 FP benchmark 507.cactuBSSN_r:
ML_BSSN_Advect.cc:ML_BSSN_Advect_Body()
| fmv.d.x fa5,s9 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1
| .LVL325:
| ld s9,184(sp) # _12469, %sfp
| ...
| .LVL339:
| fmv.x.d s4,fa5 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1
|
The FMV instructions could be costlier (than stack spill) on certain
micro-architectures, thus this needs to be a per-cpu tunable
(default being to inhibit on all existing RV cpus).
A testsuite run that includes the new test reports 10 failures without
the fix, corresponding to the build variations of pr105666.c:
| === gcc Summary ===
|
| # of expected passes 123318 (+10)
| # of unexpected failures 34 (-10)
| # of unexpected successes 4
| # of expected failures 780
| # of unresolved testcases 4
| # of unsupported tests 2796
gcc/ChangeLog:
* config/riscv/riscv.cc (struct riscv_tune_param): Add
fmv_cost.
(rocket_tune_info): Add default fmv_cost 8.
(sifive_7_tune_info): Ditto.
(thead_c906_tune_info): Ditto.
(optimize_size_tune_info): Ditto.
(riscv_register_move_cost): Use fmv_cost for int<->fp moves.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr105666.c: New test.
Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
(cherry picked from commit b646d7d279
)
This commit is contained in:
parent
682d238f32
commit
e919fae01b
@ -220,6 +220,7 @@ struct riscv_tune_param
|
||||
unsigned short issue_rate;
|
||||
unsigned short branch_cost;
|
||||
unsigned short memory_cost;
|
||||
unsigned short fmv_cost;
|
||||
bool slow_unaligned_access;
|
||||
};
|
||||
|
||||
@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
|
||||
1, /* issue_rate */
|
||||
3, /* branch_cost */
|
||||
5, /* memory_cost */
|
||||
8, /* fmv_cost */
|
||||
true, /* slow_unaligned_access */
|
||||
};
|
||||
|
||||
@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
|
||||
2, /* issue_rate */
|
||||
4, /* branch_cost */
|
||||
3, /* memory_cost */
|
||||
8, /* fmv_cost */
|
||||
true, /* slow_unaligned_access */
|
||||
};
|
||||
|
||||
@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
|
||||
1, /* issue_rate */
|
||||
3, /* branch_cost */
|
||||
5, /* memory_cost */
|
||||
8, /* fmv_cost */
|
||||
false, /* slow_unaligned_access */
|
||||
};
|
||||
|
||||
@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
|
||||
1, /* issue_rate */
|
||||
1, /* branch_cost */
|
||||
2, /* memory_cost */
|
||||
8, /* fmv_cost */
|
||||
false, /* slow_unaligned_access */
|
||||
};
|
||||
|
||||
@ -4737,6 +4742,10 @@ static int
|
||||
riscv_register_move_cost (machine_mode mode,
|
||||
reg_class_t from, reg_class_t to)
|
||||
{
|
||||
/* A move between FP_REGS and GR_REGS, in either direction, is priced by
   the fmv_cost field of the current tune_param, so each CPU tuning can
   make such moves cheaper or dearer than the alternatives.  */
if ((from == FP_REGS && to == GR_REGS) ||
|
||||
(from == GR_REGS && to == FP_REGS))
|
||||
return tune_param->fmv_cost;
|
||||
|
||||
/* Every other class pair: 8 when riscv_secondary_memory_needed reports
   true for this mode and pair, 2 otherwise.  */
return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
|
||||
}
|
||||
|
||||
|
56
gcc/testsuite/gcc.target/riscv/pr105666.c
Normal file
56
gcc/testsuite/gcc.target/riscv/pr105666.c
Normal file
@ -0,0 +1,56 @@
|
||||
/* Shamelessly plucked from gcc/testsuite/gcc.c-torture/execute/pr28982a.c.
|
||||
|
||||
The idea is to induce high register pressure for both int/fp registers
|
||||
so that they spill. By default FMV instructions would be used to stash
|
||||
int reg to a fp reg (and vice-versa) but that could be costlier than
|
||||
spilling to stack. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target hard_float } */
|
||||
/* { dg-options "-march=rv64g -ffast-math" } */
|
||||
|
||||
/* Iteration count passed to foo; also scales the size of input[].  */
#define NITER 4
|
||||
/* Number of independent variable sets; sizes ptrs[], results[] and incs[]
   and must match the number of X(...) entries in MULTI.  */
#define NVARS 20
|
||||
/* Expand X once for each index 0..19, producing a comma-separated list.
   Used both for declarator lists and for comma expressions.  */
#define MULTI(X) \
|
||||
X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
|
||||
X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
|
||||
|
||||
/* Declarator: local incN initialized from incs[N].  */
#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
|
||||
/* Declarators: local pointer ptrN initialized from ptrs[N], and local
   accumulator resultN initialized to 5.  */
#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 5
|
||||
/* One loop step for variable set N: scale-accumulate resultN by the value
   ptrN points to, then advance ptrN by incN elements.  */
#define LOOP(INDEX) result##INDEX += result##INDEX * (*ptr##INDEX), ptr##INDEX += inc##INDEX
|
||||
/* Store the local accumulator resultN into the global results[N].  */
#define COPYOUT(INDEX) results[INDEX] = result##INDEX
|
||||
|
||||
/* Starting read pointers, one per variable set; foo copies them into
   locals via DECLAREF.  */
double *ptrs[NVARS];
|
||||
/* Output slots, one per variable set; foo writes them via COPYOUT.  */
double results[NVARS];
|
||||
/* Per-variable-set pointer increments; foo copies them into locals via
   DECLAREI.  */
int incs[NVARS];
|
||||
|
||||
/* Register-pressure kernel.  Declares NVARS int increments, NVARS double
   pointers and NVARS double accumulators as locals, updates all of them
   on each of N loop iterations, then copies the accumulators out to
   results[].  Having this many int and FP values in use inside the loop
   is what is meant to force spills.  */
void __attribute__((noinline))
|
||||
foo (int n)
|
||||
{
|
||||
/* int inc0 = incs[0], ..., inc19 = incs[19];  */
int MULTI (DECLAREI);
|
||||
/* double *ptr0 = ptrs[0], result0 = 5, ..., *ptr19 = ptrs[19], result19 = 5;  */
double MULTI (DECLAREF);
|
||||
while (n--)
|
||||
/* One comma expression performing the LOOP step for all 20 sets.  */
MULTI (LOOP);
|
||||
MULTI (COPYOUT);
|
||||
}
|
||||
|
||||
/* Backing data that the ptrs[] entries point into.  */
double input[NITER * NVARS];
|
||||
|
||||
/* Driver: sets up ptrs[], incs[] and input[], calls foo, then compares
   results[] against a closed-form value.  Returns 0 on match, 1 on the
   first mismatch.

   NOTE(review): the expected value i * NITER * (NITER + 1) / 2 looks
   inherited from pr28982a.c.  With foo as written here (accumulators
   start at 5 and are scaled multiplicatively) results[0] stays 5, so the
   check would fail if this program were executed.  That is harmless
   while the test remains { dg-do compile }, but must be revisited if it
   is ever turned into a run test.  */
int
|
||||
main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Variable set i starts at input[i] and strides by i elements.  */
for (i = 0; i < NVARS; i++)
|
||||
ptrs[i] = input + i, incs[i] = i;
|
||||
for (i = 0; i < NITER * NVARS; i++)
|
||||
input[i] = i;
|
||||
foo (NITER);
|
||||
for (i = 0; i < NVARS; i++)
|
||||
if (results[i] != i * NITER * (NITER + 1) / 2)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "\tfmv\\.d\\.x\t" } } */
|
||||
/* { dg-final { scan-assembler-not "\tfmv\\.x\\.d\t" } } */
|
Loading…
Reference in New Issue
Block a user