diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ea6a6bbb075..0eab63e51a5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2006-09-13 Richard Sandiford + + PR rtl-optimization/28982 + * reload.c (find_reloads_address_1): Use RELOAD_OTHER for the + index of a PRE_MODIFY or POST_MODIFY address. + * reload1.c (inc_for_reload): Use find_replacement on the original + base and index registers. + 2006-09-12 H.J. Lu * doc/invoke.texi (mpreferred-stack-boundary): Remove exception diff --git a/gcc/reload.c b/gcc/reload.c index e1a4abc6953..1f1bc23020b 100644 --- a/gcc/reload.c +++ b/gcc/reload.c @@ -5541,12 +5541,18 @@ find_reloads_address_1 (enum machine_mode mode, rtx x, int context, /* Require index register (or constant). Let's just handle the register case in the meantime... If the target allows auto-modify by a constant then we could try replacing a pseudo - register with its equivalent constant where applicable. */ + register with its equivalent constant where applicable. + + If we later decide to reload the whole PRE_MODIFY or + POST_MODIFY, inc_for_reload might clobber the reload register + before reading the index. The index register might therefore + need to live longer than a TYPE reload normally would, so be + conservative and class it as RELOAD_OTHER. */ if (REG_P (XEXP (op1, 1))) if (!REGNO_OK_FOR_INDEX_P (REGNO (XEXP (op1, 1)))) find_reloads_address_1 (mode, XEXP (op1, 1), 1, code, SCRATCH, - &XEXP (op1, 1), opnum, type, ind_levels, - insn); + &XEXP (op1, 1), opnum, RELOAD_OTHER, + ind_levels, insn); gcc_assert (REG_P (XEXP (op1, 0))); diff --git a/gcc/reload1.c b/gcc/reload1.c index 8dd00184f57..04f64488566 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -8177,7 +8177,7 @@ static rtx inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount) { /* REG or MEM to be copied and incremented. */ - rtx incloc = XEXP (value, 0); + rtx incloc = find_replacement (&XEXP (value, 0)); /* Nonzero if increment after copying. 
*/ int post = (GET_CODE (value) == POST_DEC || GET_CODE (value) == POST_INC || GET_CODE (value) == POST_MODIFY); @@ -8186,7 +8186,7 @@ inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount) rtx add_insn; int code; rtx store; - rtx real_in = in == value ? XEXP (in, 0) : in; + rtx real_in = in == value ? incloc : in; /* No hard register is equivalent to this register after inc/dec operation. If REG_LAST_RELOAD_REG were nonzero, @@ -8198,7 +8198,7 @@ inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount) if (GET_CODE (value) == PRE_MODIFY || GET_CODE (value) == POST_MODIFY) { gcc_assert (GET_CODE (XEXP (value, 1)) == PLUS); - inc = XEXP (XEXP (value, 1), 1); + inc = find_replacement (&XEXP (XEXP (value, 1), 1)); } else { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9930647a684..f9de5f24209 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2006-09-13 Richard Sandiford + + PR rtl-optimization/28982 + * gcc.c-torture/execute/pr28982a.c: New test. + * gcc.c-torture/execute/pr28982b.c: Likewise. + 2006-09-12 Eric Christopher * gcc.target/x86_64/abi/asm-support-darwin.s: New. diff --git a/gcc/testsuite/gcc.c-torture/execute/pr28982a.c b/gcc/testsuite/gcc.c-torture/execute/pr28982a.c new file mode 100644 index 00000000000..5660a8d3aff --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr28982a.c @@ -0,0 +1,65 @@ +/* PR rtl-optimization/28982. Function foo() does the equivalent of: + + float tmp_results[NVARS]; + for (int i = 0; i < NVARS; i++) + { + int inc = incs[i]; + float *ptr = ptrs[i], result = 0; + for (int j = 0; j < n; j++) + result += *ptr, ptr += inc; + tmp_results[i] = result; + } + memcpy (results, tmp_results, sizeof (results)); + + but without the outermost loop. The idea is to create high register + pressure and ensure that some INC and PTR variables are spilled. 
+ + On ARM targets, sequences like "result += *ptr, ptr += inc" can + usually be implemented using (mem (post_modify ...)), and we do + indeed create such MEMs before reload for this testcase. However, + (post_modify ...) is not a valid address for coprocessor loads, so + for -mfloat-abi=softfp, reload reloads the POST_MODIFY into a base + register. GCC did not deal correctly with cases where the base and + index of the POST_MODIFY are themselves reloaded. */ +#define NITER 4 /* Iteration count passed to foo; each stream reads NITER elements. */ +#define NVARS 20 /* Number of ptr/inc/result triples; MULTI hard-codes indices 0-19. */ +#define MULTI(X) /* Expand X once for each index 0 through 19, comma-separated. */ \ + X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \ + X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19) + +#define DECLAREI(INDEX) inc##INDEX = incs[INDEX] /* Declarator for incN, initialized from incs[N]. */ +#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0 /* Declarators for ptrN (from ptrs[N]) and resultN (zero). */ +#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX /* One step: accumulate *ptrN, then advance ptrN by incN elements. */ +#define COPYOUT(INDEX) results[INDEX] = result##INDEX /* Store resultN into results[N]. */ + +float *ptrs[NVARS]; /* Start address of each stream. */ +float results[NVARS]; /* Per-stream sums written by foo. */ +int incs[NVARS]; /* Per-stream stride, in elements. */ + +void __attribute__((noinline)) +foo (int n) /* Sum n strided elements of every stream into results[]. */ +{ + int MULTI (DECLAREI); + float MULTI (DECLAREF); + while (n--) + MULTI (LOOP); /* All 20 steps form a single comma expression per iteration. */ + MULTI (COPYOUT); +} + +float input[NITER * NVARS]; /* Backing data for all streams; filled by main. */ + +int +main (void) /* Returns 0 on success, 1 if any sum is wrong. */ +{ + int i; + + for (i = 0; i < NVARS; i++) + ptrs[i] = input + i, incs[i] = i; /* Stream i starts at input[i] and steps by i elements. */ + for (i = 0; i < NITER * NVARS; i++) + input[i] = i; /* input[k] == k. */ + foo (NITER); + for (i = 0; i < NVARS; i++) + if (results[i] != i * NITER * (NITER + 1) / 2) /* Stream i reads input[i * (j + 1)] for j = 0 .. NITER-1. */ + return 1; + return 0; +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr28982b.c b/gcc/testsuite/gcc.c-torture/execute/pr28982b.c new file mode 100644 index 00000000000..3f9f5bafce5 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr28982b.c @@ -0,0 +1,58 @@ +/* Like pr28982a.c, but with the spill slots outside the range of + a single sp-based load on ARM. This test tests for cases where + the addresses in the base and index reloads require further reloads.
*/ +#if defined(STACK_SIZE) && STACK_SIZE <= 0x80100 /* Stack declared too small for this test. NOTE(review): 0x80100 presumably covers two struct big copies plus slack -- confirm. */ +int main (void) { return 0; } /* Trivially pass instead of running the test. */ +#else +#define NITER 4 /* Iteration count passed to foo; each stream reads NITER elements. */ +#define NVARS 20 /* Number of ptr/inc/result triples; MULTI hard-codes indices 0-19. */ +#define MULTI(X) /* Expand X once for each index 0 through 19, comma-separated. */ \ + X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \ + X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19) + +#define DECLAREI(INDEX) inc##INDEX = incs[INDEX] /* Declarator for incN, initialized from incs[N]. */ +#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0 /* Declarators for ptrN (from ptrs[N]) and resultN (zero). */ +#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX /* One step: accumulate *ptrN, then advance ptrN by incN elements. */ +#define COPYOUT(INDEX) results[INDEX] = result##INDEX /* Store resultN into results[N]. */ + +float *ptrs[NVARS]; /* Start address of each stream. */ +float results[NVARS]; /* Per-stream sums written by foo. */ +int incs[NVARS]; /* Per-stream stride, in elements. */ + +struct big { int i[0x10000]; }; /* Large object declared as a local in foo and passed by value to bar. */ +void __attribute__((noinline)) +bar (struct big b) /* Adds b.i[0] to incs[0]; foo passes an all-zero B. */ +{ + incs[0] += b.i[0]; +} + +void __attribute__((noinline)) +foo (int n) /* Sum n strided elements of every stream into results[]. */ +{ + struct big b = {}; /* All-zero; the empty initializer is a GNU C extension before C23. */ + int MULTI (DECLAREI); + float MULTI (DECLAREF); + while (n--) + MULTI (LOOP); /* All 20 steps form a single comma expression per iteration. */ + MULTI (COPYOUT); + bar (b); /* Called after the results are stored. NOTE(review): presumably keeps B live in foo's frame -- confirm. */ +} + +float input[NITER * NVARS]; /* Backing data for all streams; filled by main. */ + +int +main (void) /* Returns 0 on success, 1 if any sum is wrong. */ +{ + int i; + + for (i = 0; i < NVARS; i++) + ptrs[i] = input + i, incs[i] = i; /* Stream i starts at input[i] and steps by i elements. */ + for (i = 0; i < NITER * NVARS; i++) + input[i] = i; /* input[k] == k. */ + foo (NITER); + for (i = 0; i < NVARS; i++) + if (results[i] != i * NITER * (NITER + 1) / 2) /* Stream i reads input[i * (j + 1)] for j = 0 .. NITER-1. */ + return 1; + return 0; +} +#endif /* STACK_SIZE guard */