re PR rtl-optimization/28982 (Incorrect reloading of automodification expressions)

gcc/
	PR rtl-optimization/28982
	* reload.c (find_reloads_address_1): Use RELOAD_OTHER for the
	index of a PRE_MODIFY or POST_MODIFY address.
	* reload1.c (inc_for_reload): Use find_replacement on the original
	base and index registers.

gcc/testsuite/
	PR rtl-optimization/28982
	* gcc.c-torture/execute/pr28982a.c: New test.
	* gcc.c-torture/execute/pr28982b.c: Likewise.

From-SVN: r116919
This commit is contained in:
Richard Sandiford 2006-09-13 06:30:59 +00:00 committed by Richard Sandiford
parent 04df96dded
commit 4b7b277aff
6 changed files with 149 additions and 6 deletions

View File

@ -1,3 +1,11 @@
2006-09-13 Richard Sandiford <richard@codesourcery.com>
PR rtl-optimization/28982
* reload.c (find_reloads_address_1): Use RELOAD_OTHER for the
index of a PRE_MODIFY or POST_MODIFY address.
* reload1.c (inc_for_reload): Use find_replacement on the original
base and index registers.
2006-09-12 H.J. Lu <hongjiu.lu@intel.com>
* doc/invoke.texi (mpreferred-stack-boundary): Remove exception

View File

@ -5541,12 +5541,18 @@ find_reloads_address_1 (enum machine_mode mode, rtx x, int context,
/* Require index register (or constant). Let's just handle the
register case in the meantime... If the target allows
auto-modify by a constant then we could try replacing a pseudo
register with its equivalent constant where applicable. */
register with its equivalent constant where applicable.
If we later decide to reload the whole PRE_MODIFY or
POST_MODIFY, inc_for_reload might clobber the reload register
before reading the index. The index register might therefore
need to live longer than a TYPE reload normally would, so be
conservative and class it as RELOAD_OTHER. */
if (REG_P (XEXP (op1, 1)))
if (!REGNO_OK_FOR_INDEX_P (REGNO (XEXP (op1, 1))))
find_reloads_address_1 (mode, XEXP (op1, 1), 1, code, SCRATCH,
&XEXP (op1, 1), opnum, type, ind_levels,
insn);
&XEXP (op1, 1), opnum, RELOAD_OTHER,
ind_levels, insn);
gcc_assert (REG_P (XEXP (op1, 0)));

View File

@ -8177,7 +8177,7 @@ static rtx
inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount)
{
/* REG or MEM to be copied and incremented. */
rtx incloc = XEXP (value, 0);
rtx incloc = find_replacement (&XEXP (value, 0));
/* Nonzero if increment after copying. */
int post = (GET_CODE (value) == POST_DEC || GET_CODE (value) == POST_INC
|| GET_CODE (value) == POST_MODIFY);
@ -8186,7 +8186,7 @@ inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount)
rtx add_insn;
int code;
rtx store;
rtx real_in = in == value ? XEXP (in, 0) : in;
rtx real_in = in == value ? incloc : in;
/* No hard register is equivalent to this register after
inc/dec operation. If REG_LAST_RELOAD_REG were nonzero,
@ -8198,7 +8198,7 @@ inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount)
if (GET_CODE (value) == PRE_MODIFY || GET_CODE (value) == POST_MODIFY)
{
gcc_assert (GET_CODE (XEXP (value, 1)) == PLUS);
inc = XEXP (XEXP (value, 1), 1);
inc = find_replacement (&XEXP (XEXP (value, 1), 1));
}
else
{

View File

@ -1,3 +1,9 @@
2006-09-13 Richard Sandiford <richard@codesourcery.com>
PR rtl-optimization/28982
* gcc.c-torture/execute/pr28982a.c: New test.
* gcc.c-torture/execute/pr28982b.c: Likewise.
2006-09-12 Eric Christopher <echristo@apple.com>
* gcc.target/x86_64/abi/asm-support-darwin.s: New.

View File

@ -0,0 +1,65 @@
/* PR rtl-optimization/28982. Function foo() does the equivalent of:
float tmp_results[NVARS];
for (int i = 0; i < NVARS; i++)
{
int inc = incs[i];
float *ptr = ptrs[i], result = 0;
for (int j = 0; j < n; j++)
result += *ptr, ptr += inc;
tmp_results[i] = result;
}
memcpy (results, tmp_results, sizeof (results));
but without the outermost loop. The idea is to create high register
pressure and ensure that some INC and PTR variables are spilled.
On ARM targets, sequences like "result += *ptr, ptr += inc" can
usually be implemented using (mem (post_modify ...)), and we do
indeed create such MEMs before reload for this testcase. However,
(post_modify ...) is not a valid address for coprocessor loads, so
for -mfloat-abi=softfp, reload reloads the POST_MODIFY into a base
register. GCC did not deal correctly with cases where the base and
index of the POST_MODIFY are themselves reloaded. */
/* Number of loads foo performs through each pointer (main passes this
   as foo's argument).  */
#define NITER 4
/* Number of independent pointer/increment/result triples.  */
#define NVARS 20
/* Expand to a comma-separated list X(0), X(1), ..., X(19), i.e. one
   instance of X for each of the NVARS variables.  */
#define MULTI(X) \
X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
/* Declarator for the local incINDEX, initialized from incs[INDEX].  */
#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
/* Declarators for the local pointer ptrINDEX, initialized from
   ptrs[INDEX], and for the running sum resultINDEX, initialized to 0.  */
#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0
/* One loop step for variable INDEX: add the pointed-to value to the
   running sum, then advance the pointer by incINDEX elements.  */
#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX
/* Store the final sum for variable INDEX in results[INDEX].  */
#define COPYOUT(INDEX) results[INDEX] = result##INDEX
/* Inputs to foo: the starting pointer and the per-step increment (in
   elements) for each variable.  Both are filled in by main.  */
float *ptrs[NVARS];
/* Output of foo: one accumulated sum per variable.  */
float results[NVARS];
int incs[NVARS];
/* For each of the NVARS variables, sum N values read through a pointer
   that starts at ptrs[I] and is advanced by incs[I] elements after each
   read, then store the sum in results[I].  Every pointer, increment and
   sum is a separate local variable (see MULTI), which is what produces
   the high register pressure described at the top of this file.
   noinline presumably ensures that the loop is compiled as a function
   in its own right rather than being folded into main.  */
void __attribute__((noinline))
foo (int n)
{
/* int inc0 = incs[0], ..., inc19 = incs[19];  */
int MULTI (DECLAREI);
/* float *ptr0 = ptrs[0], result0 = 0, ..., *ptr19 = ptrs[19],
   result19 = 0;  */
float MULTI (DECLAREF);
/* Each iteration is one comma expression that performs the
   load-accumulate-advance step for all NVARS variables in turn.  */
while (n--)
MULTI (LOOP);
/* Likewise one comma expression that copies every sum to results[].  */
MULTI (COPYOUT);
}
/* Source data for the strided loads in foo.  Pointer I starts at
   input + I and is advanced by I elements after each of its NITER
   loads, so it finishes at input + (NITER + 1) * I.  Only the first
   NITER * NVARS elements are ever read, but the array is sized so that
   even the final, never-dereferenced pointer value stays inside the
   object: forming a pointer more than one element past the end of an
   array is undefined behavior.  */
float input[(NITER + 1) * NVARS];

/* Set up the pointers and increments, run foo and check the sums.
   Variable I reads input[I], input[2 * I], ..., input[NITER * I], and
   input[K] == K, so the expected sum is I * (1 + 2 + ... + NITER),
   i.e. I * NITER * (NITER + 1) / 2.  Return 0 on success and 1 on the
   first mismatch.  */
int
main (void)
{
  int i;

  for (i = 0; i < NVARS; i++)
    ptrs[i] = input + i, incs[i] = i;
  /* Only the elements that foo reads need a value; the remaining
     elements keep their static zero initialization.  */
  for (i = 0; i < NITER * NVARS; i++)
    input[i] = i;
  foo (NITER);
  for (i = 0; i < NVARS; i++)
    if (results[i] != i * NITER * (NITER + 1) / 2)
      return 1;
  return 0;
}

View File

@ -0,0 +1,58 @@
/* Like pr28982a.c, but with the spill slots outside the range of
a single sp-based load on ARM. This checks the cases in which the
addresses in the base and index reloads themselves require further
reloads. */
/* foo below keeps a struct big in its frame and passes a second copy
   by value to bar.  On targets that define STACK_SIZE at or below this
   threshold (presumably too small for those two copies), compile a
   trivially passing main instead of the real test.  */
#if defined(STACK_SIZE) && STACK_SIZE <= 0x80100
int main (void) { return 0; }
#else
/* Number of loads foo performs through each pointer (main passes this
   as foo's argument).  */
#define NITER 4
/* Number of independent pointer/increment/result triples.  */
#define NVARS 20
/* Expand to a comma-separated list X(0), X(1), ..., X(19), i.e. one
   instance of X for each of the NVARS variables.  */
#define MULTI(X) \
X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
/* Declarator for the local incINDEX, initialized from incs[INDEX].  */
#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
/* Declarators for the local pointer ptrINDEX, initialized from
   ptrs[INDEX], and for the running sum resultINDEX, initialized to 0.  */
#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0
/* One loop step for variable INDEX: add the pointed-to value to the
   running sum, then advance the pointer by incINDEX elements.  */
#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX
/* Store the final sum for variable INDEX in results[INDEX].  */
#define COPYOUT(INDEX) results[INDEX] = result##INDEX
/* Inputs to foo: the starting pointer and the per-step increment (in
   elements) for each variable.  Both are filled in by main.  */
float *ptrs[NVARS];
/* Output of foo: one accumulated sum per variable.  */
float results[NVARS];
int incs[NVARS];
/* A deliberately large object (0x10000 ints).  foo keeps one in its
   frame; per the comment at the top of this file, the aim is to put
   the spill slots out of range of a single sp-based load on ARM.  */
struct big { int i[0x10000]; };
/* Add the first element of B to incs[0].  B is received by value.  The
   only caller in this file passes a zero-filled object, so incs[0] is
   left unchanged there; the function presumably exists to give the
   large object in foo a real use so that it is not optimized away.  */
void __attribute__((noinline))
bar (struct big b)
{
incs[0] += b.i[0];
}
/* For each of the NVARS variables, sum N values read through a pointer
   that starts at ptrs[I] and is advanced by incs[I] elements after each
   read, then store the sum in results[I].  Every pointer, increment and
   sum is a separate local variable (see MULTI).  In addition the frame
   holds a struct big, which is finally passed by value to bar.  */
void __attribute__((noinline))
foo (int n)
{
/* Zero-filled, so the call to bar at the end adds 0 to incs[0].  */
struct big b = {};
/* int inc0 = incs[0], ..., inc19 = incs[19];  */
int MULTI (DECLAREI);
/* float *ptr0 = ptrs[0], result0 = 0, ..., *ptr19 = ptrs[19],
   result19 = 0;  */
float MULTI (DECLAREF);
/* Each iteration is one comma expression that performs the
   load-accumulate-advance step for all NVARS variables in turn.  */
while (n--)
MULTI (LOOP);
/* Likewise one comma expression that copies every sum to results[].  */
MULTI (COPYOUT);
/* Runs after the increments were copied into locals and after the
   results were stored, so it cannot affect the values main checks.  */
bar (b);
}
/* Source data for the strided loads in foo.  Pointer I starts at
   input + I and is advanced by I elements after each of its NITER
   loads, so it finishes at input + (NITER + 1) * I.  Only the first
   NITER * NVARS elements are ever read, but the array is sized so that
   even the final, never-dereferenced pointer value stays inside the
   object: forming a pointer more than one element past the end of an
   array is undefined behavior.  */
float input[(NITER + 1) * NVARS];

/* Set up the pointers and increments, run foo and check the sums.
   Variable I reads input[I], input[2 * I], ..., input[NITER * I], and
   input[K] == K, so the expected sum is I * (1 + 2 + ... + NITER),
   i.e. I * NITER * (NITER + 1) / 2.  Return 0 on success and 1 on the
   first mismatch.  */
int
main (void)
{
  int i;

  for (i = 0; i < NVARS; i++)
    ptrs[i] = input + i, incs[i] = i;
  /* Only the elements that foo reads need a value; the remaining
     elements keep their static zero initialization.  */
  for (i = 0; i < NITER * NVARS; i++)
    input[i] = i;
  foo (NITER);
  for (i = 0; i < NVARS; i++)
    if (results[i] != i * NITER * (NITER + 1) / 2)
      return 1;
  return 0;
}
#endif