re PR rtl-optimization/28982 (Incorrect reloading of automodification expressions)
gcc/
	PR rtl-optimization/28982
	* reload.c (find_reloads_address_1): Use RELOAD_OTHER for the index of a PRE_MODIFY or POST_MODIFY address.
	* reload1.c (inc_for_reload): Use find_replacement on the original base and index registers.

gcc/testsuite/
	PR rtl-optimization/28982
	* gcc.c-torture/execute/pr28982a.c: New test.
	* gcc.c-torture/execute/pr28982b.c: Likewise.

From-SVN: r116919
This commit is contained in:
parent
04df96dded
commit
4b7b277aff
@ -1,3 +1,11 @@
|
||||
2006-09-13 Richard Sandiford <richard@codesourcery.com>
|
||||
|
||||
PR rtl-optimization/28982
|
||||
* reload.c (find_reloads_address_1): Use RELOAD_OTHER for the
|
||||
index of a PRE_MODIFY or POST_MODIFY address.
|
||||
* reload1.c (inc_for_reload): Use find_replacement on the original
|
||||
base and index registers.
|
||||
|
||||
2006-09-12 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* doc/invoke.texi (mpreferred-stack-boundary): Remove exception
|
||||
|
12
gcc/reload.c
12
gcc/reload.c
@ -5541,12 +5541,18 @@ find_reloads_address_1 (enum machine_mode mode, rtx x, int context,
|
||||
/* Require index register (or constant). Let's just handle the
|
||||
register case in the meantime... If the target allows
|
||||
auto-modify by a constant then we could try replacing a pseudo
|
||||
register with its equivalent constant where applicable. */
|
||||
register with its equivalent constant where applicable.
|
||||
|
||||
If we later decide to reload the whole PRE_MODIFY or
|
||||
POST_MODIFY, inc_for_reload might clobber the reload register
|
||||
before reading the index. The index register might therefore
|
||||
need to live longer than a TYPE reload normally would, so be
|
||||
conservative and class it as RELOAD_OTHER. */
|
||||
if (REG_P (XEXP (op1, 1)))
|
||||
if (!REGNO_OK_FOR_INDEX_P (REGNO (XEXP (op1, 1))))
|
||||
find_reloads_address_1 (mode, XEXP (op1, 1), 1, code, SCRATCH,
|
||||
&XEXP (op1, 1), opnum, type, ind_levels,
|
||||
insn);
|
||||
&XEXP (op1, 1), opnum, RELOAD_OTHER,
|
||||
ind_levels, insn);
|
||||
|
||||
gcc_assert (REG_P (XEXP (op1, 0)));
|
||||
|
||||
|
@ -8177,7 +8177,7 @@ static rtx
|
||||
inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount)
|
||||
{
|
||||
/* REG or MEM to be copied and incremented. */
|
||||
rtx incloc = XEXP (value, 0);
|
||||
rtx incloc = find_replacement (&XEXP (value, 0));
|
||||
/* Nonzero if increment after copying. */
|
||||
int post = (GET_CODE (value) == POST_DEC || GET_CODE (value) == POST_INC
|
||||
|| GET_CODE (value) == POST_MODIFY);
|
||||
@ -8186,7 +8186,7 @@ inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount)
|
||||
rtx add_insn;
|
||||
int code;
|
||||
rtx store;
|
||||
rtx real_in = in == value ? XEXP (in, 0) : in;
|
||||
rtx real_in = in == value ? incloc : in;
|
||||
|
||||
/* No hard register is equivalent to this register after
|
||||
inc/dec operation. If REG_LAST_RELOAD_REG were nonzero,
|
||||
@ -8198,7 +8198,7 @@ inc_for_reload (rtx reloadreg, rtx in, rtx value, int inc_amount)
|
||||
if (GET_CODE (value) == PRE_MODIFY || GET_CODE (value) == POST_MODIFY)
|
||||
{
|
||||
gcc_assert (GET_CODE (XEXP (value, 1)) == PLUS);
|
||||
inc = XEXP (XEXP (value, 1), 1);
|
||||
inc = find_replacement (&XEXP (XEXP (value, 1), 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1,3 +1,9 @@
|
||||
2006-09-13 Richard Sandiford <richard@codesourcery.com>
|
||||
|
||||
PR rtl-optimization/28982
|
||||
* gcc.c-torture/execute/pr28982a.c: New test.
|
||||
* gcc.c-torture/execute/pr28982b.c: Likewise.
|
||||
|
||||
2006-09-12 Eric Christopher <echristo@apple.com>
|
||||
|
||||
* gcc.target/x86_64/abi/asm-support-darwin.s: New.
|
||||
|
65
gcc/testsuite/gcc.c-torture/execute/pr28982a.c
Normal file
65
gcc/testsuite/gcc.c-torture/execute/pr28982a.c
Normal file
@ -0,0 +1,65 @@
|
||||
/* PR rtl-optimization/28982. Function foo() does the equivalent of:
|
||||
|
||||
float tmp_results[NVARS];
|
||||
for (int i = 0; i < NVARS; i++)
|
||||
{
|
||||
int inc = incs[i];
|
||||
float *ptr = ptrs[i], result = 0;
|
||||
for (int j = 0; j < n; j++)
|
||||
result += *ptr, ptr += inc;
|
||||
tmp_results[i] = result;
|
||||
}
|
||||
memcpy (results, tmp_results, sizeof (results));
|
||||
|
||||
but without the outermost loop. The idea is to create high register
|
||||
pressure and ensure that some INC and PTR variables are spilled.
|
||||
|
||||
On ARM targets, sequences like "result += *ptr, ptr += inc" can
|
||||
usually be implemented using (mem (post_modify ...)), and we do
|
||||
indeed create such MEMs before reload for this testcase. However,
|
||||
(post_modify ...) is not a valid address for coprocessor loads, so
|
||||
for -mfloat-abi=softfp, reload reloads the POST_MODIFY into a base
|
||||
register. GCC did not deal correctly with cases where the base and
|
||||
index of the POST_MODIFY are themselves reloaded. */
|
||||
#define NITER 4
|
||||
#define NVARS 20
|
||||
#define MULTI(X) \
|
||||
X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
|
||||
X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
|
||||
|
||||
#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
|
||||
#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0
|
||||
#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX
|
||||
#define COPYOUT(INDEX) results[INDEX] = result##INDEX
|
||||
|
||||
float *ptrs[NVARS];
|
||||
float results[NVARS];
|
||||
int incs[NVARS];
|
||||
|
||||
void __attribute__((noinline))
|
||||
foo (int n)
|
||||
{
|
||||
int MULTI (DECLAREI);
|
||||
float MULTI (DECLAREF);
|
||||
while (n--)
|
||||
MULTI (LOOP);
|
||||
MULTI (COPYOUT);
|
||||
}
|
||||
|
||||
float input[NITER * NVARS];
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NVARS; i++)
|
||||
ptrs[i] = input + i, incs[i] = i;
|
||||
for (i = 0; i < NITER * NVARS; i++)
|
||||
input[i] = i;
|
||||
foo (NITER);
|
||||
for (i = 0; i < NVARS; i++)
|
||||
if (results[i] != i * NITER * (NITER + 1) / 2)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
58
gcc/testsuite/gcc.c-torture/execute/pr28982b.c
Normal file
58
gcc/testsuite/gcc.c-torture/execute/pr28982b.c
Normal file
@ -0,0 +1,58 @@
|
||||
/* Like pr28982a.c, but with the spill slots outside the range of
|
||||
a single sp-based load on ARM. This test checks for cases where
|
||||
the addresses in the base and index reloads require further reloads. */
|
||||
#if defined(STACK_SIZE) && STACK_SIZE <= 0x80100
|
||||
int main (void) { return 0; }
|
||||
#else
|
||||
#define NITER 4
|
||||
#define NVARS 20
|
||||
#define MULTI(X) \
|
||||
X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
|
||||
X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
|
||||
|
||||
#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
|
||||
#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0
|
||||
#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX
|
||||
#define COPYOUT(INDEX) results[INDEX] = result##INDEX
|
||||
|
||||
float *ptrs[NVARS];
|
||||
float results[NVARS];
|
||||
int incs[NVARS];
|
||||
|
||||
/* Aggregate holding 0x10000 ints.  NOTE(review): presumably this is what
   makes the frame large enough to push the spill slots out of range, as
   described in the header comment of this file -- confirm.  */
struct big
{
  int i[0x10000];
};
|
||||
void __attribute__((noinline))
|
||||
bar (struct big b)
|
||||
{
|
||||
incs[0] += b.i[0];
|
||||
}
|
||||
|
||||
void __attribute__((noinline))
|
||||
foo (int n)
|
||||
{
|
||||
struct big b = {};
|
||||
int MULTI (DECLAREI);
|
||||
float MULTI (DECLAREF);
|
||||
while (n--)
|
||||
MULTI (LOOP);
|
||||
MULTI (COPYOUT);
|
||||
bar (b);
|
||||
}
|
||||
|
||||
float input[NITER * NVARS];
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NVARS; i++)
|
||||
ptrs[i] = input + i, incs[i] = i;
|
||||
for (i = 0; i < NITER * NVARS; i++)
|
||||
input[i] = i;
|
||||
foo (NITER);
|
||||
for (i = 0; i < NVARS; i++)
|
||||
if (results[i] != i * NITER * (NITER + 1) / 2)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user