re PR rtl-optimization/69052 (Performance regression after r229402.)
PR tree-optimization/69052 * loop-invariant.c (canonicalize_address): New function. (inv_can_prop_to_addr_use): Check validity of address expression which is canonicalized by above function. gcc/testsuite/ChangeLog PR tree-optimization/69052 * gcc.target/i386/pr69052.c: New test. From-SVN: r233907
This commit is contained in:
parent
90a7a40b65
commit
192912db8a
@ -1,3 +1,10 @@
|
||||
2016-03-02 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/69052
|
||||
* loop-invariant.c (canonicalize_address): New function.
|
||||
(inv_can_prop_to_addr_use): Check validity of address expression
|
||||
which is canonicalized by above function.
|
||||
|
||||
2016-03-02 Alan Modra <amodra@gmail.com>
|
||||
|
||||
PR ipa/69990
|
||||
|
@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "cfgloop.h"
|
||||
#include "expr.h"
|
||||
#include "params.h"
|
||||
#include "rtl-iter.h"
|
||||
#include "dumpfile.h"
|
||||
|
||||
/* The data stored for the loop. */
|
||||
@ -754,6 +755,130 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
|
||||
return inv;
|
||||
}
|
||||
|
||||
/* Return a canonical version of X for the address, from the point of view,
|
||||
that all multiplications are represented as MULT instead of the multiply
|
||||
by a power of 2 being represented as ASHIFT.
|
||||
|
||||
Callers should prepare a copy of X because this function may modify it
|
||||
in place. */
|
||||
|
||||
static void
|
||||
canonicalize_address_mult (rtx x)
|
||||
{
|
||||
subrtx_var_iterator::array_type array;
|
||||
FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
|
||||
{
|
||||
rtx sub = *iter;
|
||||
|
||||
if (GET_CODE (sub) == ASHIFT
|
||||
&& CONST_INT_P (XEXP (sub, 1))
|
||||
&& INTVAL (XEXP (sub, 1)) < GET_MODE_BITSIZE (GET_MODE (sub))
|
||||
&& INTVAL (XEXP (sub, 1)) >= 0)
|
||||
{
|
||||
HOST_WIDE_INT shift = INTVAL (XEXP (sub, 1));
|
||||
PUT_CODE (sub, MULT);
|
||||
XEXP (sub, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
|
||||
GET_MODE (sub));
|
||||
iter.skip_subrtxes ();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Maximum number of sub expressions in address. We set it to
|
||||
a small integer since it's unlikely to have a complicated
|
||||
address expression. */
|
||||
|
||||
#define MAX_CANON_ADDR_PARTS (5)
|
||||
|
||||
/* Collect sub expressions in address X with PLUS as the seperator.
|
||||
Sub expressions are stored in vector ADDR_PARTS. */
|
||||
|
||||
static void
|
||||
collect_address_parts (rtx x, vec<rtx> *addr_parts)
|
||||
{
|
||||
subrtx_var_iterator::array_type array;
|
||||
FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
|
||||
{
|
||||
rtx sub = *iter;
|
||||
|
||||
if (GET_CODE (sub) != PLUS)
|
||||
{
|
||||
addr_parts->safe_push (sub);
|
||||
iter.skip_subrtxes ();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compare function for sorting sub expressions X and Y based on
|
||||
precedence defined for communitive operations. */
|
||||
|
||||
static int
|
||||
compare_address_parts (const void *x, const void *y)
|
||||
{
|
||||
const rtx *rx = (const rtx *)x;
|
||||
const rtx *ry = (const rtx *)y;
|
||||
int px = commutative_operand_precedence (*rx);
|
||||
int py = commutative_operand_precedence (*ry);
|
||||
|
||||
return (py - px);
|
||||
}
|
||||
|
||||
/* Return a canonical version address for X by following steps:
|
||||
1) Rewrite ASHIFT into MULT recursively.
|
||||
2) Divide address into sub expressions with PLUS as the
|
||||
separator.
|
||||
3) Sort sub expressions according to precedence defined
|
||||
for communative operations.
|
||||
4) Simplify CONST_INT_P sub expressions.
|
||||
5) Create new canonicalized address and return.
|
||||
Callers should prepare a copy of X because this function may
|
||||
modify it in place. */
|
||||
|
||||
static rtx
|
||||
canonicalize_address (rtx x)
|
||||
{
|
||||
rtx res;
|
||||
unsigned int i, j;
|
||||
machine_mode mode = GET_MODE (x);
|
||||
auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts;
|
||||
|
||||
/* Rewrite ASHIFT into MULT. */
|
||||
canonicalize_address_mult (x);
|
||||
/* Divide address into sub expressions. */
|
||||
collect_address_parts (x, &addr_parts);
|
||||
/* Unlikely to have very complicated address. */
|
||||
if (addr_parts.length () < 2
|
||||
|| addr_parts.length () > MAX_CANON_ADDR_PARTS)
|
||||
return x;
|
||||
|
||||
/* Sort sub expressions according to canonicalization precedence. */
|
||||
addr_parts.qsort (compare_address_parts);
|
||||
|
||||
/* Simplify all constant int summary if possible. */
|
||||
for (i = 0; i < addr_parts.length (); i++)
|
||||
if (CONST_INT_P (addr_parts[i]))
|
||||
break;
|
||||
|
||||
for (j = i + 1; j < addr_parts.length (); j++)
|
||||
{
|
||||
gcc_assert (CONST_INT_P (addr_parts[j]));
|
||||
addr_parts[i] = simplify_gen_binary (PLUS, mode,
|
||||
addr_parts[i],
|
||||
addr_parts[j]);
|
||||
}
|
||||
|
||||
/* Chain PLUS operators to the left for !CONST_INT_P sub expressions. */
|
||||
res = addr_parts[0];
|
||||
for (j = 1; j < i; j++)
|
||||
res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]);
|
||||
|
||||
/* Pickup the last CONST_INT_P sub expression. */
|
||||
if (i < addr_parts.length ())
|
||||
res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Given invariant DEF and its address USE, check if the corresponding
|
||||
invariant expr can be propagated into the use or not. */
|
||||
|
||||
@ -761,7 +886,7 @@ static bool
|
||||
inv_can_prop_to_addr_use (struct def *def, df_ref use)
|
||||
{
|
||||
struct invariant *inv;
|
||||
rtx *pos = DF_REF_REAL_LOC (use), def_set;
|
||||
rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set;
|
||||
rtx_insn *use_insn = DF_REF_INSN (use);
|
||||
rtx_insn *def_insn;
|
||||
bool ok;
|
||||
@ -778,6 +903,29 @@ inv_can_prop_to_addr_use (struct def *def, df_ref use)
|
||||
|
||||
validate_unshare_change (use_insn, pos, SET_SRC (def_set), true);
|
||||
ok = verify_changes (0);
|
||||
/* Try harder with canonicalization in address expression. */
|
||||
if (!ok && (use_set = single_set (use_insn)) != NULL_RTX)
|
||||
{
|
||||
rtx src, dest, mem = NULL_RTX;
|
||||
|
||||
src = SET_SRC (use_set);
|
||||
dest = SET_DEST (use_set);
|
||||
if (MEM_P (src))
|
||||
mem = src;
|
||||
else if (MEM_P (dest))
|
||||
mem = dest;
|
||||
|
||||
if (mem != NULL_RTX
|
||||
&& !memory_address_addr_space_p (GET_MODE (mem),
|
||||
XEXP (mem, 0),
|
||||
MEM_ADDR_SPACE (mem)))
|
||||
{
|
||||
rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0)));
|
||||
if (memory_address_addr_space_p (GET_MODE (mem),
|
||||
addr, MEM_ADDR_SPACE (mem)))
|
||||
ok = true;
|
||||
}
|
||||
}
|
||||
cancel_changes (0);
|
||||
return ok;
|
||||
}
|
||||
|
@ -1,3 +1,8 @@
|
||||
2016-03-02 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/69052
|
||||
* gcc.target/i386/pr69052.c: New test.
|
||||
|
||||
2016-03-02 Alan Modra <amodra@gmail.com>
|
||||
|
||||
* gcc.dg/pr69990.c: New.
|
||||
|
54
gcc/testsuite/gcc.target/i386/pr69052.c
Normal file
54
gcc/testsuite/gcc.target/i386/pr69052.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target pie } */
|
||||
/* { dg-options "-O2 -fPIE -pie" } */
|
||||
|
||||
int look_nbits[256], loop_sym[256];
|
||||
const int ind[] = {
|
||||
0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
|
||||
12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
|
||||
35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
|
||||
58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
|
||||
};
|
||||
int out[256];
|
||||
extern void bar (int *, int *);
|
||||
/* Test kernel for PR 69052 (per this file's name and ChangeLog entry).
   Runs a loop with K counting from 1 towards 64 that extracts bit fields
   from L at a moving bit position B, looks values up through L1/L2 or
   V/V1, and stores a result into out[ind[k]].  Parameter M1 is unused.
   L and B have their addresses passed to bar on every iteration.  */
void foo (int *l1, int *l2, int *v, int *v1, int *m1, int i)
|
||||
{
|
||||
int L = i + 1, b = 20;
|
||||
int result, k;
|
||||
|
||||
for (k = 1; k < 64; k++)
|
||||
{
|
||||
/* 8-bit field of L starting at bit (b - 8).  */
int look = (((L >> (b - 8))) & ((1 << 8) - 1));
|
||||
int nb = l1[look];
|
||||
int code;
|
||||
int r;
|
||||
|
||||
if (nb)
|
||||
{
|
||||
b -= nb;
|
||||
result = l2[look];
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Zero entry in L1: take a fixed 9-bit field and look it up via V/V1.  */
nb = 9;
|
||||
code = (((L >> (b -= nb))) & ((1 << nb) - 1));
|
||||
result = v[(code + v1[nb])];
|
||||
}
|
||||
/* Split the looked-up value: high part in R, low four bits in RESULT.  */
r = result >> 4;
|
||||
result &= 15;
|
||||
if (result)
|
||||
{
|
||||
k += r;
|
||||
r = (((L >> (b -= result))) & ((1 << result) - 1));
|
||||
if (r < (1 << (result - 1)))
|
||||
result = r + (((-1) << result) + 1);
|
||||
else
|
||||
result = r;
|
||||
|
||||
/* NOTE(review): presumably this indexed read of `ind' is the access the
   file's dg-final scan (no "leal ind@GOTOFF(...)" on ia32) is about --
   confirm against the PR.  */
out[ind[k]] = result;
|
||||
}
|
||||
bar (&L, &b);
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "leal\[ \t\]ind@GOTOFF\\(%\[^,\]*\\), %" { target ia32 } } } */
|
Loading…
Reference in New Issue
Block a user