Run pass_sink_code once more before store_merging

The GIMPLE sink code pass runs quite early, so later GIMPLE optimization
passes may expose new sinking opportunities.  This patch runs the sink
code pass once more before store_merging.  For a detailed discussion,
please refer to:
https://gcc.gnu.org/pipermail/gcc-patches/2020-December/562352.html

Tested SPEC2017 performance on P8LE: 544.nab_r improves by 2.43%, the
other benchmarks show no significant change, and the GEOMEAN improves
slightly, by 0.25%.

gcc/ChangeLog:

2021-05-18  Xionghu Luo  <luoxhu@linux.ibm.com>

	* passes.def: Add sink_code pass before store_merging.
	* tree-ssa-sink.c (pass_sink_code::clone): New.

gcc/testsuite/ChangeLog:

2021-05-18  Xionghu Luo  <luoxhu@linux.ibm.com>

	* gcc.dg/tree-ssa/ssa-sink-1.c: Adjust.
	* gcc.dg/tree-ssa/ssa-sink-2.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-3.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-4.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-5.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-6.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-7.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-8.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-9.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-10.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-13.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-14.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-16.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-17.c: Ditto.
	* gcc.dg/tree-ssa/ssa-sink-18.c: New.
This commit is contained in:
Xionghu Luo 2021-05-18 21:34:18 -05:00
parent 39ed6a88c7
commit de56f95afa
17 changed files with 229 additions and 30 deletions

View File

@ -348,6 +348,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_phiopt, false /* early_p */);
NEXT_PASS (pass_fold_builtins);
NEXT_PASS (pass_optimize_widening_mul);
NEXT_PASS (pass_sink_code);
NEXT_PASS (pass_store_merging);
NEXT_PASS (pass_tail_calls);
/* If DCE is not run before checking for uninitialized uses,

View File

@ -7,4 +7,4 @@ foo (int a, int b, int c)
return c ? x : a;
}
/* We should sink the x = a * b calculation into the branch that returns x. */
/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */

View File

@ -16,4 +16,4 @@ void foo (void)
}
}
/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink1" } } */

View File

@ -21,5 +21,5 @@ void test ()
/* We should sink/merge all stores and end up with a single BB. */
/* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink" } } */
/* { dg-final { scan-tree-dump-times "<bb " 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink1" } } */
/* { dg-final { scan-tree-dump-times "<bb " 1 "sink1" } } */

View File

@ -13,5 +13,5 @@ void foo (int b)
/* We should have sunk the store and inserted a PHI to merge the
stored values. */
/* { dg-final { scan-tree-dump-times " = PHI" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "x = " 1 "sink" } } */
/* { dg-final { scan-tree-dump-times " = PHI" 1 "sink1" } } */
/* { dg-final { scan-tree-dump-times "x = " 1 "sink1" } } */

View File

@ -10,5 +10,5 @@ int f(int n)
return j;
}
/* { dg-final { scan-tree-dump "Sinking j_. = __builtin_ffs" "sink" } } */
/* { dg-final { scan-tree-dump "Sinking j_. = __builtin_ffs" "sink1" } } */
/* { dg-final { scan-tree-dump "return 2;" "optimized" } } */

View File

@ -12,4 +12,4 @@ int my_f(int a, int b)
}
/* We should sink the call to pure_f to the if block. */
/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink" } } */
/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink1" } } */

View File

@ -0,0 +1,212 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-sink-stats" } */
#include <stdint.h>
#define HLOG 16
#define MAX_LIT (1 << 5)
typedef const uint8_t *LZF_HSLOT;
typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
/* Test kernel: a compression-style routine (the type names suggest it was
   reduced from an LZF compressor; that cannot be confirmed from this file).
   Reads IN_LEN bytes starting at IN_DATA and writes encoded bytes to
   OUT_DATA, which has room for OUT_LEN bytes.  Returns the number of bytes
   written (op - out_data), or 0 when either length is zero or one of the
   output-space checks fails.
   NOTE: htab is read without ever being initialized.  The test is
   compile-only, so the function is never executed.  */
int
compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len)
{
LZF_STATE htab;
uint8_t *ip = in_data;
uint8_t *op = out_data;
uint8_t *in_end = ip + in_len;
uint8_t *out_end = op + out_len;
uint8_t *ref;
unsigned long off;
unsigned int hval;
int lit;
/* Nothing to do for an empty input or an empty output buffer.  */
if (!in_len || !out_len)
return 0;
lit = 0;
/* Skip one output byte; it is filled in later through op[-lit - 1] with
   the length of the literal run minus one.  */
op++;
/* Seed the rolling value with the first two input bytes.  */
hval = (((ip[0]) << 8) | ip[1]);
/* Main loop: runs while at least three input bytes remain.  */
while (ip < in_end - 2)
{
uint8_t *hslot;
/* Shift the third byte into the rolling value.  */
hval = (((hval) << 8) | ip[2]);
/* hslot addresses the htab entry selected by hashing hval.  Because of
   the uint8_t * cast, *hslot reads and writes a single byte of that
   entry only.  */
hslot = (uint8_t*)(htab + (((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1)));
ref = *hslot + in_data;
*hslot = ip - in_data;
/* Treat REF as a match only if the distance OFF (assigned inside the
   condition) is below 1 << 13, REF lies past the start of the input, and
   the first three bytes at REF and IP agree.  The leading "1 &&" is a
   constant-true operand.  */
if (1 && (off = ip - ref - 1) < (1 << 13) && ref > in_data
&& ref[2] == ip[2]
&& ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0]))
{
unsigned int len = 2;
unsigned int maxlen = in_end - ip - len;
/* Cap the comparable length at (1 << 8) + (1 << 3).  */
maxlen
= maxlen > ((1 << 8) + (1 << 3)) ? ((1 << 8) + (1 << 3)) : maxlen;
/* Give up when the output buffer cannot hold the encoded match.  */
if ((op + 3 + 1 >= out_end) != 0)
if (op - !lit + 3 + 1 >= out_end)
return 0;
/* Close the pending literal run; if it was empty (lit == 0), step OP
   back onto the reserved length byte.  */
op[-lit - 1] = lit - 1;
op -= !lit;
/* This "loop" body runs at most once: every path through it ends in a
   break.  */
for (;;)
{
/* With more than 16 comparable bytes, compare up to 16 bytes in an
   unrolled sequence, leaving at the first mismatch.  */
if (maxlen > 16)
{
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
len++;
if (ref[len] != ip[len])
break;
}
/* Otherwise, or after 16 unrolled matches, extend the match one byte at
   a time up to maxlen.  */
do
{
len++;
}
while (len < maxlen && ip[len] == ref[len]);
break;
}
len -= 2;
ip++;
/* Emit the match header: a short length (< 7) shares one byte with the
   high bits of OFF; otherwise 7 is stored in that position and the
   remaining length follows in a second byte.  */
if (len < 7)
{
*op++ = (off >> 8) + (len << 5);
}
else
{
*op++ = (off >> 8) + (7 << 5);
*op++ = len - 7;
}
/* Store OFF truncated to one byte by the uint8_t store.  */
*op++ = off;
/* Start a new literal run: reset the count and reserve its length
   byte.  */
lit = 0;
op++;
/* Advance past the match; stop if fewer than three bytes remain.  */
ip += len + 1;
if (ip >= in_end - 2)
break;
/* Step back two bytes and record the next two positions in htab.  */
--ip;
--ip;
hval = (((ip[0]) << 8) | ip[1]);
hval = (((hval) << 8) | ip[2]);
htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))]
= (LZF_HSLOT)(ip - in_data);
ip++;
hval = (((hval) << 8) | ip[2]);
htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))]
= (LZF_HSLOT)(ip - in_data);
ip++;
}
else
{
/* No match: copy one literal byte, failing if the output is full.  */
if (op >= out_end)
return 0;
lit++;
*op++ = *ip++;
/* When the run reaches 1 << 5 literals, store its length and start a
   new run.  */
if (lit == (1 << 5))
{
op[-lit - 1] = lit - 1;
lit = 0;
op++;
}
}
}
if (op + 3 > out_end) /* at most 3 bytes can be missing here */
return 0;
/* Copy the remaining input bytes as literals.  */
while (ip < in_end)
{
lit++;
*op++ = *ip++;
if (lit == MAX_LIT)
{
op[-lit - 1] = lit - 1; /* stop run */
lit = 0;
op++; /* start run */
}
}
op[-lit - 1] = lit - 1; /* end run */
op -= !lit; /* undo run if length is zero */
return op - out_data;
}
/* For this case, pass sink2 sinks statements from hot loop header to loop
exits after gimple loop optimizations, which generates instructions executed
each iteration in loop, but the results are used outside of loop:
With -m64,
"Sinking _367 = (uint8_t *) _320;
from bb 31 to bb 90
Sinking _320 = _321 + ivtmp.25_326;
from bb 31 to bb 90
Sinking _321 = (unsigned long) ip_229;
from bb 31 to bb 90
Sinking len_158 = _322 + 4294967295;
from bb 31 to bb 33"
With -m32, Power and x86 sink 3 instructions, but arm ilp32 cannot
sink because ivopts chooses two IV candidates instead of one, which is
expected, so this test is restricted to lp64 only for now.  */
/* { dg-final { scan-tree-dump-times "Sunk statements: 4" 1 "sink2" { target lp64 } } } */

View File

@ -9,4 +9,4 @@ bar (int a, int b, int c)
return y;
}
/* We should sink the x = a * b calculation into the else branch */
/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */

View File

@ -1,15 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-sink-stats" } */
extern void foo(int a);
/* Test body: A is computed before the branch but is only consumed by the
   call to foo inside it.  Control reaches the end of the function without
   a return statement.  */
int
main (int argc)
{
int a;
a = argc + 1;
/* The branch is taken when argc + 3 is non-zero.  */
if (argc + 3)
{
foo (a);
}
}
/* We should sink the a = argc + 1 calculation into the if branch */
/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */

View File

@ -17,4 +17,4 @@ main (int argc)
foo2 (a);
}
/* We should sink the first a = b + c calculation into the else branch */
/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */

View File

@ -44,4 +44,4 @@ void foo(int16_t runs[], uint8_t alpha[], int x, int count)
}
/* We should not sink the next_runs = runs + x calculation after the loop. */
/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink1" } } */

View File

@ -14,4 +14,4 @@ int foo(int *a, int r)
/* *a = 1 should be sunk to the else block. */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */

View File

@ -15,4 +15,4 @@ int foo(int *a, int r, short *b)
/* *a = 1 should be sunk to the else block. */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */

View File

@ -24,4 +24,4 @@ int foo(int *a, int r, short *b)
/* *a = 1 should be sunk into the default case. */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */

View File

@ -15,4 +15,4 @@ int foo(int *a, int r, int *b)
/* *a = 1 should be sunk to the else block. */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */

View File

@ -819,6 +819,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *) { return flag_tree_sink != 0; }
virtual unsigned int execute (function *);
/* Return a newly allocated pass_sink_code constructed from this pass's
   m_ctxt.  */
opt_pass *
clone ()
{
  return new pass_sink_code (m_ctxt);
}
}; // class pass_sink_code