Add GIMPLE switch support to loop unswitching

This patch adds support for unswitching loops on switch statements
based on an invariant index.  It furthermore reworks the cost model
to allow an overall budget of statements to be created per original
loop by all unswitching opportunities in the loop.  Compared to
the original implementation, all unswitching opportunities in a loop
are now pre-evaluated before the first transform, which will allow
future changes to select the most profitable candidates first.

To efficiently support switch statements the pass now uses
ranger to simplify switch statements and conditions in loop
copies based on ranges extracted from the recorded set of
predicates unswitched.

gcc/ChangeLog:

	* dbgcnt.def (DEBUG_COUNTER): Add loop_unswitch counter.
	* params.opt (max-unswitch-level): Remove.
	* doc/invoke.texi (max-unswitch-level): Likewise.
	* tree-cfg.cc (gimple_lv_add_condition_to_bb): Support not
	gimplified expressions.
	* tree-ssa-loop-unswitch.cc (struct unswitch_predicate): New.
	(tree_may_unswitch_on): Rename to ...
	(find_unswitching_predicates_for_bb): ... this and handle
	switch statements.
	(get_predicates_for_bb): Likewise.
	(set_predicates_for_bb): Likewise.
	(init_loop_unswitch_info): Likewise.
	(tree_ssa_unswitch_loops): Prepare stuff before calling
	tree_unswitch_single_loop.
	(tree_unswitch_single_loop): Rework the function using
	pre-computed predicates and with a per original loop cost model.
	(merge_last): New.
	(add_predicate_to_path): Likewise.
	(find_range_for_lhs): Likewise.
	(simplify_using_entry_checks): Rename to ...
	(evaluate_control_stmt_using_entry_checks): ... this, handle
	switch statements and improve simplifications using ranger.
	(simplify_loop_version): Rework using
	evaluate_control_stmt_using_entry_checks.
	(evaluate_bbs): New.
	(evaluate_loop_insns_for_predicate): Likewise.
	(tree_unswitch_loop): Adjust to allow switch statements and
	pass in the edge to unswitch.
	(clean_up_after_unswitching): New.
	(pass_tree_unswitch::execute): Pass down fun.

gcc/testsuite/ChangeLog:

	* gcc.dg/loop-unswitch-7.c: New test.
	* gcc.dg/loop-unswitch-8.c: New test.
	* gcc.dg/loop-unswitch-9.c: New test.
	* gcc.dg/loop-unswitch-10.c: New test.
	* gcc.dg/loop-unswitch-11.c: New test.
	* gcc.dg/loop-unswitch-12.c: New test.
	* gcc.dg/loop-unswitch-13.c: New test.
	* gcc.dg/loop-unswitch-14.c: New test.
	* gcc.dg/loop-unswitch-15.c: New test.
	* gcc.dg/loop-unswitch-16.c: New test.
	* gcc.dg/loop-unswitch-17.c: New test.
	* gcc.dg/torture/20220518-1.c: New test.
	* gcc.dg/torture/20220518-2.c: New test.
	* gcc.dg/torture/20220525-1.c: New test.
	* gcc.dg/alias-10.c: Adjust.
	* gcc.dg/tree-ssa/loop-6.c: Likewise.
	* gcc.dg/loop-unswitch-1.c: Likewise.

Co-authored-by: Richard Biener  <rguenther@suse.de>
This commit is contained in:
Martin Liska 2021-11-22 13:54:20 +01:00 committed by Richard Biener
parent 0d344b5576
commit a1c9f779f7
22 changed files with 1328 additions and 292 deletions

View File

@ -187,6 +187,7 @@ DEBUG_COUNTER (ira_move)
DEBUG_COUNTER (ivopts_loop)
DEBUG_COUNTER (lim)
DEBUG_COUNTER (local_alloc_for_sched)
DEBUG_COUNTER (loop_unswitch)
DEBUG_COUNTER (match)
DEBUG_COUNTER (merged_ipa_icf)
DEBUG_COUNTER (phiopt_edge_range)

View File

@ -14204,9 +14204,6 @@ The maximum depth of a loop nest suitable for complete peeling.
@item max-unswitch-insns
The maximum number of insns of an unswitched loop.
@item max-unswitch-level
The maximum number of branches unswitched in a single loop.
@item lim-expensive
The minimum cost of an expensive expression in the loop invariant motion.

View File

@ -745,10 +745,6 @@ The maximum number of instructions to consider to unroll in a loop.
Common Joined UInteger Var(param_max_unswitch_insns) Init(50) Param Optimization
The maximum number of insns of an unswitched loop.
-param=max-unswitch-level=
Common Joined UInteger Var(param_max_unswitch_level) Init(3) Param Optimization
The maximum number of unswitchings in a single loop.
-param=max-variable-expansions-in-unroller=
Common Joined UInteger Var(param_max_variable_expansions) Init(1) Param Optimization
If -fvariable-expansion-in-unroller is used, the maximum number of times that an individual variable will be expanded during loop unrolling.

View File

@ -28,4 +28,4 @@ void foo (bitmap head, bitmap_element *elt)
}
/* { dg-final { scan-tree-dump-times "Unswitching" 1 "unswitch"} } */
/* { dg-final { scan-tree-dump-times "unswitching" 1 "unswitch"} } */

View File

@ -33,4 +33,4 @@ parse_tag: ;
}
/* Test that we actually unswitched something. */
/* { dg-final { scan-tree-dump "Unswitching loop" "unswitch" } } */
/* { dg-final { scan-tree-dump "unswitching loop" "unswitch" } } */

View File

@ -0,0 +1,56 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
/* Test input for unswitching on a switch statement.  Every iteration
   switches on ORDER, a parameter that is never assigned inside the loop,
   computes TMP/TMP2 from a[i]..d[i] in the selected case and stores a
   combination of them into r[i].  Always returns 0.  The noipa attribute
   is a GCC extension; see the GCC manual for its exact effect.  */
int
__attribute__((noipa))
foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
{
for (int i = 0; i < size; i++)
{
double tmp, tmp2;
switch(order)
{
case 0:
tmp = -8 * a[i];
tmp2 = 2 * b[i];
break;
case 1:
tmp = 3 * a[i] - 2 * b[i];
tmp2 = 5 * b[i] - 2 * c[i];
break;
case 2:
tmp = 9 * a[i] + 2 * b[i] + c[i];
tmp2 = 4 * b[i] + 2 * c[i] + 8 * d[i];
break;
case 3:
tmp = 3 * a[i] + 2 * b[i] - c[i];
tmp2 = b[i] - 2 * c[i] + 8 * d[i];
break;
/* NOTE(review): `defaut' is an ordinary, unreferenced label, not the
   `default' case.  The switch therefore has no default, and an ORDER
   outside 0..3 leaves TMP and TMP2 unset.  Possibly a typo, but the scan
   expectations at the end of the file may depend on this exact shape;
   confirm before changing.  */
defaut:
__builtin_unreachable ();
}
/* TMP and TMP2 are consumed after the switch, outside any case.  */
double x = 3 * tmp + d[i] + tmp;
double y = 3.4f * tmp + d[i] + tmp2;
r[i] = x + y;
}
return 0;
}
/* Element count of the global arrays below.  The expansion is not
   parenthesized; every use in this file is a complete array bound or
   call argument, where that makes no difference.  */
#define N 16 * 1024
/* File-scope arrays passed to foo by main.  */
double aa[N], bb[N], cc[N], dd[N], rr[N];
/* Driver: calls foo 100 * 1000 times on the global arrays, cycling the
   ORDER argument through 0..3 via i % 4.  */
int main()
{
for (int i = 0; i < 100 * 1000; i++)
foo (aa, bb, cc, dd, rr, N, i % 4);
}
/* Test that we actually unswitched something. */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 0" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 1" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 2" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 3" 1 "unswitch" } } */

View File

@ -0,0 +1,45 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
/* Test input for unswitching on a switch with a case range and sparse
   case values.  ORDER is a parameter that is never assigned inside the
   loop.  Values 5, 6 and 9 share one body; 11, 22 and 33 each have their
   own.  Always returns 0.  */
int
foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
{
for (int i = 0; i < size; i++)
{
double tmp, tmp2;
switch(order)
{
/* `case LOW ... HIGH' is the GNU C case-range extension.  */
case 5 ... 6:
case 9:
tmp = -8 * a[i];
tmp2 = 2 * b[i];
break;
case 11:
tmp = 3 * a[i] - 2 * b[i];
tmp2 = 5 * b[i] - 2 * c[i];
break;
case 22:
tmp = 9 * a[i] + 2 * b[i] + c[i];
tmp2 = 4 * b[i] + 2 * c[i] + 8 * d[i];
break;
case 33:
tmp = 3 * a[i] + 2 * b[i] - c[i];
tmp2 = b[i] - 2 * c[i] + 8 * d[i];
break;
/* NOTE(review): `defaut' is an ordinary, unreferenced label, not the
   `default' case, so an ORDER matching no case leaves TMP and TMP2
   unset.  Possibly a typo; confirm against the scan expectations before
   changing.  */
defaut:
__builtin_unreachable ();
}
double x = 3 * tmp + d[i] + tmp;
double y = 3.4f * tmp + d[i] + tmp2;
r[i] = x + y;
}
return 0;
}
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* \\+ 4294967291.*order.* == 9" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 1" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 2" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 3" 1 "unswitch" } } */

View File

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
/* Test input with the same condition, order == 1, tested twice per
   iteration.  ORDER is a parameter that is never assigned inside the
   loop.  The scan directive after the function expects the condition to
   be reported as unswitched exactly once.  Always returns 0.  */
int
foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
{
for (int i = 0; i < size; i++)
{
double tmp;
/* First occurrence of the condition.  */
if (order == 1)
tmp = -8 * a[i];
else
tmp = -4 * b[i];
double x = 3 * tmp + d[i] + tmp;
/* Second occurrence of the very same condition.  */
if (order == 1)
x += 2;
double y = 3.4f * tmp + d[i];
r[i] = x + y;
}
return 0;
}
/* { dg-final { scan-tree-dump-times "unswitching loop . on .if. with condition: order.* == 1" 1 "unswitch" } } */

View File

@ -0,0 +1,35 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fno-thread-jumps -fdump-tree-unswitch-optimized" } */
/* Test input mixing a switch and an if that test complementary ranges
   of the same unsigned parameter ORDER, which is never assigned inside
   the loop.  Always returns 0.  */
int
foo(double *a, double *b, double *c, double *d, double *r, int size, unsigned order)
{
for (int i = 0; i < size; i++)
{
double tmp;
switch (order)
{
/* `case LOW ... HIGH' is the GNU C case-range extension.  */
case 0 ... 4:
tmp = -8 * a[i];
break;
default:
tmp = -4 * b[i];
break;
}
double x = 3 * tmp + d[i] + tmp;
/* For an unsigned ORDER this condition is the exact complement of the
   `case 0 ... 4' range above, so the two are mutually exclusive and the
   loop should be unswitched only once for both.  */
if (order >= 5)
x += 2;
double y = 3.4f * tmp + d[i];
r[i] = x + y;
}
return 0;
}
/* { dg-final { scan-tree-dump-times "unswitching loop . on .\[^\n\r\]*. with condition" 1 "unswitch" } } */

View File

@ -0,0 +1,60 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized --param=max-unswitch-insns=1000" } */
/* Test input combining an if and a switch on the same parameter ORDER,
   which is never assigned inside the loop.  Always returns 0.  The
   noipa attribute is a GCC extension; see the GCC manual for its exact
   effect.  */
int
__attribute__((noipa))
foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
{
for (int i = 0; i < size; i++)
{
double tmp, tmp2;
/* TMP is only pre-set here for ORDER <= 0.  */
if (order <= 0)
tmp = 123;
switch(order)
{
case 0:
/* Unlike the other cases this one accumulates into TMP, reading the
   value stored by the `order <= 0' branch above.  */
tmp += -8 * a[i];
tmp2 = 2 * b[i];
break;
case 1:
tmp = 3 * a[i] - 2 * b[i];
tmp2 = 5 * b[i] - 2 * c[i];
break;
case 2:
tmp = 9 * a[i] + 2 * b[i] + c[i];
tmp2 = 4 * b[i] + 2 * c[i] + 8 * d[i];
break;
case 3:
tmp = 3 * a[i] + 2 * b[i] - c[i];
tmp2 = b[i] - 2 * c[i] + 8 * d[i];
break;
/* NOTE(review): `defaut' is an ordinary, unreferenced label, not the
   `default' case, so an ORDER matching no case leaves TMP2 unset (and
   TMP too unless ORDER <= 0).  Possibly a typo; confirm against the scan
   expectations before changing.  */
defaut:
__builtin_unreachable ();
}
double x = 3 * tmp + d[i] + tmp;
double y = 3.4f * tmp + d[i] + tmp2;
r[i] = x + y;
}
return 0;
}
/* Element count of the global arrays below.  The expansion is not
   parenthesized; every use in this file is a complete array bound or
   call argument, where that makes no difference.  */
#define N 16 * 1024
/* File-scope arrays passed to foo by main.  */
double aa[N], bb[N], cc[N], dd[N], rr[N];
/* Driver: calls foo 100 * 1000 times on the global arrays, cycling the
   ORDER argument through 0..3 via i % 4.  */
int main()
{
for (int i = 0; i < 100 * 1000; i++)
foo (aa, bb, cc, dd, rr, N, i % 4);
}
/* Test that we actually unswitched something. */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* <= 0" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 0" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 1" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 2" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .switch. with condition: order.* == 3" 1 "unswitch" } } */

View File

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
void bar();
void baz();
/* Test input for an if condition comparing two parameters: each of the
   N iterations calls bar when a < b and baz otherwise.  Neither A nor B
   is assigned inside the loop.  */
void foo (int a, int b, int n)
{
for (int i = 0; i < n; ++i)
if (a < b)
bar ();
else
baz ();
}
/* { dg-final { scan-tree-dump "unswitching loop . on .if. with condition:" "unswitch" } } */

View File

@ -0,0 +1,22 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized --param max-unswitch-insns=100" } */
void bar (int);
/* Test input with three independent conditions per iteration, each on
   a different parameter (A, B, C) that is never assigned inside the
   loop.  Each condition guards a call to bar with a distinct argument.  */
void foo (int a, int b, int c, int n)
{
for (int i = 0; i < n; ++i)
{
if (a > 5)
bar (1);
if (b < 10)
bar (2);
if (c != 5)
bar (3);
}
}
/* Verify we can unswitch all permutations of the predicates. */
/* { dg-final { scan-tree-dump-times "unswitching loop . on .if. with condition" 7 "unswitch" } } */
/* { dg-final { scan-tree-dump "unswitching loop . on .if. with condition: a" "unswitch" } } */
/* { dg-final { scan-tree-dump "unswitching loop . on .if. with condition: b" "unswitch" } } */
/* { dg-final { scan-tree-dump "unswitching loop . on .if. with condition: c" "unswitch" } } */

View File

@ -0,0 +1,24 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
/* Test input with an endless do/while loop that tests parameter A with
   both an if and a switch.  A is never assigned, so the loop either
   returns during the first iteration (0 for A == 1, 7 for A == 2, 11 for
   A == 3) or never terminates.  */
int foo (int a)
{
do
{
if (a == 1)
return 0;
switch (a)
{
/* Not reachable at run time: A == 1 already returned above.  */
case 1:
return 5;
case 2:
return 7;
case 3:
return 11;
/* Empty default: control continues with the next iteration.  */
default:;
}
}
while (1);
}
/* { dg-final { scan-tree-dump-times "unswitching loop" 3 "unswitch" } } */

View File

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fno-thread-jumps -fdump-tree-unswitch-optimized" } */
/* Floating-point variant of the repeated-condition test: ORDER is a
   float parameter, never assigned inside the loop, and is compared with
   1.f twice per iteration.  Always returns 0.  */
int
foo(double *a, double *b, double *c, double *d, double *r, int size, float order)
{
for (int i = 0; i < size; i++)
{
double tmp;
/* First occurrence of the condition.  */
if (order == 1.f)
tmp = -8 * a[i];
else
tmp = -4 * b[i];
double x = 3 * tmp + d[i] + tmp;
/* Second occurrence of the very same condition.  */
if (order == 1.f)
x += 2;
double y = 3.4f * tmp + d[i];
r[i] = x + y;
}
return 0;
}
/* { dg-final { scan-tree-dump-times "unswitching loop . on .if. with condition: order.* == 1.0e" 1 "unswitch" } } */

View File

@ -0,0 +1,31 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
/* Test input with three different conditions on the same parameter
   ORDER (order < 3, 5 > order, order == 12345), none of which is
   assigned inside the loop.  Always returns 0.  */
int
foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
{
for (int i = 0; i < size; i++)
{
double tmp;
if (order < 3)
tmp = -8 * a[i];
else
tmp = -4 * b[i];
double x = 3 * tmp + d[i] + tmp;
/* Written with the constant on the left-hand side.  */
if (5 > order)
x += 2;
if (order == 12345)
x *= 5;
double y = 3.4f * tmp + d[i];
r[i] = x + y;
}
return 0;
}
/* { dg-final { scan-tree-dump-times "unswitching loop . on .if. with condition: order" 3 "unswitch" } } */

View File

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-optimized" } */
/* Test input with nested conditions on parameter ORDER, which is never
   assigned inside the loop: order == 2 is only tested on the else path
   of order == 1.  Always returns 0.  */
int
foo(double *a, double *b, double *c, double *d, double *r, int size, int order)
{
for (int i = 0; i < size; i++)
{
double tmp;
if (order == 1)
tmp = -8 * a[i];
else
{
/* Only evaluated when ORDER != 1.  */
if (order == 2)
tmp = -4 * b[i];
else
tmp = a[i];
}
r[i] = 3.4f * tmp + d[i];
}
return 0;
}
/* { dg-final { scan-tree-dump-times "unswitching loop . on .if. with condition: order" 2 "unswitch" } } */

View File

@ -0,0 +1,39 @@
/* { dg-do compile } */
/* { dg-additional-options "-funswitch-loops" } */
/* Anonymous enumeration whose constants are the case labels of the
   switch in foo.  By the usual C rules they take the values 0..5 in
   declaration order.  foo_num is a file-scope variable of this type and
   serves as foo's loop condition.  */
enum {
MOD_WVG_MASK_TEX_USE_INT,
MOD_WVG_MASK_TEX_USE_RED,
MOD_WVG_MASK_TEX_USE_BLUE,
MOD_WVG_MASK_TEX_USE_SAT,
MOD_WVG_MASK_TEX_USE_VAL,
MOD_WVG_MASK_TEX_USE_ALPHA
} foo_num;
float *foo_org_w;
int *foo_new_w;
float foo_fact;
int foo_tex_use_channel, foo_i, foo_texres_0;
/* Compile-only test input: loops while the global foo_num is nonzero
   and, in each iteration, switches on the global foo_tex_use_channel to
   update foo_org_w[foo_i].  Nothing in the loop body assigns foo_num,
   foo_tex_use_channel or foo_i.  */
void foo()
{
for (; foo_num;)
switch (foo_tex_use_channel) {
case MOD_WVG_MASK_TEX_USE_INT:
foo_org_w[foo_i] = foo_new_w[foo_i] * foo_texres_0;
break;
case MOD_WVG_MASK_TEX_USE_RED:
foo_org_w[foo_i] = 0;
/* No break: control falls through into the BLUE case.  */
case MOD_WVG_MASK_TEX_USE_BLUE:
foo_org_w[foo_i] = foo_fact + foo_org_w[foo_i];
break;
case MOD_WVG_MASK_TEX_USE_SAT:
foo_org_w[foo_i] = foo_fact;
break;
case MOD_WVG_MASK_TEX_USE_VAL:
foo_org_w[foo_i] = 0;
/* No break: control falls through into the ALPHA case.  */
case MOD_WVG_MASK_TEX_USE_ALPHA:
foo_org_w[foo_i] = foo_fact + foo_org_w[foo_i];
break;
/* Same statement as the INT case.  */
default:
foo_org_w[foo_i] = foo_new_w[foo_i] * foo_texres_0;
}
}

View File

@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-additional-options "-funswitch-loops" } */
int Get_Spline_Val_sp_0, Get_Spline_Val_k;
double Get_Spline_Val_p, Get_Spline_Val_se_0_0_0;
double *Get_Spline_Val_v;
/* Compile-only test input: an endless loop whose two conditions both
   involve the global Get_Spline_Val_sp_0, which the loop never assigns.
   NOTE(review): the local I is read without ever being initialized or
   modified; presumably an artifact of test-case reduction -- confirm
   before "fixing", since the test only needs to compile.  */
void Get_Spline_Val() {
int i;
for (;;)
if (i > Get_Spline_Val_sp_0)
Get_Spline_Val_k = Get_Spline_Val_se_0_0_0;
else if (Get_Spline_Val_sp_0 == 1)
Get_Spline_Val_v[Get_Spline_Val_k] = Get_Spline_Val_p;
}

View File

@ -0,0 +1,33 @@
/* { dg-do compile } */
/* { dg-additional-options "-funswitch-loops" } */
int LIST_1, mb_pred_b_d4x4spatial_dec_picture_l0_rFrame,
mb_pred_b_d4x4spatial_dec_picture_l1_rFrame;
/* Record holding a two-element char array; the only use in this file
   is the store to ref_idx[LIST_1] in
   mb_pred_b_d4x4spatial_dec_picture.  */
typedef struct {
char ref_idx[2];
} PicMotionParams;
PicMotionParams mb_pred_b_d4x4spatial_dec_picture_mv_info;
int get_colocated_info_4x4___trans_tmp_1, get_colocated_info_4x4_list1_0;
/* Returns the logical AND of the two globals
   get_colocated_info_4x4_list1_0 and
   get_colocated_info_4x4___trans_tmp_1, i.e. 1 when both are nonzero
   and 0 otherwise.  */
int get_colocated_info_4x4()
{
int moving =
get_colocated_info_4x4_list1_0 && get_colocated_info_4x4___trans_tmp_1;
return moving;
}
/* Compile-only test input: an endless outer loop around an inner loop
   of four iterations (K from 0 to 3).  The inner conditions test the
   globals mb_pred_b_d4x4spatial_dec_picture_l0_rFrame and
   mb_pred_b_d4x4spatial_dec_picture_l1_rFrame, neither of which is
   assigned in this function.  */
void mb_pred_b_d4x4spatial_dec_picture()
{
char k;
for (;;)
{
k = 0;
for (; k < 4; k++)
if (mb_pred_b_d4x4spatial_dec_picture_l0_rFrame
|| mb_pred_b_d4x4spatial_dec_picture_l1_rFrame == 0)
{
/* Despite its name, this variable holds the value the callee returns
   as `moving'.  */
int is_not_moving = get_colocated_info_4x4();
/* l1_rFrame is tested again here, this time for being nonzero.  */
if (mb_pred_b_d4x4spatial_dec_picture_l1_rFrame)
if (is_not_moving)
mb_pred_b_d4x4spatial_dec_picture_mv_info.ref_idx[LIST_1] = 1;
}
}
}

View File

@ -19,7 +19,7 @@ void xxx(void)
/* Loop should be unswitched. */
/* { dg-final { scan-tree-dump-times "Unswitching loop" 1 "unswitch" } } */
/* { dg-final { scan-tree-dump-times "unswitching loop" 1 "unswitch" } } */
/* In effect there should be exactly three conditional jumps in the final program. */

View File

@ -9028,11 +9028,16 @@ gimple_lv_add_condition_to_bb (basic_block first_head ATTRIBUTE_UNUSED,
edge e0;
/* Build new conditional expr */
gsi = gsi_last_bb (cond_bb);
cond_expr = force_gimple_operand_gsi_1 (&gsi, cond_expr,
is_gimple_condexpr_for_cond,
NULL_TREE, false,
GSI_CONTINUE_LINKING);
new_cond_expr = gimple_build_cond_from_tree (cond_expr,
NULL_TREE, NULL_TREE);
/* Add new cond in cond_bb. */
gsi = gsi_last_bb (cond_bb);
gsi_insert_after (&gsi, new_cond_expr, GSI_NEW_STMT);
/* Adjust edges appropriately to connect new head with first head

File diff suppressed because it is too large Load Diff