Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination (PR83403)

Use determine_value_range to obtain value-range information so that a
conversion can be folded into its inner PLUS_EXPR/MINUS_EXPR/MULT_EXPR when
the inner type has wrapping overflow and the range information shows that
the inner operation does not overflow, e.g.:

(long unsigned int)((unsigned int)n * 10 + 1)
=>
(long unsigned int)n * (long unsigned int)10 + (long unsigned int)1

With this patch to affine combination, load/store motion can prove that
more address references are independent and promote some memory
expressions to registers within the loop.

PS: For wrapping overflow, replace the previous handling of
"(T1)(X + CST)" as "(T1)X - (T1)(-CST)" with "(T1)X + (T1)CST".

Bootstrapped and regression tested on Power8-LE.

gcc/ChangeLog

	2020-05-11  Xiong Hu Luo  <luoxhu@linux.ibm.com>

	PR tree-optimization/83403
	* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
	determine_value_range, Add fold conversion of MULT_EXPR, fix the
	previous PLUS_EXPR.

gcc/testsuite/ChangeLog

	2020-05-11  Xiong Hu Luo  <luoxhu@linux.ibm.com>

	PR tree-optimization/83403
	* gcc.dg/tree-ssa/pr83403-1.c: New test.
	* gcc.dg/tree-ssa/pr83403-2.c: New test.
	* gcc.dg/tree-ssa/pr83403.h: New header.
This commit is contained in:
Xionghu Luo 2020-05-10 21:06:20 -05:00
parent e7ae6d32c7
commit 0447929f11
6 changed files with 74 additions and 10 deletions

View File

@ -1,3 +1,10 @@
2020-05-11 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
determine_value_range, Add fold conversion of MULT_EXPR, fix the
previous PLUS_EXPR.
2020-05-10 Gerald Pfeifer <gerald@pfeifer.com>
* config/i386/i386-c.c (ix86_target_macros): Define _ILP32 and

View File

@ -1,3 +1,10 @@
2020-05-11 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* gcc.dg/tree-ssa/pr83403-1.c: New test.
* gcc.dg/tree-ssa/pr83403-2.c: New test.
* gcc.dg/tree-ssa/pr83403.h: New header.
2020-05-10 Harald Anlauf <anlauf@gmx.de>
PR fortran/93499

View File

@ -0,0 +1,8 @@
/* PR tree-optimization/83403: compile the kernel from pr83403.h with an
   unsigned index type and check the lim2 dump for store-motion messages.  */
/* { dg-do compile } */
/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
/* TYPE selects the integer type used by the loop counters and array
   indices inside pr83403.h; it must be defined before the include.  */
#define TYPE unsigned int
#include "pr83403.h"
/* The test passes only if the lim2 dump reports store motion exactly
   10 times.  */
/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */

View File

@ -0,0 +1,8 @@
/* PR tree-optimization/83403: compile the kernel from pr83403.h with a
   signed index type and check the lim2 dump for store-motion messages.  */
/* { dg-do compile } */
/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
/* TYPE selects the integer type used by the loop counters and array
   indices inside pr83403.h; it must be defined before the include.  */
#define TYPE int
#include "pr83403.h"
/* The test passes only if the lim2 dump reports store motion exactly
   10 times.  */
/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */

View File

@ -0,0 +1,30 @@
/* Shared test kernel for PR83403.  The including file must define TYPE,
   the integer type of the loop counters and array indices, before
   including this header.

   For each n in [0, 9) the function zeroes C[n*10 .. n*10+9] and then,
   for each k in [0, 17) and m in [0, 10), adds A[k*20 + m] * B[n*20 + k]
   to C[n*10 + m].  A and B are only read; C is read and written.

   The function is marked noinline so it is compiled as a standalone
   function rather than being inlined into a caller.  */
__attribute__ ((noinline)) void
calculate (const double *__restrict__ A, const double *__restrict__ B,
double *__restrict__ C)
{
/* All counters have type TYPE, so index expressions such as (n * 10) + m
   are evaluated in TYPE before being used as subscripts.  */
TYPE m = 0;
TYPE n = 0;
TYPE k = 0;
/* Tell the compiler that each pointer is 16-byte aligned.  */
A = (const double *) __builtin_assume_aligned (A, 16);
B = (const double *) __builtin_assume_aligned (B, 16);
C = (double *) __builtin_assume_aligned (C, 16);
for (n = 0; n < 9; n++)
{
/* Clear the 10 outputs of row n.  */
for (m = 0; m < 10; m++)
{
C[(n * 10) + m] = 0.0;
}
/* Accumulate 17 products into each of the 10 outputs of row n.  */
for (k = 0; k < 17; k++)
{
/* NOTE(review): whether the compiler acts on this pragma is not visible
   from this file.  */
#pragma simd
for (m = 0; m < 10; m++)
{
/* B[(n * 20) + k] does not depend on m; C[(n * 10) + m] is both loaded
   and stored on every k iteration.  */
C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
}
}
}
}

View File

@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
wide_int minv, maxv;
/* If inner type has wrapping overflow behavior, fold conversion
for below case:
(T1)(X - CST) -> (T1)X - (T1)CST
if X - CST doesn't overflow by range information. Also handle
(T1)(X + CST) as (T1)(X - (-CST)). */
(T1)(X *+- CST) -> (T1)X *+- (T1)CST
if X *+- CST doesn't overflow by range information. */
if (TYPE_UNSIGNED (itype)
&& TYPE_OVERFLOW_WRAPS (itype)
&& TREE_CODE (op0) == SSA_NAME
&& TREE_CODE (op1) == INTEGER_CST
&& icode != MULT_EXPR
&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
{
wi::overflow_type overflow = wi::OVF_NONE;
signop sign = UNSIGNED;
if (icode == PLUS_EXPR)
op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
if (wi::geu_p (minv, wi::to_wide (op1)))
wi::add (maxv, wi::to_wide (op1), sign, &overflow);
else if (icode == MULT_EXPR)
wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
else
wi::sub (minv, wi::to_wide (op1), sign, &overflow);
if (overflow == wi::OVF_NONE)
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
return expr_to_aff_combination (comb, MINUS_EXPR, otype,
op0, op1);
return expr_to_aff_combination (comb, icode, otype, op0,
op1);
}
}
}