Fix PR45199: do not aggregate memory accesses to the same array for -ftree-loop-distribute-patterns
2010-11-30  Sebastian Pop  <sebastian.pop@amd.com>

	PR tree-optimization/45199
	* tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New.
	(stores_zero_from_loop): Call
	mem_write_stride_of_same_size_as_unit_type_p.
	* tree-data-ref.h (stride_of_unit_type_p): New.
	* tree-loop-distribution.c (generate_memset_zero): Simplified.
	Call stride_of_unit_type_p.
	(build_rdg_partition_for_component): Do not call
	rdg_flag_similar_memory_accesses when
	flag_tree_loop_distribute_patterns is set.
	* gcc.dg/tree-ssa/ldist-15.c: New.
	* gcc.dg/tree-ssa/ldist-16.c: New.
	* gfortran.dg/ldist-pr45199.f: New.

From-SVN: r167380
This commit is contained in:
parent
b2087e8dad
commit
5e37ea0ef1
@ -1,3 +1,16 @@
|
||||
2010-12-02 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
PR tree-optimization/45199
|
||||
* tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New.
|
||||
(stores_zero_from_loop): Call
|
||||
mem_write_stride_of_same_size_as_unit_type_p.
|
||||
* tree-data-ref.h (stride_of_unit_type_p): New.
|
||||
* tree-loop-distribution.c (generate_memset_zero): Simplified.
|
||||
Call stride_of_unit_type_p.
|
||||
(build_rdg_partition_for_component): Do not call
|
||||
rdg_flag_similar_memory_accesses when
|
||||
flag_tree_loop_distribute_patterns is set.
|
||||
|
||||
2010-12-02 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Disregard
|
||||
|
@ -1,3 +1,10 @@
|
||||
2010-12-02 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
PR tree-optimization/45199
|
||||
* gcc.dg/tree-ssa/ldist-15.c: New.
|
||||
* gcc.dg/tree-ssa/ldist-16.c: New.
|
||||
* gfortran.dg/ldist-pr45199.f: New.
|
||||
|
||||
2010-12-02 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/46723
|
||||
@ -23,8 +30,8 @@
|
||||
2010-12-02 Nicola Pero <nicola.pero@meta-innovation.com>
|
||||
|
||||
* objc.dg/exceptions-6.m: New.
|
||||
* obj-c++.dg/exceptions-6.mm: New.
|
||||
|
||||
* obj-c++.dg/exceptions-6.mm: New.
|
||||
|
||||
2010-12-01 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* gcc.c-torture/execute/bcp-1.c: Make ready for -fuse-linker-plugin
|
||||
|
23
gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
Normal file
23
gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c
Normal file
@ -0,0 +1,23 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -fdump-tree-ldist-details" } */
|
||||
|
||||
int x[1000];
|
||||
|
||||
void foo (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
x[2*i] = 0;
|
||||
x[2*i + 1] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* We should not apply loop distribution as it is not beneficial from
|
||||
a data locality point of view. Also it is not possible to generate
|
||||
a memset (0) as the write has a stride of 2. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
21
gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
Normal file
21
gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -fdump-tree-ldist-details" } */
|
||||
|
||||
int x[1000];
|
||||
|
||||
void foo (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
x[i] = 0;
|
||||
x[2*i + 1] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* We should apply loop distribution and generate a memset (0). */
|
||||
|
||||
/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */
|
||||
/* { dg-final { cleanup-tree-dump "ldist" } } */
|
27
gcc/testsuite/gfortran.dg/ldist-pr45199.f
Normal file
27
gcc/testsuite/gfortran.dg/ldist-pr45199.f
Normal file
@ -0,0 +1,27 @@
|
||||
! { dg-do compile }
|
||||
! { dg-options "-O3 -fdump-tree-ldist-details" }
|
||||
|
||||
parameter(numlev=3,numoblev=1000)
|
||||
integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev)
|
||||
logical l_numob(numoblev,numlev)
|
||||
do ixe=1,numoblev
|
||||
do iye=1,numlev
|
||||
i_otyp(ixe,iye)=0
|
||||
i_styp(ixe,iye)=0
|
||||
l_numob(ixe,iye)=.false.
|
||||
enddo
|
||||
enddo
|
||||
do i=1,m
|
||||
do j=1,n
|
||||
if (l_numob(i,j)) then
|
||||
write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j)
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
end
|
||||
|
||||
! GCC should apply memset zero loop distribution and it should not ICE.
|
||||
|
||||
! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } }
|
||||
! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } }
|
||||
! { dg-final { cleanup-tree-dump "ldist" } }
|
@ -4974,6 +4974,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
|
||||
free (bbs);
|
||||
}
|
||||
|
||||
/* Returns true when STMT is an assignment that contains a data
|
||||
reference on its LHS with a stride of the same size as its unit
|
||||
type. */
|
||||
|
||||
static bool
|
||||
mem_write_stride_of_same_size_as_unit_type_p (gimple stmt)
|
||||
{
|
||||
struct data_reference *dr = XCNEW (struct data_reference);
|
||||
tree op0 = gimple_assign_lhs (stmt);
|
||||
bool res;
|
||||
|
||||
DR_STMT (dr) = stmt;
|
||||
DR_REF (dr) = op0;
|
||||
|
||||
res = dr_analyze_innermost (dr)
|
||||
&& stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
|
||||
|
||||
free_data_ref (dr);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Initialize STMTS with all the statements of LOOP that contain a
|
||||
store to memory of the form "A[i] = 0". */
|
||||
|
||||
@ -4994,7 +5015,8 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
|
||||
&& is_gimple_assign (stmt)
|
||||
&& gimple_assign_rhs_code (stmt) == INTEGER_CST
|
||||
&& (op = gimple_assign_rhs1 (stmt))
|
||||
&& (integer_zerop (op) || real_zerop (op)))
|
||||
&& (integer_zerop (op) || real_zerop (op))
|
||||
&& mem_write_stride_of_same_size_as_unit_type_p (stmt))
|
||||
VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
|
||||
|
||||
free (bbs);
|
||||
|
@ -603,6 +603,17 @@ void remove_similar_memory_refs (VEC (gimple, heap) **);
|
||||
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
|
||||
bool have_similar_memory_accesses (gimple, gimple);
|
||||
|
||||
/* Returns true when STRIDE is equal in absolute value to the size of
|
||||
the unit type of TYPE. */
|
||||
|
||||
static inline bool
|
||||
stride_of_unit_type_p (tree stride, tree type)
|
||||
{
|
||||
return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride),
|
||||
stride),
|
||||
TYPE_SIZE_UNIT (type));
|
||||
}
|
||||
|
||||
/* Determines whether RDG vertices V1 and V2 access similar memory
|
||||
locations, in which case they have to be in the same partition. */
|
||||
|
||||
|
@ -258,42 +258,27 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
|
||||
if (!dr_analyze_innermost (dr))
|
||||
goto end;
|
||||
|
||||
/* Test for a positive stride, iterating over every element. */
|
||||
if (integer_zerop (size_binop (MINUS_EXPR,
|
||||
fold_convert (sizetype, DR_STEP (dr)),
|
||||
TYPE_SIZE_UNIT (TREE_TYPE (op0)))))
|
||||
{
|
||||
addr_base = fold_convert_loc (loc, sizetype,
|
||||
size_binop_loc (loc, PLUS_EXPR,
|
||||
DR_OFFSET (dr),
|
||||
DR_INIT (dr)));
|
||||
addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
|
||||
TREE_TYPE (DR_BASE_ADDRESS (dr)),
|
||||
DR_BASE_ADDRESS (dr), addr_base);
|
||||
if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)))
|
||||
goto end;
|
||||
|
||||
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
|
||||
}
|
||||
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
|
||||
addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
|
||||
addr_base = fold_convert_loc (loc, sizetype, addr_base);
|
||||
|
||||
/* Test for a negative stride, iterating over every element. */
|
||||
else if (integer_zerop (size_binop (PLUS_EXPR,
|
||||
TYPE_SIZE_UNIT (TREE_TYPE (op0)),
|
||||
fold_convert (sizetype, DR_STEP (dr)))))
|
||||
if (integer_zerop (size_binop (PLUS_EXPR,
|
||||
TYPE_SIZE_UNIT (TREE_TYPE (op0)),
|
||||
fold_convert (sizetype, DR_STEP (dr)))))
|
||||
{
|
||||
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
|
||||
|
||||
addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
|
||||
addr_base = fold_convert_loc (loc, sizetype, addr_base);
|
||||
addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
|
||||
fold_convert_loc (loc, sizetype, nb_bytes));
|
||||
addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
|
||||
TYPE_SIZE_UNIT (TREE_TYPE (op0)));
|
||||
addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
|
||||
TREE_TYPE (DR_BASE_ADDRESS (dr)),
|
||||
DR_BASE_ADDRESS (dr), addr_base);
|
||||
}
|
||||
else
|
||||
goto end;
|
||||
|
||||
addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
|
||||
TREE_TYPE (DR_BASE_ADDRESS (dr)),
|
||||
DR_BASE_ADDRESS (dr), addr_base);
|
||||
mem = force_gimple_operand (addr_base, &stmts, true, NULL);
|
||||
gimple_seq_add_seq (&stmt_list, stmts);
|
||||
|
||||
@ -781,8 +766,9 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c,
|
||||
and determine those vertices that have some memory affinity with
|
||||
the current nodes in the component: these are stores to the same
|
||||
arrays, i.e. we're taking care of cache locality. */
|
||||
rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
|
||||
other_stores);
|
||||
if (!flag_tree_loop_distribute_patterns)
|
||||
rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
|
||||
other_stores);
|
||||
|
||||
rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user