re PR tree-optimization/32377 (can't determine dependence (source/destination overlap without more than size))
2007-10-31 Sebastian Pop <sebastian.pop@amd.com> PR tree-optimization/32377 * tree-data-ref.c (compute_overlap_steps_for_affine_univar): Make it work also for unknown number of iterations. (analyze_subscript_affine_affine): Clean up. Don't fail when the number of iterations is not known. * gfortran.dg/vect/pr32377.f90: New. From-SVN: r129797
This commit is contained in:
parent
ac029795f3
commit
2c26cbfd23
|
@ -1,3 +1,11 @@
|
|||
2007-10-31 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
PR tree-optimization/32377
|
||||
* tree-data-ref.c (compute_overlap_steps_for_affine_univar): Make it
|
||||
work also for unknown number of iterations.
|
||||
(analyze_subscript_affine_affine): Clean up. Don't fail when the
|
||||
number of iterations is not known.
|
||||
|
||||
2007-10-31 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/33779
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2007-10-31 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
PR tree-optimization/32377
|
||||
* gfortran.dg/vect/pr32377.f90: New.
|
||||
|
||||
2007-10-31 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/33779
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
! { dg-do compile }
|
||||
! { dg-require-effective-target vect_float }
|
||||
|
||||
subroutine s243(ntimes,ld,n,ctime,dtime,a,b,c,d,e,aa,bb,cc)
|
||||
|
||||
integer ntimes,ld,n,i,nl
|
||||
real a(n),b(n),c(n),d(n),e(n),aa(ld,n),bb(ld,n),cc(ld,n)
|
||||
real t1,t2,chksum,ctime,dtime,cs1d
|
||||
b(:n-1)= b(:n-1)+(c(:n-1)+e(:n-1))*d(:n-1)
|
||||
a(:n-1)= b(:n-1)+a(2:n)*d(:n-1)
|
||||
return
|
||||
end subroutine s243
|
||||
|
||||
! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } }
|
||||
! { dg-final { cleanup-tree-dump "vect" } }
|
|
@ -1819,9 +1819,15 @@ compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
|
|||
step_overlaps_a = step_b / gcd_steps_a_b;
|
||||
step_overlaps_b = step_a / gcd_steps_a_b;
|
||||
|
||||
tau2 = FLOOR_DIV (niter, step_overlaps_a);
|
||||
tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
|
||||
last_conflict = tau2;
|
||||
if (niter > 0)
|
||||
{
|
||||
tau2 = FLOOR_DIV (niter, step_overlaps_a);
|
||||
tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
|
||||
last_conflict = tau2;
|
||||
*last_conflicts = build_int_cst (NULL_TREE, last_conflict);
|
||||
}
|
||||
else
|
||||
*last_conflicts = chrec_dont_know;
|
||||
|
||||
*overlaps_a = affine_fn_univar (integer_zero_node, dim,
|
||||
build_int_cst (NULL_TREE,
|
||||
|
@ -1829,7 +1835,6 @@ compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
|
|||
*overlaps_b = affine_fn_univar (integer_zero_node, dim,
|
||||
build_int_cst (NULL_TREE,
|
||||
step_overlaps_b));
|
||||
*last_conflicts = build_int_cst (NULL_TREE, last_conflict);
|
||||
}
|
||||
|
||||
else
|
||||
|
@ -1985,7 +1990,6 @@ analyze_subscript_affine_affine (tree chrec_a,
|
|||
{
|
||||
unsigned nb_vars_a, nb_vars_b, dim;
|
||||
HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
|
||||
HOST_WIDE_INT tau1, tau2;
|
||||
lambda_matrix A, U, S;
|
||||
|
||||
if (eq_evolutions_p (chrec_a, chrec_b))
|
||||
|
@ -2043,18 +2047,7 @@ analyze_subscript_affine_affine (tree chrec_a,
|
|||
false);
|
||||
niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
|
||||
false);
|
||||
if (niter_a < 0 || niter_b < 0)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
|
||||
*overlaps_a = conflict_fn_not_known ();
|
||||
*overlaps_b = conflict_fn_not_known ();
|
||||
*last_conflicts = chrec_dont_know;
|
||||
goto end_analyze_subs_aa;
|
||||
}
|
||||
|
||||
niter = MIN (niter_a, niter_b);
|
||||
|
||||
step_a = int_cst_value (CHREC_RIGHT (chrec_a));
|
||||
step_b = int_cst_value (CHREC_RIGHT (chrec_b));
|
||||
|
||||
|
@ -2138,31 +2131,7 @@ analyze_subscript_affine_affine (tree chrec_a,
|
|||
|
||||
| x0 = i0 + i1 * t,
|
||||
| y0 = j0 + j1 * t. */
|
||||
|
||||
HOST_WIDE_INT i0, j0, i1, j1;
|
||||
|
||||
/* X0 and Y0 are the first iterations for which there is a
|
||||
dependence. X0, Y0 are two solutions of the Diophantine
|
||||
equation: chrec_a (X0) = chrec_b (Y0). */
|
||||
HOST_WIDE_INT x0, y0;
|
||||
HOST_WIDE_INT niter, niter_a, niter_b;
|
||||
|
||||
niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
|
||||
false);
|
||||
niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
|
||||
false);
|
||||
|
||||
if (niter_a < 0 || niter_b < 0)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
|
||||
*overlaps_a = conflict_fn_not_known ();
|
||||
*overlaps_b = conflict_fn_not_known ();
|
||||
*last_conflicts = chrec_dont_know;
|
||||
goto end_analyze_subs_aa;
|
||||
}
|
||||
|
||||
niter = MIN (niter_a, niter_b);
|
||||
HOST_WIDE_INT i0, j0, i1, j1;
|
||||
|
||||
i0 = U[0][0] * gamma / gcd_alpha_beta;
|
||||
j0 = U[0][1] * gamma / gcd_alpha_beta;
|
||||
|
@ -2179,80 +2148,72 @@ analyze_subscript_affine_affine (tree chrec_a,
|
|||
*overlaps_a = conflict_fn_no_dependence ();
|
||||
*overlaps_b = conflict_fn_no_dependence ();
|
||||
*last_conflicts = integer_zero_node;
|
||||
goto end_analyze_subs_aa;
|
||||
}
|
||||
|
||||
else
|
||||
if (i1 > 0 && j1 > 0)
|
||||
{
|
||||
if (i1 > 0)
|
||||
HOST_WIDE_INT niter_a = estimated_loop_iterations_int
|
||||
(get_chrec_loop (chrec_a), false);
|
||||
HOST_WIDE_INT niter_b = estimated_loop_iterations_int
|
||||
(get_chrec_loop (chrec_b), false);
|
||||
HOST_WIDE_INT niter = MIN (niter_a, niter_b);
|
||||
|
||||
/* (X0, Y0) is a solution of the Diophantine equation:
|
||||
"chrec_a (X0) = chrec_b (Y0)". */
|
||||
HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
|
||||
CEIL (-j0, j1));
|
||||
HOST_WIDE_INT x0 = i1 * tau1 + i0;
|
||||
HOST_WIDE_INT y0 = j1 * tau1 + j0;
|
||||
|
||||
/* (X1, Y1) is the smallest positive solution of the eq
|
||||
"chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
|
||||
first conflict occurs. */
|
||||
HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
|
||||
HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
|
||||
HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
|
||||
|
||||
if (niter > 0)
|
||||
{
|
||||
tau1 = CEIL (-i0, i1);
|
||||
tau2 = FLOOR_DIV (niter - i0, i1);
|
||||
HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
|
||||
FLOOR_DIV (niter - j0, j1));
|
||||
HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
|
||||
|
||||
if (j1 > 0)
|
||||
/* If the overlap occurs outside of the bounds of the
|
||||
loop, there is no dependence. */
|
||||
if (x1 > niter || y1 > niter)
|
||||
{
|
||||
int last_conflict, min_multiple;
|
||||
tau1 = MAX (tau1, CEIL (-j0, j1));
|
||||
tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
|
||||
|
||||
x0 = i1 * tau1 + i0;
|
||||
y0 = j1 * tau1 + j0;
|
||||
|
||||
/* At this point (x0, y0) is one of the
|
||||
solutions to the Diophantine equation. The
|
||||
next step has to compute the smallest
|
||||
positive solution: the first conflicts. */
|
||||
min_multiple = MIN (x0 / i1, y0 / j1);
|
||||
x0 -= i1 * min_multiple;
|
||||
y0 -= j1 * min_multiple;
|
||||
|
||||
tau1 = (x0 - i0)/i1;
|
||||
last_conflict = tau2 - tau1;
|
||||
|
||||
/* If the overlap occurs outside of the bounds of the
|
||||
loop, there is no dependence. */
|
||||
if (x0 > niter || y0 > niter)
|
||||
{
|
||||
*overlaps_a = conflict_fn_no_dependence ();
|
||||
*overlaps_b = conflict_fn_no_dependence ();
|
||||
*last_conflicts = integer_zero_node;
|
||||
}
|
||||
else
|
||||
{
|
||||
*overlaps_a
|
||||
= conflict_fn (1,
|
||||
affine_fn_univar (build_int_cst (NULL_TREE, x0),
|
||||
1,
|
||||
build_int_cst (NULL_TREE, i1)));
|
||||
*overlaps_b
|
||||
= conflict_fn (1,
|
||||
affine_fn_univar (build_int_cst (NULL_TREE, y0),
|
||||
1,
|
||||
build_int_cst (NULL_TREE, j1)));
|
||||
*last_conflicts = build_int_cst (NULL_TREE, last_conflict);
|
||||
}
|
||||
*overlaps_a = conflict_fn_no_dependence ();
|
||||
*overlaps_b = conflict_fn_no_dependence ();
|
||||
*last_conflicts = integer_zero_node;
|
||||
goto end_analyze_subs_aa;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* FIXME: For the moment, the upper bound of the
|
||||
iteration domain for j is not checked. */
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
|
||||
*overlaps_a = conflict_fn_not_known ();
|
||||
*overlaps_b = conflict_fn_not_known ();
|
||||
*last_conflicts = chrec_dont_know;
|
||||
}
|
||||
*last_conflicts = build_int_cst (NULL_TREE, last_conflict);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* FIXME: For the moment, the upper bound of the
|
||||
iteration domain for i is not checked. */
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
|
||||
*overlaps_a = conflict_fn_not_known ();
|
||||
*overlaps_b = conflict_fn_not_known ();
|
||||
*last_conflicts = chrec_dont_know;
|
||||
}
|
||||
*last_conflicts = chrec_dont_know;
|
||||
|
||||
*overlaps_a
|
||||
= conflict_fn (1,
|
||||
affine_fn_univar (build_int_cst (NULL_TREE, x1),
|
||||
1,
|
||||
build_int_cst (NULL_TREE, i1)));
|
||||
*overlaps_b
|
||||
= conflict_fn (1,
|
||||
affine_fn_univar (build_int_cst (NULL_TREE, y1),
|
||||
1,
|
||||
build_int_cst (NULL_TREE, j1)));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* FIXME: For the moment, the upper bound of the
|
||||
iteration domain for i and j is not checked. */
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
|
||||
*overlaps_a = conflict_fn_not_known ();
|
||||
*overlaps_b = conflict_fn_not_known ();
|
||||
*last_conflicts = chrec_dont_know;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -2264,7 +2225,6 @@ analyze_subscript_affine_affine (tree chrec_a,
|
|||
*last_conflicts = chrec_dont_know;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
|
|
Loading…
Reference in New Issue