gcc/libgomp/testsuite/libgomp.c/loop-19.c
Jakub Jelinek 5acef69f9d openmp: Optimize triangular loop logical iterator to actual iterators computation using search for quadratic equation root(s)
This patch implements the optimized logical to actual iterators
computation for triangular loops.

I have a rough implementation using integers, but this one uses floating
point.  There is a small problem that -fopenmp programs aren't linked with
-lm, so it does it only if the hw has sqrt optab (and uses ifn rather than
__builtin_sqrt because it obviously doesn't need errno handling etc.).

Do you think it is ok this way, or should I use the integral computation
using an inlined isqrt?  We have an inequality of the form
start >= x * t10 + t11 * (((x - 1) * x) / 2)
where t10 and t11 are signed long long values and start unsigned long long,
and the division by 2 actually is a problem for accuracy in some cases, so
if we do it in integer arithmetic, we actually need to compute
      long long t12 = 2 * t10 - t11;
      unsigned long long t13 = t12 * t12 + start * 8 * t11;
      unsigned long long isqrt_ = isqrtull (t13);
      long long x = (((long long) isqrt_ - t12) / t11) >> 1;
with careful overflow checking on all the computations before isqrtull
(and on overflows use the fallback implementation).

2020-07-09  Jakub Jelinek  <jakub@redhat.com>

	* omp-general.h (struct omp_for_data): Add min_inner_iterations
	and factor members.
	* omp-general.c (omp_extract_for_data): Initialize them and remember
	them in OMP_CLAUSE_COLLAPSE_COUNT if needed and restore from there.
	* omp-expand.c (expand_omp_for_init_counts): Fix up computation of
	counts[fd->last_nonrect] if fd->loop.n2 is INTEGER_CST.
	(expand_omp_for_init_vars): For
	fd->first_nonrect + 1 == fd->last_nonrect loops with for now
	INTEGER_CST fd->loop.n2 find quadratic equation roots instead of
	using fallback method when possible.

	* testsuite/libgomp.c/loop-19.c: New test.
	* testsuite/libgomp.c/loop-20.c: New test.
2020-07-09 12:07:17 +02:00

87 lines
2.4 KiB
C

/* { dg-do run } */
/* Called by main to fail the test on any mismatch.  */
extern void abort (void);
/* Loop iterators i and j plus the witness value x.  main names them in
   lastprivate clauses and checks them after each parallel loop.  */
int x, i, j;
/* Start, end, step and bound coefficients for the variants of the loops
   whose bounds are read at run time; main assigns them before use.
   Volatile, presumably so the compiler cannot treat them as
   compile-time constants.  */
volatile int a, b, c, d, e, f, g, h;
/* Per-(i, j) visit counter, indexed k[i][j].  main marks the expected
   iteration space in it and bumps the entry on every visit.  */
int k[16][67];
/* Exercise two collapse(2) loop nests whose inner bounds depend on the
   outer iterator.  Each nest is run twice: once with literal bounds and
   once with the same bounds read from the volatile globals a..h.
   For every run the test checks that only the expected (i, j) pairs are
   visited, that the iteration count is right, and that the lastprivate
   variables i, j and x end up with the expected final values.
   Returns 0 on success; calls abort on any mismatch.  */
int
main ()
{
int niters;
/* Mark the first iteration space: for each i in [0, 16), j runs over
   [2*i + 1, 4*i + 3), i.e. 2*i + 2 values of j per i.  */
for (i = 0; i < 16; i++)
for (j = i * 2 + 1; j < 4 * i + 3; j++)
k[i][j] = 1;
/* Run-time bounds describing the same space as the literal loops:
   i from a to b step c, j from d*i + e to g + i*f step h.  */
a = 0; b = 16; c = 1; d = 2; e = 1; f = 4; g = 3; h = 1;
/* Sentinels, so the checks below fail if a variable is never written.  */
niters = 0; i = -100; j = -100; x = -100;
/* First pass over the first space, with literal bounds.  */
#pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
for (i = 0; i < 16; i++)
for (j = i * 2 + 1; j < 4 * i + 3; j++)
{
/* The pair must lie in the marked space and must still carry the
   initial mark 1.  */
if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 3 + i * 4 || k[i][j] != 1)
abort ();
k[i][j]++;
/* Encode the current (i, j) so the value left in x can be checked.  */
x = i * 1024 + (j & 1023);
niters++;
}
/* Expected: i and j are one step past their final bounds (16 and
   4*15 + 3 = 63), x encodes the last pair (15, 62) as 15*1024 + 62,
   and niters is the sum of 2*i + 2 for i in [0, 16), i.e. 272.  */
if (i != 16 || j != 63 || x != 15422 || niters != 272)
abort ();
niters = 0; i = -100; j = -100; x = -100;
/* Second pass over the same space, with bounds taken from the volatile
   globals; entries are now expected to hold 2.  */
#pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
for (i = a; i < b; i += c)
for (j = d * i + e; j < g + i * f; j += h)
{
if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 3 + i * 4 || k[i][j] != 2)
abort ();
k[i][j]++;
x = i * 1024 + (j & 1023);
niters++;
}
if (i != 16 || j != 63 || x != 15422 || niters != 272)
abort ();
/* Every entry in the first space must have been bumped by both passes
   (1 -> 2 -> 3); clear it for the second half of the test.  */
for (i = 0; i < 16; i++)
for (j = i * 2 + 1; j < 4 * i + 3; j++)
if (k[i][j] == 3)
k[i][j] = 0;
else
abort ();
/* Mark the second iteration space: j runs over [2*i + 1, 2*i + 7), so
   the bounds still depend on i but every i has 6 values of j.  */
for (i = 0; i < 16; i++)
for (j = i * 2 + 1; j < 2 * i + 7; j++)
k[i][j] = 1;
/* Run-time bounds for the second space; only f and g differ from the
   first half.  */
a = 0; b = 16; c = 1; d = 2; e = 1; f = 2; g = 7; h = 1;
niters = 0; i = -100; j = -100; x = -100;
/* First pass over the second space, with literal bounds.  */
#pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
for (i = 0; i < 16; i++)
for (j = i * 2 + 1; j < 2 * i + 7; j++)
{
if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 7 + i * 2 || k[i][j] != 1)
abort ();
k[i][j]++;
x = i * 1024 + (j & 1023);
niters++;
}
/* Expected: i == 16, j == 2*15 + 7 == 37, x encodes the last pair
   (15, 36) as 15*1024 + 36, and niters == 16 * 6 == 96.  */
if (i != 16 || j != 37 || x != 15396 || niters != 96)
abort ();
niters = 0; i = -100; j = -100; x = -100;
/* Second pass over the second space, with bounds taken from the
   volatile globals.  */
#pragma omp parallel for collapse(2) lastprivate (i, j, x) reduction(+:niters)
for (i = a; i < b; i += c)
for (j = d * i + e; j < g + i * f; j += h)
{
if (i < 0 || i >= 16 || j < 2 * i + 1 || j >= 7 + i * 2 || k[i][j] != 2)
abort ();
k[i][j]++;
x = i * 1024 + (j & 1023);
niters++;
}
if (i != 16 || j != 37 || x != 15396 || niters != 96)
abort ();
/* Every entry in the second space must have been bumped by both
   passes.  */
for (i = 0; i < 16; i++)
for (j = i * 2 + 1; j < 2 * i + 7; j++)
if (k[i][j] == 3)
k[i][j] = 0;
else
abort ();
return 0;
}