gcc/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
Thomas Schwinge 22cff118f7 Add '-Wopenacc-parallelism'
... to diagnose potentially suboptimal choices regarding OpenACC parallelism.

Not enabled by default: too noisy ("*potentially* suboptimal choices"); see
XFAILed 'dg-bogus'es.

	gcc/c-family/
	* c.opt (Wopenacc-parallelism): New.
	gcc/fortran/
	* lang.opt (Wopenacc-parallelism): New.
	gcc/
	* omp-offload.c (oacc_validate_dims): Implement
	'-Wopenacc-parallelism'.
	* doc/invoke.texi (-Wopenacc-parallelism): Document.
	gcc/testsuite/
	* c-c++-common/goacc/diag-parallelism-1.c: New.
	* c-c++-common/goacc/acc-icf.c: Specify '-Wopenacc-parallelism',
	and match diagnostics, as appropriate.
	* c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise.
	* c-c++-common/goacc/classify-kernels.c: Likewise.
	* c-c++-common/goacc/classify-parallel.c: Likewise.
	* c-c++-common/goacc/classify-routine.c: Likewise.
	* c-c++-common/goacc/classify-serial.c: Likewise.
	* c-c++-common/goacc/kernels-decompose-1.c: Likewise.
	* c-c++-common/goacc/kernels-decompose-2.c: Likewise.
	* c-c++-common/goacc/parallel-dims-1.c: Likewise.
	* c-c++-common/goacc/parallel-reduction.c: Likewise.
	* c-c++-common/goacc/pr70688.c: Likewise.
	* c-c++-common/goacc/routine-1.c: Likewise.
	* c-c++-common/goacc/routine-level-of-parallelism-2.c: Likewise.
	* c-c++-common/goacc/uninit-dim-clause.c: Likewise.
	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
	* gfortran.dg/goacc/classify-parallel.f95: Likewise.
	* gfortran.dg/goacc/classify-routine.f95: Likewise.
	* gfortran.dg/goacc/classify-serial.f95: Likewise.
	* gfortran.dg/goacc/kernels-decompose-1.f95: Likewise.
	* gfortran.dg/goacc/kernels-decompose-2.f95: Likewise.
	* gfortran.dg/goacc/parallel-tree.f95: Likewise.
	* gfortran.dg/goacc/routine-4.f90: Likewise.
	* gfortran.dg/goacc/routine-level-of-parallelism-1.f90: Likewise.
	* gfortran.dg/goacc/routine-module-mod-1.f90: Likewise.
	* gfortran.dg/goacc/routine-multiple-directives-1.f90: Likewise.
	* gfortran.dg/goacc/uninit-dim-clause.f95: Likewise.
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Specify
	'-Wopenacc-parallelism', and match diagnostics, as appropriate.
	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/mode-transitions.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/private-variables.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/static-variable-1.c:
	Likewise.
	* testsuite/libgomp.oacc-fortran/optional-private.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/par-reduction-2-1.f: Likewise.
	* testsuite/libgomp.oacc-fortran/par-reduction-2-2.f: Likewise.
	* testsuite/libgomp.oacc-fortran/parallel-dims.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/parallel-reduction.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/pr84028.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/routine-7.f90: Likewise.

Co-Authored-By: Nathan Sidwell <nathan@codesourcery.com>
Co-Authored-By: Tom de Vries <vries@codesourcery.com>
Co-Authored-By: Julian Brown <julian@codesourcery.com>
Co-Authored-By: Kwok Cheung Yeung <kcy@codesourcery.com>
2021-04-26 12:32:00 +02:00

488 lines
10 KiB
C

/* Tests of reduction on loop directive. */
/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
aspects of that functionality. */
#include <assert.h>
/* Test of reduction on loop directive (gangs, non-private reduction
   variable).  Runs one additive and one multiplicative gang reduction and
   compares each result with the same computation done on the host.  */

void g_np_1()
{
  int i, arr[1024], res = 0, hres = 0;

  for (i = 0; i < 1024; i++)
    arr[i] = i;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */
  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */
  {
    #pragma acc loop gang reduction(+:res)
    for (i = 0; i < 1024; i++)
      res += arr[i];
  }

  for (i = 0; i < 1024; i++)
    hres += arr[i];

  assert (res == hres);

  res = hres = 1;

  /* Multiply arr[1] .. arr[12] only.  arr[0] is zero, so including it would
     force the product to zero however the partial products are combined,
     and the comparison below could never fail.  The expected value, 12!,
     still fits in a 32-bit int.  */
  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */
  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */
  {
    #pragma acc loop gang reduction(*:res)
    for (i = 1; i <= 12; i++)
      res *= arr[i];
  }

  for (i = 1; i <= 12; i++)
    hres *= arr[i];

  /* Guard the reference value itself: 12! == 479001600.  */
  assert (hres == 479001600);
  assert (res == hres);
}
/* Additive reduction on a loop partitioned across gangs and vectors, where
   the reduction variable is not private to the parallel region.  The
   offloaded sum is compared against the same sum computed on the host.  */

void gv_np_1()
{
  enum { n_elems = 1024 };
  int data[n_elems];
  int idx;
  int sum = 0;
  int expected = 0;

  for (idx = 0; idx != n_elems; ++idx)
    data[idx] = idx;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-1 } */
  {
    #pragma acc loop gang vector reduction(+:sum)
    for (idx = 0; idx < n_elems; idx++)
      sum += data[idx];
  }

  /* Host-side reference value.  */
  for (idx = 0; idx != n_elems; ++idx)
    expected = expected + data[idx];

  assert (sum == expected);
}
/* Additive reduction on a loop partitioned across gangs and workers, where
   the reduction variable is not private to the parallel region.  The result
   is checked against a sum accumulated on the host.  */

void gw_np_1()
{
  int pos;
  int values[1024];
  int total = 0;
  int reference = 0;

  pos = 0;
  while (pos < 1024)
    {
      values[pos] = pos;
      pos++;
    }

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } */
  {
    #pragma acc loop gang worker reduction(+:total)
    for (pos = 0; pos < 1024; pos++)
      total += values[pos];
  }

  /* Integer addition is order-independent, so walk the array backwards.  */
  for (pos = 1023; pos >= 0; pos--)
    reference += values[pos];

  assert (total == reference);
}
/* Additive reduction on a single loop that uses all three levels of
   parallelism (gang, worker and vector) with a non-private reduction
   variable, verified against a sum computed on the host.  */

void gwv_np_1()
{
  enum { len = 1024 };
  int k, src[len];
  int device_sum = 0, host_sum = 0;

  for (k = 0; k < len; ++k)
    src[k] = k;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang worker vector reduction(+:device_sum)
    for (k = 0; k < len; k++)
      device_sum += src[k];
  }

  for (k = 0; k < len; ++k)
    host_sum += src[k];

  assert (device_sum == host_sum);
}
/* Additive reduction spread over two nested loops with a non-private
   reduction variable: the outer loop is gang-partitioned, the inner loop is
   worker- and vector-partitioned, and both name the same variable in their
   reduction clauses.  */

void gwv_np_2()
{
  enum { rows = 32, cols = 1024 };
  int col, row;
  int input[rows * cols];
  int sum = 0, check = 0;

  for (col = 0; col < rows * cols; col++)
    input[col] = col;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang reduction(+:sum)
    for (row = 0; row < rows; row++)
      {
        #pragma acc loop worker vector reduction(+:sum)
        for (col = 0; col < cols; col++)
          sum += input[row * cols + col];
      }

    /* The reduction variable is non-private, so its final value is not
       available until after the parallel region.  */
  }

  /* Reference: a flat sum over the whole array.  */
  for (col = 0; col < rows * cols; col++)
    check += input[col];

  assert (sum == check);
}
/* Same nested gang / worker-vector reduction structure as the integer
   variant, but on double-precision data and with the input array named in
   an explicit copyin clause.  Every value involved is a small integer, so
   the exact equality test on doubles is meaningful.  */

void gwv_np_3()
{
  enum { rows = 32, cols = 1024 };
  int col, row;
  double samples[rows * cols];
  double sum = 0, check = 0;

  for (col = 0; col < rows * cols; col++)
    samples[col] = col;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       copyin(samples)
  {
    #pragma acc loop gang reduction(+:sum)
    for (row = 0; row < rows; row++)
      {
        #pragma acc loop worker vector reduction(+:sum)
        for (col = 0; col < cols; col++)
          sum += samples[row * cols + col];
      }
  }

  for (col = 0; col < rows * cols; col++)
    check += samples[col];

  assert (sum == check);
}
/* Two non-private float reductions (a sum and a maximum) carried through a
   gang-partitioned outer loop and two consecutive worker/vector-partitioned
   inner loops.  The second inner loop visits the same row back to front, so
   every element contributes twice to the sum.  */

void gwv_np_4()
{
  enum { rows = 32, cols = 1024, total = rows * cols };
  int col, row;
  float vals[total];
  float sum = 0, peak = 0, host_sum = 0, host_peak = 0;

  for (col = 0; col < total; col++)
    vals[col] = col % (total / 64);

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
  {
    #pragma acc loop gang reduction(+:sum) reduction(max:peak)
    for (row = 0; row < rows; row++)
      {
        #pragma acc loop worker vector reduction(+:sum) reduction(max:peak)
        for (col = 0; col < cols; col++)
          {
            sum += vals[row * cols + col];
            if (vals[row * cols + col] > peak)
              peak = vals[row * cols + col];
          }

        #pragma acc loop worker vector reduction(+:sum) reduction(max:peak)
        for (col = 0; col < cols; col++)
          {
            sum += vals[row * cols + ((cols - 1) - col)];
            if (vals[row * cols + ((cols - 1) - col)] > peak)
              peak = vals[row * cols + ((cols - 1) - col)];
          }
      }
  }

  /* Host reference: take the forward and the mirrored element of each row
     position in the same iteration.  */
  for (row = 0; row < rows; row++)
    for (col = 0; col < cols; col++)
      {
        const float ahead = vals[row * cols + col];
        const float behind = vals[row * cols + ((cols - 1) - col)];

        host_sum += ahead;
        host_sum += behind;

        if (ahead > host_peak)
          host_peak = ahead;
        if (behind > host_peak)
          host_peak = behind;
      }

  /* 16777216 is 2^24: up to that bound a float holds integers exactly, so
     the equality comparisons below do not depend on summation order.  */
  assert (host_sum <= 16777216);
  assert (sum == host_sum);

  assert (host_peak <= 16777216);
  assert (peak == host_peak);
}
/* Vector-partitioned additive reduction into a variable that is private to
   the parallel region.  A gang-partitioned outer loop produces one sum per
   block of 32 elements and stores it for checking on the host.  */

void v_p_1()
{
  int elt, blk;
  int input[1024], sums[32];
  int partial = 0, want = 0;

  for (elt = 0; elt < 1024; elt++)
    input[elt] = elt;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(partial) copyout(sums)
  /* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "" { target *-*-* } .-2 } */
  {
    #pragma acc loop gang
    for (blk = 0; blk < 32; blk++)
      {
        partial = 0;

        #pragma acc loop vector reduction(+:partial)
        for (elt = 0; elt < 32; elt++)
          partial += input[blk * 32 + elt];

        sums[blk] = partial;
      }
  }

  /* Recompute each block sum on the host and compare.  */
  for (blk = 0; blk < 32; blk++)
    {
      const int *block = &input[blk * 32];

      want = 0;
      for (elt = 0; elt < 32; elt++)
        want += block[elt];

      assert (sums[blk] == want);
    }
}
/* Test of reduction on loop directive (vector reduction in
gang-partitioned/worker-partitioned mode, private reduction variable). */
void v_p_2()
{
int i, j, k;
double ina[1024], inb[1024], out[1024], acc;
for (j = 0; j < 32; j++)
for (i = 0; i < 32; i++)
{
ina[j * 32 + i] = (i == j) ? 2.0 : 0.0;
inb[j * 32 + i] = (double) (i + j);
}
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(acc) copyin(ina, inb) copyout(out)
{
#pragma acc loop gang worker
for (k = 0; k < 32; k++)
for (j = 0; j < 32; j++)
{
acc = 0;
#pragma acc loop vector reduction(+:acc)
for (i = 0; i < 32; i++)
acc += ina[k * 32 + i] * inb[i * 32 + j];
out[k * 32 + j] = acc;
}
}
for (j = 0; j < 32; j++)
for (i = 0; i < 32; i++)
assert (out[j * 32 + i] == (i + j) * 2);
}
/* Worker-partitioned additive reduction into a variable that is private to
   the parallel region.  A gang-partitioned outer loop produces one sum per
   block of 32 elements and stores it for checking on the host.  */

void w_p_1()
{
  enum { blocks = 32, per_block = 32 };
  int elt, blk;
  int input[blocks * per_block], sums[blocks];
  int partial = 0, want = 0;

  for (elt = 0; elt < blocks * per_block; elt++)
    input[elt] = elt;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(partial) copyout(sums)
  /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-2 } */
  {
    #pragma acc loop gang
    for (blk = 0; blk < blocks; blk++)
      {
        partial = 0;

        #pragma acc loop worker reduction(+:partial)
        for (elt = 0; elt < per_block; elt++)
          partial += input[blk * per_block + elt];

        sums[blk] = partial;
      }
  }

  /* Host-side check, one block at a time.  */
  blk = 0;
  while (blk < blocks)
    {
      want = 0;
      for (elt = 0; elt < per_block; elt++)
        want += input[blk * per_block + elt];

      assert (sums[blk] == want);
      blk++;
    }
}
/* Additive reduction on one inner loop partitioned across both workers and
   vectors, with a reduction variable that is private to the parallel
   region.  Each gang-partitioned outer iteration stores one block sum.  */

void wv_p_1()
{
  int inner, outer;
  int source[1024], result[32];
  int acc_val = 0, host_val = 0;

  for (inner = 0; inner < 1024; ++inner)
    source[inner] = inner;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(acc_val) copyout(result)
  {
    #pragma acc loop gang
    for (outer = 0; outer < 32; outer++)
      {
        acc_val = 0;

        #pragma acc loop worker vector reduction(+:acc_val)
        for (inner = 0; inner < 32; inner++)
          acc_val += source[outer * 32 + inner];

        result[outer] = acc_val;
      }
  }

  for (outer = 0; outer < 32; ++outer)
    {
      const int first = outer * 32;

      host_val = 0;
      for (inner = first; inner < first + 32; ++inner)
        host_val += source[inner];

      assert (result[outer] == host_val);
    }
}
/* Two consecutive reductions into the same private variable inside a
   gang-partitioned loop: first a worker-partitioned loop, then a
   vector-partitioned loop running over the same block in descending order.
   The variable starts at the outer index, so each stored result is that
   index plus twice the block sum.  */

void wv_p_2()
{
  enum { blocks = 32, per_block = 1024 };
  int elt, blk;
  int input[blocks * per_block], sums[blocks];
  int partial = 0, want = 0;

  for (elt = 0; elt < blocks * per_block; elt++)
    input[elt] = elt;

  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
                       private(partial) copyout(sums)
  {
    #pragma acc loop gang
    for (blk = 0; blk < blocks; blk++)
      {
        partial = blk;

        #pragma acc loop worker reduction(+:partial)
        for (elt = 0; elt < per_block; elt++)
          partial += input[blk * per_block + elt];

        #pragma acc loop vector reduction(+:partial)
        for (elt = per_block - 1; elt >= 0; elt--)
          partial += input[blk * per_block + elt];

        sums[blk] = partial;
      }
  }

  for (blk = 0; blk < blocks; blk++)
    {
      int once = 0;

      /* Sum the block a single time, then account for both passes.  */
      for (elt = 0; elt < per_block; elt++)
        once += input[blk * per_block + elt];

      want = blk + 2 * once;
      assert (sums[blk] == want);
    }
}
/* Test of reduction on loop directive (workers and vectors, private reduction
variable: gang-redundant mode). */
void wv_p_3()
{
int i, arr[1024], out[32], res = 0, hres = 0;
for (i = 0; i < 1024; i++)
arr[i] = i ^ 33;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
private(res) copyin(arr) copyout(out)
{
/* Private variables aren't initialized by default in openacc. */
res = 0;
/* "res" should be available at the end of the following loop (and should
have the same value redundantly in each gang). */
#pragma acc loop worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
res += arr[i];
#pragma acc loop gang (static: 1)
for (i = 0; i < 32; i++)
out[i] = res;
}
for (i = 0; i < 1024; i++)
hres += arr[i];
for (i = 0; i < 32; i++)
assert (out[i] == hres);
}
int main()
{
g_np_1();
gv_np_1();
gw_np_1();
gwv_np_1();
gwv_np_2();
gwv_np_3();
gwv_np_4();
v_p_1();
v_p_2();
w_p_1();
wv_p_1();
wv_p_2();
wv_p_3();
return 0;
}