2620c80db0
gcc/testsuite/ * c-c++-common/goacc/combined-directives.c: Clean up dg-* directives. * c-c++-common/goacc/loop-clauses.c: Likewise. * g++.dg/goacc/template.C: Likewise. * gfortran.dg/goacc/combined-directives.f90: Likewise. * gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/loop-5.f95: Likewise. * gfortran.dg/goacc/loop-6.f95: Likewise. * gfortran.dg/goacc/loop-tree-1.f90: Likewise. * c-c++-common/goacc-gomp/nesting-1.c: Update. * c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise. * c-c++-common/goacc/clauses-fail.c: Likewise. * c-c++-common/goacc/parallel-1.c: Likewise. * c-c++-common/goacc/reduction-1.c: Likewise. * c-c++-common/goacc/reduction-2.c: Likewise. * c-c++-common/goacc/reduction-3.c: Likewise. * c-c++-common/goacc/reduction-4.c: Likewise. * c-c++-common/goacc/routine-3.c: Likewise. * c-c++-common/goacc/routine-4.c: Likewise. * c-c++-common/goacc/routine-5.c: Likewise. * c-c++-common/goacc/tile.c: Likewise. * g++.dg/goacc/template.C: Likewise. * gfortran.dg/goacc/combined-directives.f90: Likewise. * c-c++-common/goacc/nesting-1.c: Move dg-error test cases into... * c-c++-common/goacc/nesting-fail-1.c: ... this file. Update. * c-c++-common/goacc/kernels-1.c: Update. Incorporate... * c-c++-common/goacc/kernels-empty.c: ... this file, and... * c-c++-common/goacc/kernels-eternal.c: ... this file, and... * c-c++-common/goacc/kernels-noreturn.c: ... this file. * c-c++-common/goacc/host_data-1.c: New file. Incorporate... * c-c++-common/goacc/use_device-1.c: ... this file. * c-c++-common/goacc/host_data-2.c: New file. Incorporate... * c-c++-common/goacc/host_data-5.c: ... this file, and... * c-c++-common/goacc/host_data-6.c: ... this file. * c-c++-common/goacc/loop-2-kernels.c: New file. * c-c++-common/goacc/loop-2-parallel.c: Likewise. * c-c++-common/goacc/loop-3.c: Likewise. * g++.dg/goacc/reference.C: Likewise. * g++.dg/goacc/routine-1.C: Likewise. * g++.dg/goacc/routine-2.C: Likewise. 
libgomp/ * testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Update. * testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/vector-loop.c: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/declare-1.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise. XFAIL. * testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Update. Incorporate... * testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c: ... this file. * testsuite/libgomp.oacc-c++/template-reduction.C: New file. * testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/private-variables.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise. * testsuite/libgomp.oacc-fortran/clauses-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/default-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/firstprivate-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/gang-static-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/if-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90: Likewise. * testsuite/libgomp.oacc-fortran/pr68813.f90: Likewise. * testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Merge this file... * testsuite/libgomp.oacc-c-c++-common/parallel-1.c: ..., and this file into... * testsuite/libgomp.oacc-c-c++-common/data-clauses.h: ... 
this new file. Update. * testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c: New file. * testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-2.c: Rename to... * testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c: ... this new file. Update. * testsuite/libgomp.oacc-c-c++-common/parallel-2.c: Rename to... * testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c: ... this new file. Update. * testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: New file. Incorporate... * testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/worker-single-4.c: ... this file, and... * testsuite/libgomp.oacc-c-c++-common/worker-single-6.c: ... this file. * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Remove file. Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com> Co-Authored-By: Chung-Lin Tang <cltang@codesourcery.com> Co-Authored-By: James Norris <jnorris@codesourcery.com> Co-Authored-By: Julian Brown <julian@codesourcery.com> Co-Authored-By: Nathan Sidwell <nathan@codesourcery.com> Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r234575
954 lines
19 KiB
C
954 lines
19 KiB
C
#include <assert.h>
|
|
#include <openacc.h>
|
|
|
|
/* Two-component integer aggregate, used by the tests below as a small
   struct-typed private variable.  */
typedef struct {
  int x, y;
} vec2;
|
|
|
|
/* Larger aggregate (three integer components plus a 13-element attribute
   array), used by the tests below as a struct-typed private variable.  */
typedef struct {
  int x, y, z;
  int attr[13];
} vec3_attr;
|
|
|
|
|
|
/* Test of gang-private variables declared in local scope with parallel
|
|
directive. */
|
|
|
|
void local_g_1()
|
|
{
|
|
int i, arr[32];
|
|
|
|
for (i = 0; i < 32; i++)
|
|
arr[i] = 3;
|
|
|
|
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
|
|
{
|
|
int x;
|
|
|
|
#pragma acc loop gang(static:1)
|
|
for (i = 0; i < 32; i++)
|
|
x = i * 2;
|
|
|
|
#pragma acc loop gang(static:1)
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
if (acc_on_device (acc_device_host))
|
|
x = i * 2;
|
|
arr[i] += x;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 32; i++)
|
|
assert (arr[i] == 3 + i * 2);
|
|
}
|
|
|
|
|
|
/* Test of worker-private variables declared in a local scope, broadcasting
   to vector-partitioned mode.  Back-to-back worker loops.

   Each worker loop body declares its own 'x', computed from i and j, and
   the nested vector loop adds x * k to arr[i * 1024 + j * 32 + k].  Note
   that 'i ^ j * 3' and 'i | j * 5' parse as 'i ^ (j * 3)' and
   'i | (j * 5)'.  */

void local_w_1()
{
  int i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int x = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }

        #pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int x = i | j * 5;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  /* Every element must have received both contributions exactly once.  */
  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of worker-private variables declared in a local scope, broadcasting
   to vector-partitioned mode.  Successive vector loops.

   A single 'x' declared in the worker loop body is assigned twice; each
   assignment is followed by a vector loop that reads it.  */

void local_w_2()
{
  int i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int x = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;

            /* Update 'x' between the two vector loops; the second loop
               must observe the new value.  */
            x = i | j * 5;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of worker-private variables declared in a local scope, broadcasting
|
|
to vector-partitioned mode. Aggregate worker variable. */
|
|
|
|
void local_w_3()
|
|
{
|
|
int i, arr[32 * 32 * 32];
|
|
|
|
for (i = 0; i < 32 * 32 * 32; i++)
|
|
arr[i] = i;
|
|
|
|
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
|
|
{
|
|
int j;
|
|
|
|
#pragma acc loop gang
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
#pragma acc loop worker
|
|
for (j = 0; j < 32; j++)
|
|
{
|
|
int k;
|
|
vec2 pt;
|
|
|
|
pt.x = i ^ j * 3;
|
|
pt.y = i | j * 5;
|
|
|
|
#pragma acc loop vector
|
|
for (k = 0; k < 32; k++)
|
|
arr[i * 1024 + j * 32 + k] += pt.x * k;
|
|
|
|
#pragma acc loop vector
|
|
for (k = 0; k < 32; k++)
|
|
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 32; i++)
|
|
for (int j = 0; j < 32; j++)
|
|
for (int k = 0; k < 32; k++)
|
|
{
|
|
int idx = i * 1024 + j * 32 + k;
|
|
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
|
}
|
|
}
|
|
|
|
|
|
/* Test of worker-private variables declared in a local scope, broadcasting
|
|
to vector-partitioned mode. Addressable worker variable. */
|
|
|
|
void local_w_4()
|
|
{
|
|
int i, arr[32 * 32 * 32];
|
|
|
|
for (i = 0; i < 32 * 32 * 32; i++)
|
|
arr[i] = i;
|
|
|
|
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
|
|
{
|
|
int j;
|
|
|
|
#pragma acc loop gang
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
#pragma acc loop worker
|
|
for (j = 0; j < 32; j++)
|
|
{
|
|
int k;
|
|
vec2 pt, *ptp;
|
|
|
|
ptp = &pt;
|
|
|
|
pt.x = i ^ j * 3;
|
|
|
|
#pragma acc loop vector
|
|
for (k = 0; k < 32; k++)
|
|
arr[i * 1024 + j * 32 + k] += ptp->x * k;
|
|
|
|
ptp->y = i | j * 5;
|
|
|
|
#pragma acc loop vector
|
|
for (k = 0; k < 32; k++)
|
|
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 32; i++)
|
|
for (int j = 0; j < 32; j++)
|
|
for (int k = 0; k < 32; k++)
|
|
{
|
|
int idx = i * 1024 + j * 32 + k;
|
|
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
|
}
|
|
}
|
|
|
|
|
|
/* Test of worker-private variables declared in a local scope, broadcasting
   to vector-partitioned mode.  Array worker variable.

   The worker loop body declares a two-element array; each element is
   written immediately before the vector loop that reads it.  */

void local_w_5()
{
  int i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;
            int pt[2];

            pt[0] = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += pt[0] * k;

            pt[1] = i | j * 5;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += pt[1] * k;
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of gang-private variables declared on loop directive.

   'x' is a function-scope variable (initialised to 5) named in the
   private clause of the gang loop.  Each iteration assigns it before
   reading it, so the initial value never contributes to the result:
   arr[i] starts at i and ends at i + i * 2.  */

void loop_g_1()
{
  int x = 5, i, arr[32];

  for (i = 0; i < 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
    #pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        x = i * 2;
        arr[i] += x;
      }
  }

  for (i = 0; i < 32; i++)
    assert (arr[i] == i * 3);
}
|
|
|
|
|
|
/* Test of gang-private variables declared on loop directive, with broadcasting
   to partitioned workers.

   'x' is set once per gang-loop iteration and then read by every
   iteration of the nested worker loop, so each of the 32 elements in
   row i gains i * 2.  */

void loop_g_2()
{
  int x = 5, i, arr[32 * 32];

  for (i = 0; i < 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
    #pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        x = i * 2;

        #pragma acc loop worker
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x;
      }
  }

  /* i / 32 recovers the gang-loop index (row) of flat element i.  */
  for (i = 0; i < 32 * 32; i++)
    assert (arr[i] == i + (i / 32) * 2);
}
|
|
|
|
|
|
/* Test of gang-private variables declared on loop directive, with broadcasting
   to partitioned vectors.

   Same shape as the worker variant, but the inner loop that reads 'x' is
   a vector loop.  */

void loop_g_3()
{
  int x = 5, i, arr[32 * 32];

  for (i = 0; i < 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
    #pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        x = i * 2;

        #pragma acc loop vector
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x;
      }
  }

  /* i / 32 recovers the gang-loop index (row) of flat element i.  */
  for (i = 0; i < 32 * 32; i++)
    assert (arr[i] == i + (i / 32) * 2);
}
|
|
|
|
|
|
/* Test of gang-private addressable variable declared on loop directive, with
   broadcasting to partitioned workers.

   The address of the private 'x' is taken inside the gang loop and 'x' is
   modified through that pointer after the worker loop.  */

void loop_g_4()
{
  int x = 5, i, arr[32 * 32];

  for (i = 0; i < 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
    #pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        int *p = &x;

        x = i * 2;

        #pragma acc loop worker
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x;

        /* Decrement through the pointer after the worker loop.  'x' is
           reassigned at the top of each iteration, so this write is not
           reflected in the expected values checked below.  */
        (*p)--;
      }
  }

  for (i = 0; i < 32 * 32; i++)
    assert (arr[i] == i + (i / 32) * 2);
}
|
|
|
|
|
|
/* Test of gang-private array variable declared on loop directive, with
   broadcasting to partitioned workers.

   The private 8-element array 'x' is filled with x[j] = j * 2 in each
   gang-loop iteration, then read (index j % 8) by the worker loop.  */

void loop_g_5()
{
  int x[8], i, arr[32 * 32];

  for (i = 0; i < 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
    #pragma acc loop gang private(x)
    for (i = 0; i < 32; i++)
      {
        for (int j = 0; j < 8; j++)
          x[j] = j * 2;

        #pragma acc loop worker
        for (int j = 0; j < 32; j++)
          arr[i * 32 + j] += x[j % 8];
      }
  }

  /* For flat index i = row * 32 + j, i % 8 equals j % 8 because 32 is a
     multiple of 8.  */
  for (i = 0; i < 32 * 32; i++)
    assert (arr[i] == i + (i % 8) * 2);
}
|
|
|
|
|
|
/* Test of gang-private aggregate variable declared on loop directive, with
|
|
broadcasting to partitioned workers. */
|
|
|
|
void loop_g_6()
|
|
{
|
|
int i, arr[32 * 32];
|
|
vec3_attr pt;
|
|
|
|
for (i = 0; i < 32 * 32; i++)
|
|
arr[i] = i;
|
|
|
|
#pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
|
|
{
|
|
#pragma acc loop gang private(pt)
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
pt.x = i;
|
|
pt.y = i * 2;
|
|
pt.z = i * 4;
|
|
pt.attr[5] = i * 6;
|
|
|
|
#pragma acc loop worker
|
|
for (int j = 0; j < 32; j++)
|
|
arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 32 * 32; i++)
|
|
assert (arr[i] == i + (i / 32) * 13);
|
|
}
|
|
|
|
|
|
/* Test of vector-private variables declared on loop directive.

   The function-scope 'x' is named in the private clause of two successive
   vector loops.  Each vector iteration assigns 'x' before using it.  */

void loop_v_1()
{
  int x, i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;

            #pragma acc loop vector private(x)
            for (k = 0; k < 32; k++)
              {
                x = i ^ j * 3;
                arr[i * 1024 + j * 32 + k] += x * k;
              }

            #pragma acc loop vector private(x)
            for (k = 0; k < 32; k++)
              {
                x = i | j * 5;
                arr[i * 1024 + j * 32 + k] += x * k;
              }
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of vector-private variables declared on loop directive.  Array type.

   The function-scope two-element array 'pt' is named in the private
   clause of the vector loop; both elements are written and then read
   within the same vector iteration.  */

void loop_v_2()
{
  int pt[2], i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker
        for (j = 0; j < 32; j++)
          {
            int k;

            #pragma acc loop vector private(pt)
            for (k = 0; k < 32; k++)
              {
                pt[0] = i ^ j * 3;
                pt[1] = i | j * 5;
                arr[i * 1024 + j * 32 + k] += pt[0] * k;
                arr[i * 1024 + j * 32 + k] += pt[1] * k;
              }
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of worker-private variables declared on a loop directive.

   The function-scope 'x' (initialised to 5) is named in the private
   clause of the worker loop.  Each worker iteration assigns it and then
   adds it to arr[i * 32 + j].  */

void loop_w_1()
{
  int x = 5, i, arr[32 * 32];

  for (i = 0; i < 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            x = i ^ j * 3;
            /* Try to ensure 'x' accesses don't get optimized into a
               temporary.  */
            __asm__ __volatile__ ("");
            arr[i * 32 + j] += x;
          }
      }
  }

  /* For flat index i, i / 32 is the gang-loop index and i % 32 is the
     worker-loop index.  */
  for (i = 0; i < 32 * 32; i++)
    assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
}
|
|
|
|
|
|
/* Test of worker-private variables declared on a loop directive, broadcasting
   to vector-partitioned mode.

   'x' is private on the worker loop, assigned once per worker iteration
   and read by every iteration of the nested vector loop.  */

void loop_w_2()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k);
        }
}
|
|
|
|
|
|
/* Test of worker-private variables declared on a loop directive, broadcasting
   to vector-partitioned mode.  Back-to-back worker loops.

   Two consecutive worker loops each name 'x' in their private clause and
   assign it a different value before the nested vector loop reads it.  */

void loop_w_3()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }

        #pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i | j * 5;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of worker-private variables declared on a loop directive, broadcasting
   to vector-partitioned mode.  Successive vector loops.

   Within one worker iteration 'x' is assigned, read by a vector loop,
   reassigned, and read by a second vector loop.  */

void loop_w_4()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            x = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;

            /* The second vector loop must observe this new value.  */
            x = i | j * 5;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of worker-private variables declared on a loop directive, broadcasting
   to vector-partitioned mode.  Addressable worker variable.

   The address of the private 'x' is taken; the first value is stored
   directly, the second through the pointer 'p', and both vector loops
   read 'x' by name.  */

void loop_w_5()
{
  int x = 5, i, arr[32 * 32 * 32];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        #pragma acc loop worker private(x)
        for (j = 0; j < 32; j++)
          {
            int k;
            int *p = &x;

            x = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;

            /* Store through the pointer; the next vector loop reads the
               result via 'x'.  */
            *p = i | j * 5;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += x * k;
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of worker-private variables declared on a loop directive, broadcasting
|
|
to vector-partitioned mode. Aggregate worker variable. */
|
|
|
|
void loop_w_6()
|
|
{
|
|
int i, arr[32 * 32 * 32];
|
|
vec2 pt;
|
|
|
|
for (i = 0; i < 32 * 32 * 32; i++)
|
|
arr[i] = i;
|
|
|
|
#pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
|
|
{
|
|
int j;
|
|
|
|
#pragma acc loop gang
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
#pragma acc loop worker private(pt)
|
|
for (j = 0; j < 32; j++)
|
|
{
|
|
int k;
|
|
|
|
pt.x = i ^ j * 3;
|
|
pt.y = i | j * 5;
|
|
|
|
#pragma acc loop vector
|
|
for (k = 0; k < 32; k++)
|
|
arr[i * 1024 + j * 32 + k] += pt.x * k;
|
|
|
|
#pragma acc loop vector
|
|
for (k = 0; k < 32; k++)
|
|
arr[i * 1024 + j * 32 + k] += pt.y * k;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 32; i++)
|
|
for (int j = 0; j < 32; j++)
|
|
for (int k = 0; k < 32; k++)
|
|
{
|
|
int idx = i * 1024 + j * 32 + k;
|
|
assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
|
|
}
|
|
}
|
|
|
|
|
|
/* Test of worker-private variables declared on loop directive, broadcasting
   to vector-partitioned mode.  Array worker variable.

   The function-scope two-element array 'pt' is named in the private
   clause of the worker loop; each element is written immediately before
   the vector loop that reads it.  */

void loop_w_7()
{
  int i, arr[32 * 32 * 32];
  int pt[2];

  for (i = 0; i < 32 * 32 * 32; i++)
    arr[i] = i;

  /* "pt" is treated as "present_or_copy" on the parallel directive because it
     is an array variable.  */
  #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
  {
    int j;

    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        /* But here, it is made private per-worker.  */
        #pragma acc loop worker private(pt)
        for (j = 0; j < 32; j++)
          {
            int k;

            pt[0] = i ^ j * 3;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += pt[0] * k;

            pt[1] = i | j * 5;

            #pragma acc loop vector
            for (k = 0; k < 32; k++)
              arr[i * 1024 + j * 32 + k] += pt[1] * k;
          }
      }
  }

  for (i = 0; i < 32; i++)
    for (int j = 0; j < 32; j++)
      for (int k = 0; k < 32; k++)
        {
          int idx = i * 1024 + j * 32 + k;
          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
        }
}
|
|
|
|
|
|
/* Test of gang-private variables declared on the parallel directive. */
|
|
|
|
void parallel_g_1()
|
|
{
|
|
int x = 5, i, arr[32];
|
|
|
|
for (i = 0; i < 32; i++)
|
|
arr[i] = 3;
|
|
|
|
#pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
|
|
{
|
|
#pragma acc loop gang(static:1)
|
|
for (i = 0; i < 32; i++)
|
|
x = i * 2;
|
|
|
|
#pragma acc loop gang(static:1)
|
|
for (i = 0; i < 32; i++)
|
|
{
|
|
if (acc_on_device (acc_device_host))
|
|
x = i * 2;
|
|
arr[i] += x;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 32; i++)
|
|
assert (arr[i] == 3 + i * 2);
|
|
}
|
|
|
|
|
|
/* Test of gang-private array variable declared on the parallel directive.

   The function-scope 32-element array 'x' is named in the private clause
   of the parallel directive.  Each gang-loop iteration fills it with
   x[j] = j * 2 in a plain (undecorated) loop, then a worker loop reads it
   in reverse order (x[31 - j]).  */

void parallel_g_2()
{
  int x[32], i, arr[32 * 32];

  for (i = 0; i < 32 * 32; i++)
    arr[i] = i;

  #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) vector_length(32)
  {
    #pragma acc loop gang
    for (i = 0; i < 32; i++)
      {
        int j;
        for (j = 0; j < 32; j++)
          x[j] = j * 2;

        #pragma acc loop worker
        for (j = 0; j < 32; j++)
          arr[i * 32 + j] += x[31 - j];
      }
  }

  /* For flat index i, i % 32 is the worker-loop index j.  */
  for (i = 0; i < 32 * 32; i++)
    assert (arr[i] == i + (31 - (i % 32)) * 2);
}
|
|
|
|
|
|
/* Run every test case in turn.  Each one checks its own results with
   assert (), so reaching the final return means all of them passed.  */
int main ()
{
  /* Variables declared in a local scope inside the parallel region.  */
  local_g_1();
  local_w_1();
  local_w_2();
  local_w_3();
  local_w_4();
  local_w_5();

  /* Variables named in a private clause on a gang loop.  */
  loop_g_1();
  loop_g_2();
  loop_g_3();
  loop_g_4();
  loop_g_5();
  loop_g_6();

  /* Variables named in a private clause on a vector loop.  */
  loop_v_1();
  loop_v_2();

  /* Variables named in a private clause on a worker loop.  */
  loop_w_1();
  loop_w_2();
  loop_w_3();
  loop_w_4();
  loop_w_5();
  loop_w_6();
  loop_w_7();

  /* Variables named in a private clause on the parallel directive.  */
  parallel_g_1();
  parallel_g_2();

  return 0;
}
|