// File-viewer metadata captured with the source: 143 lines, 2.3 KiB, C.
/* Return twice VAL, converted to T.

   The function is marked `acc routine' and is called from the OpenACC
   compute regions in this file.  The argument is always an int, so T
   cannot be deduced: callers name it explicitly (accDouble<float> (y)),
   and a non-int argument is converted to int before it is doubled.  */
#pragma acc routine
template <typename T> T
accDouble (int val)
{
  return static_cast<T> (val * 2);
}

template<typename T, int I> T
|
|
oacc_parallel_copy (T a)
|
|
{
|
|
T b = 0;
|
|
char w = 1;
|
|
int x = 2;
|
|
float y = 3;
|
|
double z = 4;
|
|
|
|
#pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a)
|
|
#pragma acc loop gang worker vector
|
|
for (int i = 0; i < 1; i++)
|
|
b = a;
|
|
|
|
#pragma acc parallel num_gangs (a) copy (w, x, y, z)
|
|
#pragma acc loop
|
|
for (int i = 0; i < 1; i++)
|
|
{
|
|
w = accDouble<char>(w);
|
|
x = accDouble<int>(x);
|
|
y = accDouble<float>(y);
|
|
z = accDouble<double>(z);
|
|
}
|
|
|
|
#pragma acc parallel num_gangs (a) if (1)
|
|
{
|
|
#pragma acc loop independent collapse (2) gang
|
|
for (int i = 0; i < a; i++)
|
|
for (int j = 0; j < 5; j++)
|
|
b = a;
|
|
|
|
#pragma acc loop auto tile (I, 3)
|
|
for (int i = 0; i < a; i++)
|
|
for (int j = 0; j < 5; j++)
|
|
b = a;
|
|
|
|
#pragma acc loop seq
|
|
for (int i = 0; i < a; i++)
|
|
b = a;
|
|
}
|
|
|
|
T c;
|
|
|
|
#pragma acc parallel num_workers (10)
|
|
#pragma acc loop worker
|
|
for (int i = 0; i < 1; i++)
|
|
{
|
|
#pragma acc atomic capture
|
|
c = b++;
|
|
|
|
#pragma atomic update
|
|
c++;
|
|
|
|
#pragma acc atomic read
|
|
b = a;
|
|
|
|
#pragma acc atomic write
|
|
b = a;
|
|
}
|
|
|
|
#pragma acc parallel reduction (+:c)
|
|
c = 1;
|
|
|
|
#pragma acc data if (1) copy (b)
|
|
{
|
|
#pragma acc parallel
|
|
{
|
|
b = a;
|
|
}
|
|
}
|
|
|
|
#pragma acc enter data copyin (b)
|
|
#pragma acc parallel present (b)
|
|
{
|
|
b = a;
|
|
}
|
|
|
|
#pragma acc update host (b)
|
|
#pragma acc update self (b)
|
|
#pragma acc update device (b)
|
|
#pragma acc exit data delete (b)
|
|
|
|
return b;
|
|
}
|
|
|
|
template<typename T> T
|
|
oacc_kernels_copy (T a)
|
|
{
|
|
T b = 0;
|
|
T c = 0;
|
|
char w = 1;
|
|
int x = 2;
|
|
float y = 3;
|
|
double z = 4;
|
|
|
|
#pragma acc kernels copy (w, x, y, z)
|
|
{
|
|
w = accDouble<char>(w);
|
|
x = accDouble<int>(x);
|
|
y = accDouble<float>(y);
|
|
z = accDouble<double>(z);
|
|
}
|
|
|
|
#pragma acc kernels copyout (b) copyin (a)
|
|
b = a;
|
|
|
|
#pragma acc kernels loop reduction (+:c)
|
|
for (int i = 0; i < 10; i++)
|
|
c = 1;
|
|
|
|
#pragma acc data if (1) copy (b)
|
|
{
|
|
#pragma acc kernels
|
|
{
|
|
b = a;
|
|
}
|
|
}
|
|
|
|
#pragma acc enter data copyin (b)
|
|
#pragma acc kernels present (b)
|
|
{
|
|
b = a;
|
|
}
|
|
|
|
return b;
|
|
}
|
|
|
|
int
|
|
main ()
|
|
{
|
|
int b = oacc_parallel_copy<int, 4> (5);
|
|
int c = oacc_kernels_copy<int> (5);
|
|
|
|
return b + c;
|
|
}
|