#pragma acc routine template T accDouble(int val) { return val * 2; } template T oacc_parallel_copy (T a) { T b = 0; char w = 1; int x = 2; float y = 3; double z = 4; #pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a) #pragma acc loop gang worker vector for (int i = 0; i < 1; i++) b = a; #pragma acc parallel num_gangs (a) copy (w, x, y, z) #pragma acc loop for (int i = 0; i < 1; i++) { w = accDouble(w); x = accDouble(x); y = accDouble(y); z = accDouble(z); } #pragma acc parallel num_gangs (a) if (1) { #pragma acc loop independent collapse (2) gang for (int i = 0; i < a; i++) for (int j = 0; j < 5; j++) b = a; #pragma acc loop auto tile (I, 3) for (int i = 0; i < a; i++) for (int j = 0; j < 5; j++) b = a; #pragma acc loop seq for (int i = 0; i < a; i++) b = a; } T c; #pragma acc parallel num_workers (10) #pragma acc loop worker for (int i = 0; i < 1; i++) { #pragma acc atomic capture c = b++; #pragma atomic update c++; #pragma acc atomic read b = a; #pragma acc atomic write b = a; } #pragma acc parallel reduction (+:c) c = 1; #pragma acc data if (1) copy (b) { #pragma acc parallel { b = a; } } #pragma acc enter data copyin (b) #pragma acc parallel present (b) { b = a; } #pragma acc update host (b) #pragma acc update self (b) #pragma acc update device (b) #pragma acc exit data delete (b) return b; } template T oacc_kernels_copy (T a) { T b = 0; T c = 0; char w = 1; int x = 2; float y = 3; double z = 4; #pragma acc kernels copy (w, x, y, z) { w = accDouble(w); x = accDouble(x); y = accDouble(y); z = accDouble(z); } #pragma acc kernels copyout (b) copyin (a) b = a; #pragma acc kernels loop reduction (+:c) for (int i = 0; i < 10; i++) c = 1; #pragma acc data if (1) copy (b) { #pragma acc kernels { b = a; } } #pragma acc enter data copyin (b) #pragma acc kernels present (b) { b = a; } return b; } int main () { int b = oacc_parallel_copy (5); int c = oacc_kernels_copy (5); return b + c; }