Add oacc kernels tests in goacc
2016-01-18 Tom de Vries <tom@codesourcery.com>

* c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test.
* c-c++-common/goacc/kernels-double-reduction.c: New test.
* c-c++-common/goacc/kernels-empty.c: New test.
* c-c++-common/goacc/kernels-eternal.c: New test.
* c-c++-common/goacc/kernels-loop-2.c: New test.
* c-c++-common/goacc/kernels-loop-3.c: New test.
* c-c++-common/goacc/kernels-loop-data-2.c: New test.
* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test.
* c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test.
* c-c++-common/goacc/kernels-loop-data-update.c: New test.
* c-c++-common/goacc/kernels-loop-data.c: New test.
* c-c++-common/goacc/kernels-loop-g.c: New test.
* c-c++-common/goacc/kernels-loop-mod-not-zero.c: New test.
* c-c++-common/goacc/kernels-loop-n.c: New test.
* c-c++-common/goacc/kernels-loop-nest.c: New test.
* c-c++-common/goacc/kernels-loop.c: New test.
* c-c++-common/goacc/kernels-noreturn.c: New test.
* c-c++-common/goacc/kernels-one-counter-var.c: New test.
* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New test.
* c-c++-common/goacc/kernels-reduction.c: New test.

From-SVN: r232514
This commit is contained in:
parent
a98d464768
commit
40e26f946c
@ -1,3 +1,26 @@
|
||||
2016-01-18 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test.
|
||||
* c-c++-common/goacc/kernels-double-reduction.c: New test.
|
||||
* c-c++-common/goacc/kernels-empty.c: New test.
|
||||
* c-c++-common/goacc/kernels-eternal.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-2.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-3.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-data-2.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-data-update.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-data.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-g.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-mod-not-zero.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-n.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop-nest.c: New test.
|
||||
* c-c++-common/goacc/kernels-loop.c: New test.
|
||||
* c-c++-common/goacc/kernels-noreturn.c: New test.
|
||||
* c-c++-common/goacc/kernels-one-counter-var.c: New test.
|
||||
* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New test.
|
||||
* c-c++-common/goacc/kernels-reduction.c: New test.
|
||||
|
||||
2016-01-18 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
* gcc.dg/autopar/outer-1.c: Update for new parloops instantiation.
|
||||
|
@ -0,0 +1,54 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
COUNTERTYPE i;
|
||||
COUNTERTYPE ii;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
|
||||
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
|
||||
{
|
||||
for (ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
@ -0,0 +1,37 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 500
|
||||
|
||||
unsigned int a[N][N];
|
||||
|
||||
void __attribute__((noinline,noclone))
|
||||
foo (unsigned int n)
|
||||
{
|
||||
int i, j;
|
||||
unsigned int sum = 1;
|
||||
|
||||
#pragma acc kernels copyin (a[0:n]) copy (sum)
|
||||
{
|
||||
for (i = 0; i < n; ++i)
|
||||
for (j = 0; j < n; ++j)
|
||||
sum += a[i][j];
|
||||
}
|
||||
|
||||
if (sum != 5001)
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
37
gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
Normal file
37
gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
Normal file
@ -0,0 +1,37 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 500
|
||||
|
||||
unsigned int a[N][N];
|
||||
|
||||
void __attribute__((noinline,noclone))
|
||||
foo (void)
|
||||
{
|
||||
int i, j;
|
||||
unsigned int sum = 1;
|
||||
|
||||
#pragma acc kernels copyin (a[0:N]) copy (sum)
|
||||
{
|
||||
for (i = 0; i < N; ++i)
|
||||
for (j = 0; j < N; ++j)
|
||||
sum += a[i][j];
|
||||
}
|
||||
|
||||
if (sum != 5001)
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
6
gcc/testsuite/c-c++-common/goacc/kernels-empty.c
Normal file
6
gcc/testsuite/c-c++-common/goacc/kernels-empty.c
Normal file
@ -0,0 +1,6 @@
|
||||
void
|
||||
foo (void)
|
||||
{
|
||||
#pragma acc kernels
|
||||
;
|
||||
}
|
11
gcc/testsuite/c-c++-common/goacc/kernels-eternal.c
Normal file
11
gcc/testsuite/c-c++-common/goacc/kernels-eternal.c
Normal file
@ -0,0 +1,11 @@
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
#pragma acc kernels
|
||||
{
|
||||
while (1)
|
||||
;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
70
gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
Normal file
70
gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
Normal file
@ -0,0 +1,70 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
#pragma acc kernels copyout (a[0:N])
|
||||
{
|
||||
#ifdef ACC_LOOP
|
||||
#pragma acc loop
|
||||
#endif
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
}
|
||||
|
||||
#pragma acc kernels copyout (b[0:N])
|
||||
{
|
||||
#ifdef ACC_LOOP
|
||||
#pragma acc loop
|
||||
#endif
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
}
|
||||
|
||||
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
|
||||
{
|
||||
#ifdef ACC_LOOP
|
||||
#pragma acc loop
|
||||
#endif
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only three loops are analyzed, and that all can be
|
||||
parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that each of the three loops has been split off into its own function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops1" } } */
|
49
gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
Normal file
49
gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
Normal file
@ -0,0 +1,49 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
unsigned int *__restrict c;
|
||||
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
c[i] = i * 2;
|
||||
|
||||
#pragma acc kernels copy (c[0:N])
|
||||
{
|
||||
#ifdef ACC_LOOP
|
||||
#pragma acc loop
|
||||
#endif
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = c[ii] + ii + 1;
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != i * 2 + i + 1)
|
||||
abort ();
|
||||
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
||||
|
17
gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
Normal file
17
gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-g" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include "kernels-loop.c"
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
||||
|
52
gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
Normal file
52
gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
Normal file
@ -0,0 +1,52 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N ((1024 * 512) + 1)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
|
||||
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
|
||||
{
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
56
gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
Normal file
56
gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
Normal file
@ -0,0 +1,56 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N ((1024 * 512) + 1)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
foo (COUNTERTYPE n)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
|
||||
b = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
|
||||
c = (unsigned int *__restrict)malloc (n * sizeof (unsigned int));
|
||||
|
||||
for (COUNTERTYPE i = 0; i < n; i++)
|
||||
a[i] = i * 2;
|
||||
|
||||
for (COUNTERTYPE i = 0; i < n; i++)
|
||||
b[i] = i * 4;
|
||||
|
||||
#pragma acc kernels copyin (a[0:n], b[0:n]) copyout (c[0:n])
|
||||
{
|
||||
#ifdef ACC_LOOP
|
||||
#pragma acc loop
|
||||
#endif
|
||||
for (COUNTERTYPE ii = 0; ii < n; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < n; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
||||
|
39
gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
Normal file
39
gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
Normal file
@ -0,0 +1,39 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
/* Based on autopar/outer-1.c. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 1000
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int x[N][N];
|
||||
|
||||
#pragma acc kernels copyout (x)
|
||||
{
|
||||
for (int ii = 0; ii < N; ii++)
|
||||
for (int jj = 0; jj < N; jj++)
|
||||
x[ii][jj] = ii + jj + 3;
|
||||
}
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
for (int j = 0; j < N; j++)
|
||||
if (x[i][j] != i + j + 3)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
56
gcc/testsuite/c-c++-common/goacc/kernels-loop.c
Normal file
56
gcc/testsuite/c-c++-common/goacc/kernels-loop.c
Normal file
@ -0,0 +1,56 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
|
||||
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
|
||||
{
|
||||
#ifdef ACC_LOOP
|
||||
#pragma acc loop
|
||||
#endif
|
||||
for (COUNTERTYPE ii = 0; ii < N; ii++)
|
||||
c[ii] = a[ii] + b[ii];
|
||||
}
|
||||
|
||||
for (COUNTERTYPE i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
||||
|
12
gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c
Normal file
12
gcc/testsuite/c-c++-common/goacc/kernels-noreturn.c
Normal file
@ -0,0 +1,12 @@
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
|
||||
#pragma acc kernels
|
||||
{
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
54
gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
Normal file
54
gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N (1024 * 512)
|
||||
#define COUNTERTYPE unsigned int
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
unsigned int *__restrict a;
|
||||
unsigned int *__restrict b;
|
||||
unsigned int *__restrict c;
|
||||
COUNTERTYPE i;
|
||||
|
||||
a = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
b = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
c = (unsigned int *)malloc (N * sizeof (unsigned int));
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = i * 2;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
b[i] = i * 4;
|
||||
|
||||
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
|
||||
{
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
||||
|
36
gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
Normal file
36
gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
Normal file
@ -0,0 +1,36 @@
|
||||
/* { dg-additional-options "-O2" } */
|
||||
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
|
||||
/* { dg-additional-options "-fdump-tree-parloops1-all" } */
|
||||
/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define n 10000
|
||||
|
||||
unsigned int a[n];
|
||||
|
||||
void __attribute__((noinline,noclone))
|
||||
foo (void)
|
||||
{
|
||||
int i;
|
||||
unsigned int sum = 1;
|
||||
|
||||
#pragma acc kernels copyin (a[0:n]) copy (sum)
|
||||
{
|
||||
for (i = 0; i < n; ++i)
|
||||
sum += a[i];
|
||||
}
|
||||
|
||||
if (sum != 5001)
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* Check that only one loop is analyzed, and that it can be parallelized. */
|
||||
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
|
||||
|
||||
/* Check that the loop has been split off into a function. */
|
||||
/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops1" } } */
|
||||
|
Loading…
Reference in New Issue
Block a user