[nvptx] Enable setting vector length using -fopenacc-dim -- testcases
Add some test-cases that set vector length using -fopenacc-dim. 2019-01-12 Tom de Vries <tdevries@suse.de> * testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test. * testsuite/libgomp.oacc-fortran/gemm-2.f90: New test. From-SVN: r267897
This commit is contained in:
parent
2c2ff1684d
commit
efb56ae82b
@ -1,3 +1,10 @@
|
||||
2019-01-12 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
|
||||
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
|
||||
* testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.
|
||||
|
||||
2019-01-12 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
* plugin/plugin-nvptx.c (nvptx_exec): Update error message.
|
||||
|
52
libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
Normal file
52
libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
Normal file
@ -0,0 +1,52 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* { dg-additional-options "-fopenacc-dim=::128" } */
|
||||
|
||||
/* Minimized from ref-1.C. */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Add INC to each of the first N elements of PTR, with the loop
   partitioned across the vector dimension.

   The induction variable is unsigned while N is a signed int, so N is
   converted to unsigned for the loop test; callers must pass N >= 0.
   The loop shape is deliberately kept as in the minimized reproducer.  */
#pragma acc routine vector
void __attribute__((noinline, noclone))
Vector (int *ptr, int n, const int inc)
{
#pragma acc loop vector
  for (unsigned ix = 0; ix < n; ix++)
    ptr[ix] += inc;
}
|
||||
|
||||
/* Fill an M x N array with a value that encodes each element's
   coordinates, let the offloaded Vector routine add a constant to every
   element, then verify the result on the host.

   Returns 0 when every element matches, 1 otherwise (after printing each
   mismatch and the total number of failures).  */
int
main (void)
{
  const int n = 32, m = 32;

  int ary[m][n];
  unsigned ix, iy;

  /* Initial pattern: bit 16 set, row index in bits 8 and up, column
     index in the low bits.  */
  for (ix = m; ix--;)
    for (iy = n; iy--;)
      ary[ix][iy] = (1 << 16) + (ix << 8) + iy;

  int err = 0;

  /* Adding (1 << 24) - (1 << 16) moves the marker bit from 16 to 24 and
     leaves the encoded coordinates untouched.  */
#pragma acc parallel copy (ary)
  {
    Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
  }

  for (ix = m; ix--;)
    for (iy = n; iy--;)
      {
	/* Compute the expected value once; compare and print it as
	   unsigned so the %x conversions match their arguments.  */
	const unsigned expected = (1 << 24) + (ix << 8) + iy;
	const unsigned actual = (unsigned) ary[ix][iy];

	if (actual != expected)
	  {
	    printf ("ary[%u][%u] = %x expected %x\n",
		    ix, iy, actual, expected);
	    err++;
	  }
      }

  if (err)
    {
      printf ("%d failed\n", err);
      return 1;
    }

  return 0;
}
|
@ -0,0 +1,39 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* { dg-additional-options "-fopenacc-dim=::128" } */
|
||||
/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
|
||||
/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 1024

/* Input operands (a, b) and result (c) of the element-wise addition.  */
unsigned int a[N];
unsigned int b[N];
unsigned int c[N];

/* Run-time copy of N used as the bound of every loop below.  */
unsigned int n = N;

/* Add a and b element-wise inside a single OpenACC parallel region whose
   loop is partitioned across the vector dimension, then check the result
   on the host.  Aborts on the first mismatch, returns 0 otherwise.

   The region must stay the first (and only) offloaded region of main:
   the dg-output directive of this test matches the kernel name
   main$_omp_fn$0.  */
int
main (void)
{
  /* Small periodic inputs, so the expected sum can be recomputed from
     the index alone.  */
  for (unsigned int i = 0; i < n; ++i)
    {
      a[i] = i % 3;
      b[i] = i % 5;
    }

#pragma acc parallel copyin (a,b) copyout (c)
  {
#pragma acc loop vector
    for (unsigned int i = 0; i < n; i++)
      c[i] = a[i] + b[i];
  }

  for (unsigned int i = 0; i < n; ++i)
    if (c[i] != (i % 3) + (i % 5))
      abort ();

  return 0;
}
|
||||
|
||||
/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
|
||||
/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */
|
@ -0,0 +1,41 @@
|
||||
/* { dg-do run { target openacc_nvidia_accel_selected } } */
|
||||
/* { dg-additional-options "-fopenacc-dim=:2:128" } */
|
||||
/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
|
||||
/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 1024

/* Input operands (a, b) and result (c) of the element-wise addition.  */
unsigned int a[N];
unsigned int b[N];
unsigned int c[N];

/* Run-time copy of N used as a loop bound below.  */
unsigned int n = N;

/* Add a and b element-wise inside a single OpenACC parallel region,
   splitting the index space into 4 chunks of N / 4 elements: the outer
   loop over chunks is worker-partitioned and the inner loop over the
   elements of a chunk is vector-partitioned.  The result is checked on
   the host; the program aborts on the first mismatch and returns 0
   otherwise.

   The region must stay the first (and only) offloaded region of main:
   the dg-output directive of this test matches the kernel name
   main$_omp_fn$0.  */
int
main (void)
{
  /* Small periodic inputs, so the expected sum can be recomputed from
     the index alone.  */
  for (unsigned int i = 0; i < n; ++i)
    {
      a[i] = i % 3;
      b[i] = i % 5;
    }

#pragma acc parallel copyin (a,b) copyout (c)
  {
#pragma acc loop worker
    for (unsigned int i = 0; i < 4; i++)
#pragma acc loop vector
      for (unsigned int j = 0; j < n / 4; j++)
	c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
  }

  for (unsigned int i = 0; i < n; ++i)
    if (c[i] != (i % 3) + (i % 5))
      abort ();

  return 0;
}
|
||||
|
||||
/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */
|
||||
/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */
|
80
libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
Normal file
80
libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
Normal file
@ -0,0 +1,80 @@
|
||||
! Exercise three levels of parallelism using SGEMM from BLAS.
|
||||
|
||||
! { dg-do run }
|
||||
! { dg-additional-options "-fopenacc-dim=::128" }
|
||||
|
||||
! Implicitly set vector_length to 128 using -fopenacc-dim.
|
||||
! Compute c(i,j) = alpha * sum (a(1:k,i) * b(1:k,j)) + beta * c(i,j)
! for i = 1..m and j = 1..n, with the loop nest annotated for OpenACC.
!
!   m, n   extents of the part of c that is updated, c(1:m,1:n)
!   k      length of the summed (first) dimension of a and b
!   alpha  scale factor applied to the accumulated product
!   a      a(1:k,1:m) is read
!   b      b(1:k,1:n) is read
!   beta   scale factor applied to the incoming c; when it compares equal
!          to 0.0 the previous contents of c are not read
!   c      updated in place
subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)
  implicit none
  integer, intent(in) :: m, n, k
  real, intent(in) :: alpha, beta
  real, intent(in) :: a(k,*), b(k,*)
  real, intent(inout) :: c(m,*)

  integer :: i, j, l
  real :: temp

  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp)
  do j = 1, n
    !$acc loop
    do i = 1, m
      temp = 0.0
      !$acc loop reduction(+:temp)
      do l = 1, k
        temp = temp + a(l,i)*b(l,j)
      end do
      if (beta == 0.0) then
        c(i,j) = alpha*temp
      else
        c(i,j) = alpha*temp + beta*c(i,j)
      end if
    end do
  end do
end subroutine openacc_sgemm
|
||||
|
||||
! Host-only reference: compute
! c(i,j) = alpha * sum (a(1:k,i) * b(1:k,j)) + beta * c(i,j)
! for i = 1..m and j = 1..n using plain sequential loops.
!
!   m, n   extents of the part of c that is updated, c(1:m,1:n)
!   k      length of the summed (first) dimension of a and b
!   alpha  scale factor applied to the accumulated product
!   a      a(1:k,1:m) is read
!   b      b(1:k,1:n) is read
!   beta   scale factor applied to the incoming c; when it compares equal
!          to 0.0 the previous contents of c are not read
!   c      updated in place
subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
  implicit none
  integer, intent(in) :: m, n, k
  real, intent(in) :: alpha, beta
  real, intent(in) :: a(k,*), b(k,*)
  real, intent(inout) :: c(m,*)

  integer :: i, j, l
  real :: temp

  do j = 1, n
    do i = 1, m
      ! Accumulate in index order so the summation order stays fixed.
      temp = 0.0
      do l = 1, k
        temp = temp + a(l,i)*b(l,j)
      end do
      if (beta == 0.0) then
        c(i,j) = alpha*temp
      else
        c(i,j) = alpha*temp + beta*c(i,j)
      end if
    end do
  end do
end subroutine host_sgemm
|
||||
|
||||
! Driver: run the same product through the OpenACC routine and the host
! reference, starting from identical inputs, and fail if any element of
! the two results differs.
program main
  implicit none
  integer, parameter :: M = 100, N = 50, K = 2000
  real :: a(K, M), b(K, N), c(M, N), e(M, N)
  real :: alpha, beta

  a(:,:) = 1.0
  b(:,:) = 0.25

  ! c receives the OpenACC result, e the host result; both start at zero.
  c(:,:) = 0.0
  e(:,:) = 0.0

  alpha = 1.05
  beta = 1.25

  call openacc_sgemm (M, N, K, alpha, a, b, beta, c)
  call host_sgemm (M, N, K, alpha, a, b, beta, e)

  ! Exact comparison, as in the element-by-element check it replaces.
  ! STOP with a non-zero code is used instead of the non-standard ABORT
  ! extension.
  if (any (c /= e)) stop 1
end program main
|
Loading…
Reference in New Issue
Block a user