Julian Brown 37d5ad46dd OpenACC host_data support.
gcc/
	* gimple-pretty-print.c (dump_gimple_omp_target): Add host_data
	support.
	* gimple.h (gf_mask): Add GF_OMP_TARGET_KIND_OACC_HOST_DATA.
	(is_gimple_omp_oacc): Add support for above.
	* gimplify.c (omp_region_type): Add ORT_ACC_HOST_DATA.
	(omp_notice_variable): Diagnose undefined implicit uses of
	use_device variables in offloaded regions.
	(gimplify_scan_omp_clauses): Add host_data, use_device
	support. Diagnose undefined mapping of use_device variables in
	OpenACC clauses.
	(gimplify_omp_workshare): Add host_data support.
	(gimplify_expr): Likewise.
	* omp-builtins.def (BUILT_IN_GOACC_HOST_DATA): New.
	* omp-low.c (lookup_decl_in_outer_ctx)
	(maybe_lookup_decl_in_outer_ctx): Add optional argument to skip
	host_data regions.
	(scan_sharing_clauses): Support use_device.
	(check_omp_nesting_restrictions): Support host_data.
	(expand_omp_target): Support host_data.
	(lower_omp_target): Skip over outer host_data regions when looking
	up decls. Support use_device.
	(make_gimple_omp_edges): Support host_data.
	* tree-nested.c (convert_nonlocal_omp_clauses): Add use_device
	clause.

	gcc/c/
	* c-parser.c (c_parser_omp_clause_name): Add use_device support.
	(c_parser_oacc_clause_use_device): New function.
	(c_parser_oacc_all_clauses): Add use_device support.
	(OACC_HOST_DATA_CLAUSE_MASK): New macro.
	(c_parser_oacc_host_data): New function.
	(c_parser_omp_construct): Add host_data support.
	* c-tree.h (c_finish_oacc_host_data): Add prototype.
	* c-typeck.c (c_finish_oacc_host_data): New function.
	(c_finish_omp_clauses): Add use_device support.

	gcc/cp/
	* cp-tree.h (finish_oacc_host_data): Add prototype.
	* parser.c (cp_parser_omp_clause_name): Add use_device support.
	(cp_parser_oacc_all_clauses): Add use_device support.
	(OACC_HOST_DATA_CLAUSE_MASK): New macro.
	(cp_parser_oacc_host_data): New function.
	(cp_parser_omp_construct): Add host_data support.
	(cp_parser_pragma): Add host_data support.
	* semantics.c (finish_omp_clauses): Add use_device support.
	(finish_oacc_host_data): New function.

	gcc/c-family/
	* c-pragma.c (oacc_pragmas): Add PRAGMA_OACC_HOST_DATA.
	* c-pragma.h (pragma_kind): Add PRAGMA_OACC_HOST_DATA.
	(pragma_omp_clause): Add PRAGMA_OACC_CLAUSE_USE_DEVICE.

	libgomp/
	* oacc-parallel.c (GOACC_host_data): New function.
	* libgomp.map (GOACC_host_data): Add to GOACC_2.0.1.
	* testsuite/libgomp.oacc-c-c++-common/host_data-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/host_data-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/host_data-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/host_data-4.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/host_data-5.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/host_data-6.c: New test.


Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
Co-Authored-By: James Norris <James_Norris@mentor.com>

From-SVN: r231118
2015-12-01 15:52:23 +00:00

101 lines
1.7 KiB
C

/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
#include <stdlib.h>
#include <openacc.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
void
saxpy_host (int n, float a, float *x, float *y)
{
int i;
for (i = 0; i < n; i++)
y[i] = y[i] + a * x[i];
}
#pragma acc routine
void
saxpy_target (int n, float a, float *x, float *y)
{
int i;
for (i = 0; i < n; i++)
y[i] = y[i] + a * x[i];
}
int
main(int argc, char **argv)
{
#define N 8
int i;
float x_ref[N], y_ref[N];
float x[N], y[N];
cublasHandle_t h;
float a = 2.0;
for (i = 0; i < N; i++)
{
x[i] = x_ref[i] = 4.0 + i;
y[i] = y_ref[i] = 3.0;
}
saxpy_host (N, a, x_ref, y_ref);
cublasCreate (&h);
#pragma acc data copyin (x[0:N]) copy (y[0:N])
{
#pragma acc host_data use_device (x, y)
{
cublasSaxpy (h, N, &a, x, 1, y, 1);
}
}
for (i = 0; i < N; i++)
{
if (y[i] != y_ref[i])
abort ();
}
#pragma acc data create (x[0:N]) copyout (y[0:N])
{
#pragma acc kernels
for (i = 0; i < N; i++)
y[i] = 3.0;
#pragma acc host_data use_device (x, y)
{
cublasSaxpy (h, N, &a, x, 1, y, 1);
}
}
cublasDestroy (h);
for (i = 0; i < N; i++)
{
if (y[i] != y_ref[i])
abort ();
}
for (i = 0; i < N; i++)
y[i] = 3.0;
/* There's no need to use host_data here. */
#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
{
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
saxpy_target (N, a, x, y);
}
for (i = 0; i < N; i++)
{
if (y[i] != y_ref[i])
abort ();
}
return 0;
}