6c420193e8
This patch adds two new OpenMP runtime routines: omp_target_memcpy_async and omp_target_memcpy_rect_async. Both functions are introduced in OpenMP 5.1 as asynchronous variants of omp_target_memcpy and omp_target_memcpy_rect. In contrast to the synchronous variants, the asynchronous functions have two additional function parameters to allow the specification of task dependences: int depobj_count omp_depend_t *depobj_list integer(c_int), value :: depobj_count integer(omp_depend_kind), optional :: depobj_list(*) The implementation splits the synchronous functions into two parts: (a) check and (b) copy. Then (a) is used in the asynchronous functions for the sequential part, and the actual copy process (b) is executed in a new created task. The sequential part (a) takes into account the requirements for the return values: "The routine returns zero if successful. Otherwise, it returns a non-zero value." (omp_target_memcpy_async, OpenMP 5.1 spec, section 3.8.7) "An application can determine the number of inclusive dimensions supported by an implementation by passing NULL pointers (or C_NULL_PTR, for Fortran) for both dst and src. The routine returns the number of dimensions supported by the implementation for the specified device numbers. No copy operation is performed." (omp_target_memcpy_rect_async, OpenMP 5.1 spec, section 3.8.8) Due to asynchronicity an error is thrown if the asynchronous memcpy is not successful (in contrast to the synchronous functions which use a return value unequal to zero). gcc/ChangeLog: * omp-low.cc (omp_runtime_api_call): Added target_memcpy_async and target_memcpy_rect_async to omp_runtime_apis array. libgomp/ChangeLog: * libgomp.map: Added omp_target_memcpy_async and omp_target_memcpy_rect_async. * libgomp.texi: Both functions are now supported. * omp.h.in: Added omp_target_memcpy_async and omp_target_memcpy_rect_async. * omp_lib.f90.in: Added interfaces for both new functions. * omp_lib.h.in: Likewise. * target.c (ialias_redirect): Added for GOMP_task. (omp_target_memcpy): Restructured into check and copy part. (omp_target_memcpy_check): New helper function for omp_target_memcpy and omp_target_memcpy_async that checks requirements. (omp_target_memcpy_copy): New helper function for omp_target_memcpy and omp_target_memcpy_async that performs the memcpy. (omp_target_memcpy_async_helper): New helper function that is used in omp_target_memcpy_async for the asynchronous task. (omp_target_memcpy_async): Added. (omp_target_memcpy_rect): Restructured into check and copy part. (omp_target_memcpy_rect_check): New helper function for omp_target_memcpy_rect and omp_target_memcpy_rect_async that checks requirements. (omp_target_memcpy_rect_copy): New helper function for omp_target_memcpy_rect and omp_target_memcpy_rect_async that performs the memcpy. (omp_target_memcpy_rect_async_helper): New helper function that is used in omp_target_memcpy_rect_async for the asynchronous task. (omp_target_memcpy_rect_async): Added. * task.c (ialias): Added for GOMP_task. * testsuite/libgomp.c-c++-common/target-memcpy-async-1.c: New test. * testsuite/libgomp.c-c++-common/target-memcpy-async-2.c: New test. * testsuite/libgomp.c-c++-common/target-memcpy-rect-async-1.c: New test. * testsuite/libgomp.c-c++-common/target-memcpy-rect-async-2.c: New test. * testsuite/libgomp.fortran/target-memcpy-async-1.f90: New test. * testsuite/libgomp.fortran/target-memcpy-async-2.f90: New test. * testsuite/libgomp.fortran/target-memcpy-rect-async-1.f90: New test. * testsuite/libgomp.fortran/target-memcpy-rect-async-2.f90: New test.
92 lines
2.2 KiB
C
92 lines
2.2 KiB
C
/* Test for omp_target_memcpy_rect_async considering dependence objects. */
|
|
|
|
#include <omp.h>
|
|
#include <stdlib.h>
|
|
|
|
#define NUM_DIMS 3
|
|
|
|
int
|
|
main ()
|
|
{
|
|
int d = omp_get_default_device ();
|
|
int id = omp_get_initial_device ();
|
|
int a[128], b[64], c[128], e[16], q[128], i;
|
|
void *p;
|
|
|
|
if (d < 0 || d >= omp_get_num_devices ())
|
|
d = id;
|
|
|
|
p = omp_target_alloc (130 * sizeof (int), d);
|
|
if (p == NULL)
|
|
return 0;
|
|
|
|
for (i = 0; i < 128; i++)
|
|
q[i] = 0;
|
|
if (omp_target_memcpy (p, q, 128 * sizeof (int), 0, 0, d, id) != 0)
|
|
abort ();
|
|
|
|
size_t volume[NUM_DIMS] = { 2, 2, 3 };
|
|
size_t dst_offsets[NUM_DIMS] = { 0, 0, 0 };
|
|
size_t src_offsets[NUM_DIMS] = { 0, 0, 0 };
|
|
size_t dst_dimensions[NUM_DIMS] = { 3, 4, 5 };
|
|
size_t src_dimensions[NUM_DIMS] = { 2, 3, 4 };
|
|
|
|
for (i = 0; i < 128; i++)
|
|
a[i] = 42;
|
|
for (i = 0; i < 64; i++)
|
|
b[i] = 24;
|
|
for (i = 0; i < 128; i++)
|
|
c[i] = 0;
|
|
for (i = 0; i < 16; i++)
|
|
e[i] = 77;
|
|
|
|
omp_depend_t obj[2];
|
|
|
|
#pragma omp parallel num_threads(5)
|
|
#pragma omp single
|
|
{
|
|
#pragma omp task depend (out: p)
|
|
omp_target_memcpy (p, a, 128 * sizeof (int), 0, 0, d, id);
|
|
|
|
#pragma omp task depend(inout: p)
|
|
omp_target_memcpy (p, b, 64 * sizeof (int), 0, 0, d, id);
|
|
|
|
#pragma omp task depend(out: c)
|
|
for (i = 0; i < 128; i++)
|
|
c[i] = i + 1;
|
|
|
|
#pragma omp depobj(obj[0]) depend(inout: p)
|
|
#pragma omp depobj(obj[1]) depend(in: c)
|
|
|
|
/* This produces: 1 2 3 - - 5 6 7 - - at positions 0..9 and
|
|
13 14 15 - - 17 18 19 - - at positions 20..29. */
|
|
omp_target_memcpy_rect_async (p, c, sizeof (int), NUM_DIMS, volume,
|
|
dst_offsets, src_offsets, dst_dimensions,
|
|
src_dimensions, d, id, 2, obj);
|
|
|
|
#pragma omp task depend(in: p)
|
|
omp_target_memcpy (p, e, 16 * sizeof (int), 0, 0, d, id);
|
|
}
|
|
|
|
#pragma omp taskwait
|
|
|
|
if (omp_target_memcpy (q, p, 128 * sizeof(int), 0, 0, id, d) != 0)
|
|
abort ();
|
|
|
|
for (i = 0; i < 16; ++i)
|
|
if (q[i] != 77)
|
|
abort ();
|
|
if (q[20] != 13 || q[21] != 14 || q[22] != 15 || q[25] != 17 || q[26] != 18
|
|
|| q[27] != 19)
|
|
abort ();
|
|
for (i = 28; i < 64; ++i)
|
|
if (q[i] != 24)
|
|
abort ();
|
|
for (i = 64; i < 128; ++i)
|
|
if (q[i] != 42)
|
|
abort ();
|
|
|
|
omp_target_free (p, d);
|
|
return 0;
|
|
}
|