Thomas Schwinge 1404af62dc [PR88407] [OpenACC] Correctly handle unseen async-arguments
... which turn the operation into a no-op.

	libgomp/
	PR libgomp/88407
	* plugin/plugin-nvptx.c (nvptx_async_test, nvptx_wait)
	(nvptx_wait_async): Unseen async-argument is a no-op.
	* testsuite/libgomp.oacc-c-c++-common/async_queue-1.c: Update.
	* testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise.
	* testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-71.c: Merge into...
	* testsuite/libgomp.oacc-c-c++-common/lib-69.c: ... this.  Update.
	* testsuite/libgomp.oacc-c-c++-common/lib-77.c: Merge into...
	* testsuite/libgomp.oacc-c-c++-common/lib-74.c: ... this.  Update.

From-SVN: r267150
2018-12-14 21:42:40 +01:00

128 lines
3.2 KiB
C

/* Test mapping of async values to specific underlying queues. */
#undef NDEBUG
#include <assert.h>
#include <openacc.h>
/* This is implemented in terms of the "acc_get_cuda_stream" interface. */
/* Table of the async-arguments exercised by this test.  Each entry pairs an
   async-argument with a slot in which "main" records what
   "acc_get_cuda_stream" returned for it.  The special values are listed
   first: "main" skips leading entries whose "async" is negative when it
   needs ordinary async-arguments.  */
struct
{
  /* Async-argument passed to the OpenACC directives and routines.  */
  int async;
  /* Value most recently returned by "acc_get_cuda_stream" for "async".  */
  void *cuda_stream;
} queues[] =
  {
    { acc_async_sync, NULL },
    { acc_async_noval, NULL },
    { 0, NULL },
    { 1, NULL },
    { 2, NULL },
    { 36, NULL },
    { 1982, NULL }
  };

/* Number of entries in "queues".  */
const size_t queues_n = sizeof (queues) / sizeof (queues[0]);
/* Check how async-arguments map to underlying queues, as observed through
   "acc_get_cuda_stream".  Every check is an "assert" (kept active by the
   "#undef NDEBUG" at the top of this file); returns 0 once all of them have
   passed.  */
int main(void)
{
  /* Explicitly initialize: it's not clear whether the following OpenACC
     runtime library calls implicitly initialize;
     <https://github.com/OpenACC/openacc-spec/issues/102>.  */
  acc_device_t d;
#if defined ACC_DEVICE_TYPE_nvidia
  d = acc_device_nvidia;
#elif defined ACC_DEVICE_TYPE_host
  d = acc_device_host;
#else
# error Not ported to this ACC_DEVICE_TYPE
#endif
  acc_init (d);

  for (size_t i = 0; i < queues_n; ++i)
    {
      /* Before actually being used, they are all NULL.  */
      queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
      assert (queues[i].cuda_stream == NULL);
    }

  /* No-ops still don't initialize them.  */
  {
    size_t i = 0;

    /* Find the first non-special async-argument.  The bound is tested
       before each element is read, so a table without any such entry fails
       the assertion below instead of being read past its end.  */
    while (i < queues_n && queues[i].async < 0)
      ++i;
    assert (i < queues_n);
#pragma acc wait(queues[i].async) // no-op

    ++i;
    assert (i < queues_n);
#pragma acc parallel wait(queues[i].async) // no-op
    ;

    ++i;
    assert (i < queues_n);
    acc_wait(queues[i].async); // no-op

    /* Advance by two: this step uses the entries "i - 1" and "i".  */
    i += 2;
    assert (i < queues_n);
    /* No-op, and async queue "i" does not get set up.  */
    acc_wait_async(queues[i - 1].async, queues[i].async);

    /* Re-query every entry: all are still expected to be NULL.  A separate
       index name is used so that the "i" of the enclosing block is not
       shadowed.  */
    for (size_t j = 0; j < queues_n; ++j)
      {
	queues[j].cuda_stream = acc_get_cuda_stream (queues[j].async);
	assert (queues[j].cuda_stream == NULL);
      }
  }

  for (size_t i = 0; i < queues_n; ++i)
    {
      /* Use the queue to initialize it.  */
#pragma acc parallel async(queues[i].async)
      ;
#pragma acc wait

      /* Verify CUDA stream used.  */
      queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
#if defined ACC_DEVICE_TYPE_nvidia
      /* "acc_async_sync" maps to the NULL CUDA default stream.  */
      if (queues[i].async == acc_async_sync)
	assert (queues[i].cuda_stream == NULL);
      else
	assert (queues[i].cuda_stream != NULL);
#elif defined ACC_DEVICE_TYPE_host
      /* For "acc_device_host" there are no CUDA streams.  */
      assert (queues[i].cuda_stream == NULL);
#else
# error Not ported to this ACC_DEVICE_TYPE
#endif
    }

  /* Verify same results: the value recorded above must be returned again,
     both before and after the queue is used once more.  */
  for (size_t i = 0; i < queues_n; ++i)
    {
      void *cuda_stream;

      cuda_stream = acc_get_cuda_stream (queues[i].async);
      assert (cuda_stream == queues[i].cuda_stream);

#pragma acc parallel async(queues[i].async)
      ;
#pragma acc wait

      cuda_stream = acc_get_cuda_stream (queues[i].async);
      assert (cuda_stream == queues[i].cuda_stream);
    }

  /* Verify individual underlying queues are all different.  Entries whose
     recorded value is NULL are left out of the pairwise comparison.  */
  for (size_t i = 0; i < queues_n; ++i)
    {
      if (queues[i].cuda_stream == NULL)
	continue;
      for (size_t j = i + 1; j < queues_n; ++j)
	{
	  if (queues[j].cuda_stream == NULL)
	    continue;
	  assert (queues[j].cuda_stream != queues[i].cuda_stream);
	}
    }

  return 0;
}