98 lines
2.5 KiB
C
98 lines
2.5 KiB
C
|
/* Test mapping of async values to specific underlying queues. */
|
||
|
|
||
|
#undef NDEBUG
|
||
|
#include <assert.h>
|
||
|
#include <openacc.h>
|
||
|
|
||
|
/* This is implemented in terms of the "acc_get_cuda_stream" interface. */
|
||
|
|
||
|
struct
|
||
|
{
|
||
|
int async;
|
||
|
void *cuda_stream;
|
||
|
} queues[] = { { acc_async_sync, NULL },
|
||
|
{ acc_async_noval, NULL },
|
||
|
{ 0, NULL },
|
||
|
{ 1, NULL },
|
||
|
{ 2, NULL },
|
||
|
{ 36, NULL },
|
||
|
{ 1982, NULL } };
|
||
|
const size_t queues_n = sizeof queues / sizeof queues[0];
|
||
|
|
||
|
int main(void)
|
||
|
{
|
||
|
/* Explicitly initialize: it's not clear whether the following OpenACC
|
||
|
runtime library calls implicitly initialize;
|
||
|
<https://github.com/OpenACC/openacc-spec/issues/102>. */
|
||
|
acc_device_t d;
|
||
|
#if defined ACC_DEVICE_TYPE_nvidia
|
||
|
d = acc_device_nvidia;
|
||
|
#elif defined ACC_DEVICE_TYPE_host
|
||
|
d = acc_device_host;
|
||
|
#else
|
||
|
# error Not ported to this ACC_DEVICE_TYPE
|
||
|
#endif
|
||
|
acc_init (d);
|
||
|
|
||
|
for (size_t i = 0; i < queues_n; ++i)
|
||
|
{
|
||
|
/* Before actually being used, there are all NULL. */
|
||
|
queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||
|
assert (queues[i].cuda_stream == NULL);
|
||
|
}
|
||
|
|
||
|
for (size_t i = 0; i < queues_n; ++i)
|
||
|
{
|
||
|
/* Use the queue to initialize it. */
|
||
|
#pragma acc parallel async(queues[i].async)
|
||
|
;
|
||
|
#pragma acc wait
|
||
|
|
||
|
/* Verify CUDA stream used. */
|
||
|
queues[i].cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||
|
#if defined ACC_DEVICE_TYPE_nvidia
|
||
|
/* "acc_async_sync" maps to the NULL CUDA default stream. */
|
||
|
if (queues[i].async == acc_async_sync)
|
||
|
assert (queues[i].cuda_stream == NULL);
|
||
|
else
|
||
|
assert (queues[i].cuda_stream != NULL);
|
||
|
#elif defined ACC_DEVICE_TYPE_host
|
||
|
/* For "acc_device_host" there are no CUDA streams. */
|
||
|
assert (queues[i].cuda_stream == NULL);
|
||
|
#else
|
||
|
# error Not ported to this ACC_DEVICE_TYPE
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
/* Verify same results. */
|
||
|
for (size_t i = 0; i < queues_n; ++i)
|
||
|
{
|
||
|
void *cuda_stream;
|
||
|
|
||
|
cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||
|
assert (cuda_stream == queues[i].cuda_stream);
|
||
|
|
||
|
#pragma acc parallel async(queues[i].async)
|
||
|
;
|
||
|
#pragma acc wait
|
||
|
|
||
|
cuda_stream = acc_get_cuda_stream (queues[i].async);
|
||
|
assert (cuda_stream == queues[i].cuda_stream);
|
||
|
}
|
||
|
|
||
|
/* Verify individual underlying queues are all different. */
|
||
|
for (size_t i = 0; i < queues_n; ++i)
|
||
|
{
|
||
|
if (queues[i].cuda_stream == NULL)
|
||
|
continue;
|
||
|
for (size_t j = i + 1; j < queues_n; ++j)
|
||
|
{
|
||
|
if (queues[j].cuda_stream == NULL)
|
||
|
continue;
|
||
|
assert (queues[j].cuda_stream != queues[i].cuda_stream);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|