[nvptx, libgomp] Fix cuMemAlloc with size zero
Consider test-case: ... int main (void) { #pragma acc parallel async ; #pragma acc parallel async ; #pragma acc wait return 0; } ... This fails with: ... libgomp: cuMemAlloc error: invalid argument Segmentation fault (core dumped) ... The cuMemAlloc error is due to the fact that we're try to allocate 0 bytes. Fix this by preventing calling map_push with size zero argument in nvptx_exec. This also has the consequence that for the abort-1.c test-case, we end up calling cuMemFree during map_fini for the struct cuda_map allocated in map_init, which fails because an abort happened. Fix this by calling cuMemFree with CUDA_CALL_NOCHECK in cuda_map_destroy. 2019-01-23 Tom de Vries <tdevries@suse.de> PR target/PR88946 * plugin/plugin-nvptx.c (cuda_map_destroy): Use CUDA_CALL_NOCHECK for cuMemFree. (nvptx_exec): Don't call map_push if mapnum == 0. * testsuite/libgomp.oacc-c-c++-common/pr88946.c: New test. From-SVN: r268178
This commit is contained in:
parent
4fef8e4d8c
commit
4a75460b00
|
@ -1,3 +1,11 @@
|
|||
2019-01-23 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
PR target/PR88946
|
||||
* plugin/plugin-nvptx.c (cuda_map_destroy): Use CUDA_CALL_NOCHECK for
|
||||
cuMemFree.
|
||||
(nvptx_exec): Don't call map_push if mapnum == 0.
|
||||
* testsuite/libgomp.oacc-c-c++-common/pr88946.c: New test.
|
||||
|
||||
2019-01-23 Tom de Vries <tdevries@suse.de>
|
||||
|
||||
PR target/88941
|
||||
|
|
|
@ -260,7 +260,7 @@ cuda_map_destroy (struct cuda_map *map)
|
|||
atexit handler (PR83795). */
|
||||
;
|
||||
else
|
||||
CUDA_CALL_ASSERT (cuMemFree, map->d);
|
||||
CUDA_CALL_NOCHECK (cuMemFree, map->d);
|
||||
|
||||
free (map);
|
||||
}
|
||||
|
@ -1164,7 +1164,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
|||
struct ptx_stream *dev_str;
|
||||
void *kargs[1];
|
||||
void *hp;
|
||||
CUdeviceptr dp;
|
||||
CUdeviceptr dp = 0;
|
||||
struct nvptx_thread *nvthd = nvptx_thread ();
|
||||
int warp_size = nvthd->ptx_dev->warp_size;
|
||||
const char *maybe_abort_msg = "(perhaps abort was called)";
|
||||
|
@ -1361,23 +1361,27 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
|||
dims[GOMP_DIM_VECTOR]);
|
||||
}
|
||||
|
||||
/* This reserves a chunk of a pre-allocated page of memory mapped on both
|
||||
the host and the device. HP is a host pointer to the new chunk, and DP is
|
||||
the corresponding device pointer. */
|
||||
pthread_mutex_lock (&ptx_event_lock);
|
||||
dp = map_push (dev_str, mapnum * sizeof (void *));
|
||||
pthread_mutex_unlock (&ptx_event_lock);
|
||||
if (mapnum > 0)
|
||||
{
|
||||
/* This reserves a chunk of a pre-allocated page of memory mapped on both
|
||||
the host and the device. HP is a host pointer to the new chunk, and DP is
|
||||
the corresponding device pointer. */
|
||||
pthread_mutex_lock (&ptx_event_lock);
|
||||
dp = map_push (dev_str, mapnum * sizeof (void *));
|
||||
pthread_mutex_unlock (&ptx_event_lock);
|
||||
|
||||
GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
||||
GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
||||
|
||||
/* Copy the array of arguments to the mapped page. */
|
||||
hp = alloca(sizeof(void *) * mapnum);
|
||||
for (i = 0; i < mapnum; i++)
|
||||
((void **) hp)[i] = devaddrs[i];
|
||||
/* Copy the array of arguments to the mapped page. */
|
||||
hp = alloca(sizeof(void *) * mapnum);
|
||||
for (i = 0; i < mapnum; i++)
|
||||
((void **) hp)[i] = devaddrs[i];
|
||||
|
||||
/* Copy the (device) pointers to arguments to the device */
|
||||
CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp,
|
||||
mapnum * sizeof (void *));
|
||||
}
|
||||
|
||||
/* Copy the (device) pointers to arguments to the device */
|
||||
CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp,
|
||||
mapnum * sizeof (void *));
|
||||
GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
|
||||
" gangs=%u, workers=%u, vectors=%u\n",
|
||||
__FUNCTION__, targ_fn->launch->fn, dims[GOMP_DIM_GANG],
|
||||
|
@ -1422,7 +1426,8 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
|||
|
||||
CUDA_CALL_ASSERT (cuEventRecord, *e, dev_str->stream);
|
||||
|
||||
event_add (PTX_EVT_KNL, e, (void *)dev_str, 0);
|
||||
if (mapnum > 0)
|
||||
event_add (PTX_EVT_KNL, e, (void *)dev_str, 0);
|
||||
}
|
||||
#else
|
||||
r = CUDA_CALL_NOCHECK (cuCtxSynchronize, );
|
||||
|
@ -1439,7 +1444,10 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
|
|||
#ifndef DISABLE_ASYNC
|
||||
if (async < acc_async_noval)
|
||||
#endif
|
||||
map_pop (dev_str);
|
||||
{
|
||||
if (mapnum > 0)
|
||||
map_pop (dev_str);
|
||||
}
|
||||
}
|
||||
|
||||
void * openacc_get_current_cuda_context (void);
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
/* { dg-do run } */
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
#pragma acc parallel async
|
||||
;
|
||||
|
||||
#pragma acc parallel async
|
||||
;
|
||||
|
||||
#pragma acc wait
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue