[libgomp, nvptx] Fall back to cuLinkAddData/cuLinkCreate if _v2 not found

The CUDA driver API functions cuLinkAddData and cuLinkCreate are available
starting with version 5.5.  In version 6.5, they are remapped onto _v2 versions.

The dlopen interface of the libgomp nvptx plugin uses the _v2 versions, so it
won't work with a CUDA driver whose driver API version is lower than 6.5.

This patch fixes the problem by testing for the presence of the _v2 versions,
and falling back to the original versions when they are absent.

Built on x86_64 with the nvptx accelerator and reg-tested libgomp, both with
and without --without-cuda-driver.

2018-08-08  Tom de Vries  <tdevries@suse.de>

	* plugin/cuda-lib.def (cuLinkAddData_v2, cuLinkCreate_v2): Declare using
	CUDA_ONE_CALL_MAYBE_NULL.
	* plugin/plugin-nvptx.c (cuLinkAddData, cuLinkCreate): Undef and declare.
	(cuLinkAddData_v2, cuLinkCreate_v2): Declare.
	(link_ptx): Fall back to cuLinkAddData/cuLinkCreate if the _v2 versions
	are not found.

From-SVN: r263408
This commit is contained in:
Tom de Vries 2018-08-08 14:26:37 +00:00 committed by Tom de Vries
parent cedd9bd016
commit 8e09a12f01
3 changed files with 35 additions and 4 deletions

View File

@ -1,3 +1,12 @@
2018-08-08 Tom de Vries <tdevries@suse.de>
* plugin/cuda-lib.def (cuLinkAddData_v2, cuLinkCreate_v2): Declare using
CUDA_ONE_CALL_MAYBE_NULL.
* plugin/plugin-nvptx.c (cuLinkAddData, cuLinkCreate): Undef and declare.
(cuLinkAddData_v2, cuLinkCreate_v2): Declare.
(link_ptx): Fall back to cuLinkAddData/cuLinkCreate if the _v2 versions
are not found.
2018-08-08 Tom de Vries <tdevries@suse.de>
* plugin/cuda-lib.def (cuGetErrorString): Use CUDA_ONE_CALL_MAYBE_NULL.

View File

@ -19,8 +19,10 @@ CUDA_ONE_CALL_MAYBE_NULL (cuGetErrorString)
CUDA_ONE_CALL (cuInit)
CUDA_ONE_CALL (cuLaunchKernel)
CUDA_ONE_CALL (cuLinkAddData)
CUDA_ONE_CALL_MAYBE_NULL (cuLinkAddData_v2)
CUDA_ONE_CALL (cuLinkComplete)
CUDA_ONE_CALL (cuLinkCreate)
CUDA_ONE_CALL_MAYBE_NULL (cuLinkCreate_v2)
CUDA_ONE_CALL (cuLinkDestroy)
CUDA_ONE_CALL (cuMemAlloc)
CUDA_ONE_CALL (cuMemAllocHost)

View File

@ -54,6 +54,18 @@ extern CUresult cuGetErrorString (CUresult, const char **);
#define CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR 82
#endif
#if CUDA_VERSION >= 6050
#undef cuLinkCreate
#undef cuLinkAddData
CUresult cuLinkAddData (CUlinkState, CUjitInputType, void *, size_t,
const char *, unsigned, CUjit_option *, void **);
CUresult cuLinkCreate (unsigned, CUjit_option *, void **, CUlinkState *);
#else
CUresult cuLinkAddData_v2 (CUlinkState, CUjitInputType, void *, size_t,
const char *, unsigned, CUjit_option *, void **);
CUresult cuLinkCreate_v2 (unsigned, CUjit_option *, void **, CUlinkState *);
#endif
#define DO_PRAGMA(x) _Pragma (#x)
#if PLUGIN_NVPTX_DYNAMIC
@ -938,16 +950,24 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
nopts++;
}
CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
if (CUDA_CALL_EXISTS (cuLinkCreate_v2))
CUDA_CALL (cuLinkCreate_v2, nopts, opts, optvals, &linkstate);
else
CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
for (; num_objs--; ptx_objs++)
{
/* cuLinkAddData's 'data' argument erroneously omits the const
qualifier. */
GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code);
r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
(char *) ptx_objs->code, ptx_objs->size,
0, 0, 0, 0);
if (CUDA_CALL_EXISTS (cuLinkAddData_v2))
r = CUDA_CALL_NOCHECK (cuLinkAddData_v2, linkstate, CU_JIT_INPUT_PTX,
(char *) ptx_objs->code, ptx_objs->size,
0, 0, 0, 0);
else
r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX,
(char *) ptx_objs->code, ptx_objs->size,
0, 0, 0, 0);
if (r != CUDA_SUCCESS)
{
GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);