plugin-nvptx.c (struct targ_fn_descriptor): Move later.

	* plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later.
	(struct ptx_image_data): Move earlier, add fns field.
	(struct ptx_device): Add images and image_lock fields.
	(ptx_images, ptx_image_lock): Delete.
	(nvptx_open_device): Initialize images and image_lock fields.
	(nvptx_close_device): Destroy image_lock.
	(GOMP_OFFLOAD_load_image): Register image to device-specific fields.
	(GOMP_OFFLOAD_unload_image): Unregister image from device-specific
	fields.

From-SVN: r226004
commit f3e9a059a7 (parent dd5bc4becd)
Author: Nathan Sidwell
Committed by: Nathan Sidwell
Date: 2015-07-20 16:17:57 +00:00

2 changed files with 87 additions and 66 deletions
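
In short, the patch replaces the single global image list (ptx_images, guarded by ptx_image_lock) with a per-device list and lock inside struct ptx_device, so loads and unloads on different devices no longer serialize on one mutex, and each image now records the array of function descriptors allocated for it. The standalone C sketch below illustrates that per-device list-plus-lock pattern only; struct dev, struct img, and the main driver are hypothetical stand-ins for illustration, not the plugin's actual types.

/* Illustrative sketch (not the plugin's real code): each device owns a
   linked list of loaded images plus a mutex guarding only that list,
   which is the shape this patch moves plugin-nvptx.c to.  */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct img                      /* Hypothetical stand-in for ptx_image_data.  */
{
  const void *target_data;      /* Key used to find the image again.  */
  struct img *next;
};

struct dev                      /* Hypothetical stand-in for ptx_device.  */
{
  struct img *images;           /* Images loaded on this device.  */
  pthread_mutex_t image_lock;   /* Lock for the list above.  */
};

static void
dev_init (struct dev *d)
{
  d->images = NULL;
  pthread_mutex_init (&d->image_lock, NULL);
}

/* Allocate outside the lock, hold the lock only for the list update,
   mirroring the registration step in the new GOMP_OFFLOAD_load_image.  */
static void
dev_register_image (struct dev *d, const void *target_data)
{
  struct img *image = malloc (sizeof *image);
  if (!image)
    abort ();
  image->target_data = target_data;

  pthread_mutex_lock (&d->image_lock);
  image->next = d->images;
  d->images = image;
  pthread_mutex_unlock (&d->image_lock);
}

int
main (void)
{
  static int blob_a, blob_b;    /* Pretend offload images.  */
  struct dev d;

  dev_init (&d);
  dev_register_image (&d, &blob_a);
  dev_register_image (&d, &blob_b);

  for (struct img *i = d.images; i; i = i->next)
    printf ("image registered at %p\n", (void *) i->target_data);

  while (d.images)              /* Tear down the list and the lock.  */
    {
      struct img *next = d.images->next;
      free (d.images);
      d.images = next;
    }
  pthread_mutex_destroy (&d.image_lock);
  return 0;
}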

libgomp/ChangeLog

@@ -1,3 +1,15 @@
+2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>
+
+	* plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later.
+	(struct ptx_image_data): Move earlier, add fns field.
+	(struct ptx_device): Add images and image_lock fields.
+	(ptx_images, ptx_image_lock): Delete.
+	(nvptx_open_device): Initialize images and image_lock fields.
+	(nvptx_close_device): Destroy image_lock.
+	(GOMP_OFFLOAD_load_image): Register image to device-specific fields.
+	(GOMP_OFFLOAD_unload_image): Unregister image from device-specific
+	fields.
+
 2015-07-17  Nathan Sidwell  <nathan@codesourcery.com>
 
 	* target.c (GOMP_offload_register): Use int for device type arg.

libgomp/plugin/plugin-nvptx.c

@@ -127,12 +127,6 @@ cuda_error (CUresult r)
   return &errmsg[0];
 }
 
-struct targ_fn_descriptor
-{
-  CUfunction fn;
-  const char *name;
-};
-
 static unsigned int instantiated_devices = 0;
 static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -288,6 +282,25 @@ map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
   return;
 }
 
+/* Descriptor of a loaded function.  */
+
+struct targ_fn_descriptor
+{
+  CUfunction fn;
+  const char *name;
+};
+
+/* A loaded PTX image.  */
+struct ptx_image_data
+{
+  const void *target_data;
+  CUmodule module;
+
+  struct targ_fn_descriptor *fns;  /* Array of functions.  */
+
+  struct ptx_image_data *next;
+};
+
 struct ptx_device
 {
   CUcontext ctx;
@@ -311,6 +324,9 @@ struct ptx_device
   int mode;
   bool mkern;
 
+  struct ptx_image_data *images;  /* Images loaded on device.  */
+  pthread_mutex_t image_lock;     /* Lock for above list.  */
+
   struct ptx_device *next;
 };
@@ -332,21 +348,11 @@ struct ptx_event
   struct ptx_event *next;
 };
 
-struct ptx_image_data
-{
-  const void *target_data;
-  CUmodule module;
-  struct ptx_image_data *next;
-};
-
 static pthread_mutex_t ptx_event_lock;
 static struct ptx_event *ptx_events;
 
 static struct ptx_device **ptx_devices;
 
-static struct ptx_image_data *ptx_images = NULL;
-static pthread_mutex_t ptx_image_lock = PTHREAD_MUTEX_INITIALIZER;
-
 #define _XSTR(s) _STR(s)
 #define _STR(s) #s
@@ -590,6 +596,7 @@ select_stream_for_async (int async, pthread_t thread, bool create,
 
 /* Initialize the device.  Return TRUE on success, else FALSE.  PTX_DEV_LOCK
    should be locked on entry and remains locked on exit.  */
+
 static bool
 nvptx_init (void)
 {
@@ -746,6 +753,9 @@ nvptx_open_device (int n)
   if (r != CUDA_SUCCESS)
     async_engines = 1;
 
+  ptx_dev->images = NULL;
+  pthread_mutex_init (&ptx_dev->image_lock, NULL);
+
   init_streams_for_device (ptx_dev, async_engines);
 
   return ptx_dev;
@@ -760,6 +770,8 @@ nvptx_close_device (struct ptx_device *ptx_dev)
     return;
 
   fini_streams_for_device (ptx_dev);
 
+  pthread_mutex_destroy (&ptx_dev->image_lock);
+
   if (!ptx_dev->ctx_shared)
     {
@@ -1632,6 +1644,9 @@ typedef struct nvptx_tdata
   size_t fn_num;
 } nvptx_tdata_t;
 
+/* Load the (partial) program described by TARGET_DATA to device
+   number ORD.  Allocate and return TARGET_TABLE.  */
+
 int
 GOMP_OFFLOAD_load_image (int ord, const void *target_data,
 			 struct addr_pair **target_table)
@@ -1641,23 +1656,19 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
   unsigned int fn_entries, var_entries, i, j;
   CUresult r;
   struct targ_fn_descriptor *targ_fns;
+  struct addr_pair *targ_tbl;
   const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
   struct ptx_image_data *new_image;
+  struct ptx_device *dev;
 
   GOMP_OFFLOAD_init_device (ord);
 
+  dev = ptx_devices[ord];
+
   nvptx_attach_host_thread_to_device (ord);
 
   link_ptx (&module, img_header->ptx_src);
 
-  pthread_mutex_lock (&ptx_image_lock);
-  new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
-  new_image->target_data = target_data;
-  new_image->module = module;
-  new_image->next = ptx_images;
-  ptx_images = new_image;
-  pthread_mutex_unlock (&ptx_image_lock);
-
   /* The mkoffload utility emits a struct of pointers/integers at the
      start of each offload image.  The array of kernel names and the
      functions addresses form a one-to-one correspondence.  */
@@ -1667,12 +1678,24 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
   fn_entries = img_header->fn_num;
   fn_names = img_header->fn_names;
 
-  *target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
-				      * (fn_entries + var_entries));
+  targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
+				 * (fn_entries + var_entries));
   targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
 				 * fn_entries);
 
-  for (i = 0; i < fn_entries; i++)
+  *target_table = targ_tbl;
+
+  new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
+  new_image->target_data = target_data;
+  new_image->module = module;
+  new_image->fns = targ_fns;
+
+  pthread_mutex_lock (&dev->image_lock);
+  new_image->next = dev->images;
+  dev->images = new_image;
+  pthread_mutex_unlock (&dev->image_lock);
+
+  for (i = 0; i < fn_entries; i++, targ_fns++, targ_tbl++)
     {
       CUfunction function;
 
@@ -1680,14 +1703,14 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
       if (r != CUDA_SUCCESS)
 	GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
 
-      targ_fns[i].fn = function;
-      targ_fns[i].name = (const char *) fn_names[i];
+      targ_fns->fn = function;
+      targ_fns->name = (const char *) fn_names[i];
 
-      (*target_table)[i].start = (uintptr_t) &targ_fns[i];
-      (*target_table)[i].end = (*target_table)[i].start + 1;
+      targ_tbl->start = (uintptr_t) targ_fns;
+      targ_tbl->end = targ_tbl->start + 1;
     }
 
-  for (j = 0; j < var_entries; j++, i++)
+  for (j = 0; j < var_entries; j++, targ_tbl++)
     {
       CUdeviceptr var;
       size_t bytes;
@@ -1696,47 +1719,33 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
       if (r != CUDA_SUCCESS)
 	GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
 
-      (*target_table)[i].start = (uintptr_t) var;
-      (*target_table)[i].end = (*target_table)[i].start + bytes;
+      targ_tbl->start = (uintptr_t) var;
+      targ_tbl->end = targ_tbl->start + bytes;
     }
 
-  return i;
+  return fn_entries + var_entries;
 }
 
+/* Unload the program described by TARGET_DATA.  DEV_DATA is the
+   function descriptors allocated by G_O_load_image.  */
+
 void
-GOMP_OFFLOAD_unload_image (int tid __attribute__((unused)),
-			   const void *target_data)
+GOMP_OFFLOAD_unload_image (int ord, const void *target_data)
 {
-  const void *const *img_header = (const void *const *) target_data;
-  struct targ_fn_descriptor *targ_fns
-    = (struct targ_fn_descriptor *) img_header[0];
-  struct ptx_image_data *image, *prev = NULL, *newhd = NULL;
+  struct ptx_image_data *image, **prev_p;
+  struct ptx_device *dev = ptx_devices[ord];
 
-  free (targ_fns);
-
-  pthread_mutex_lock (&ptx_image_lock);
-  for (image = ptx_images; image != NULL;)
-    {
-      struct ptx_image_data *next = image->next;
-
-      if (image->target_data == target_data)
-	{
-	  cuModuleUnload (image->module);
-	  free (image);
-	  if (prev)
-	    prev->next = next;
-	}
-      else
-	{
-	  prev = image;
-	  if (!newhd)
-	    newhd = image;
-	}
-
-      image = next;
-    }
-  ptx_images = newhd;
-  pthread_mutex_unlock (&ptx_image_lock);
+  pthread_mutex_lock (&dev->image_lock);
+  for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next)
+    if (image->target_data == target_data)
+      {
+	*prev_p = image->next;
+	cuModuleUnload (image->module);
+	free (image->fns);
+	free (image);
+	break;
+      }
+  pthread_mutex_unlock (&dev->image_lock);
 }
 
 void *
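
The rewritten GOMP_OFFLOAD_unload_image above walks the device's list with a pointer-to-pointer (prev_p) instead of the old prev/newhd bookkeeping, so the matching node is spliced out with a single store whether it is the head or an interior element. The following is a minimal standalone sketch of that unlink idiom, using a hypothetical struct node list rather than the plugin's types.

/* Sketch of the pointer-to-pointer unlink used in the new
   GOMP_OFFLOAD_unload_image: *prev_p always names the link that points at
   the current node, so removal is one assignment.  Illustrative only.  */
#include <stdio.h>
#include <stdlib.h>

struct node
{
  int key;
  struct node *next;
};

static struct node *
push (struct node *head, int key)
{
  struct node *n = malloc (sizeof *n);
  if (!n)
    abort ();
  n->key = key;
  n->next = head;
  return n;
}

static void
unlink_key (struct node **head, int key)
{
  struct node *n, **prev_p;

  for (prev_p = head; (n = *prev_p) != NULL; prev_p = &n->next)
    if (n->key == key)
      {
	*prev_p = n->next;      /* Splice the node out of the list.  */
	free (n);
	break;
      }
}

int
main (void)
{
  struct node *head = NULL;

  for (int i = 1; i <= 3; i++)
    head = push (head, i);      /* List is 3 -> 2 -> 1.  */

  unlink_key (&head, 2);        /* Works for head and interior nodes alike.  */

  for (struct node *n = head; n; n = n->next)
    printf ("%d\n", n->key);    /* Prints 3 then 1.  */

  while (head)
    {
      struct node *next = head->next;
      free (head);
      head = next;
    }
  return 0;
}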