plugin-nvptx.c (struct targ_fn_descriptor): Move later.
* plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later. (struct ptx_image_data): Move earlier, add fns field. (struct ptx_device): Add images and image_lock fields. (ptx_images, ptx_image_lock): Delete. (nvptx_open_device): Initialize images and image_lock fields. (nvptx_close_device): Destroy image_lock. (GOMP_OFFLOAD_load_image): Register image to device-specific fields. (GOMP_OFFLOAD_unload_image): Unregister image from device-specific fields. From-SVN: r226004
This commit is contained in:
parent
dd5bc4becd
commit
f3e9a059a7
@ -1,3 +1,15 @@
|
||||
2015-07-20 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later.
|
||||
(struct ptx_image_data): Move earlier, add fns field.
|
||||
(struct ptx_device): Add images and image_lock fields.
|
||||
(ptx_images, ptx_image_lock): Delete.
|
||||
(nvptx_open_device): Initialize images and image_lock fields.
|
||||
(nvptx_close_device): Destroy image_lock.
|
||||
(GOMP_OFFLOAD_load_image): Register image to device-specific fields.
|
||||
(GOMP_OFFLOAD_unload_image): Unregister image from device-specific
|
||||
fields.
|
||||
|
||||
2015-07-17 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* target.c (GOMP_offload_register): Use int for device type arg.
|
||||
|
@ -127,12 +127,6 @@ cuda_error (CUresult r)
|
||||
return &errmsg[0];
|
||||
}
|
||||
|
||||
struct targ_fn_descriptor
|
||||
{
|
||||
CUfunction fn;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
static unsigned int instantiated_devices = 0;
|
||||
static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
@ -288,6 +282,25 @@ map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Descriptor of a loaded function. */
|
||||
|
||||
struct targ_fn_descriptor
|
||||
{
|
||||
CUfunction fn; /* CUDA function handle, filled in by GOMP_OFFLOAD_load_image. */
|
||||
const char *name; /* Function name, taken from the image header's fn_names. */
|
||||
};
|
||||
|
||||
/* A loaded PTX image. */
|
||||
struct ptx_image_data
|
||||
{
|
||||
const void *target_data; /* Image pointer given to GOMP_OFFLOAD_load_image; matched again on unload. */
|
||||
CUmodule module; /* Module for this image; released with cuModuleUnload on unload. */
|
||||
|
||||
struct targ_fn_descriptor *fns; /* Array of functions. */
|
||||
|
||||
struct ptx_image_data *next; /* Next image in the device's images list. */
|
||||
};
|
||||
|
||||
struct ptx_device
|
||||
{
|
||||
CUcontext ctx;
|
||||
@ -311,6 +324,9 @@ struct ptx_device
|
||||
int mode;
|
||||
bool mkern;
|
||||
|
||||
struct ptx_image_data *images; /* Images loaded on device. */
|
||||
pthread_mutex_t image_lock; /* Lock for above list. */
|
||||
|
||||
struct ptx_device *next;
|
||||
};
|
||||
|
||||
@ -332,21 +348,11 @@ struct ptx_event
|
||||
struct ptx_event *next;
|
||||
};
|
||||
|
||||
struct ptx_image_data
|
||||
{
|
||||
const void *target_data;
|
||||
CUmodule module;
|
||||
struct ptx_image_data *next;
|
||||
};
|
||||
|
||||
static pthread_mutex_t ptx_event_lock;
|
||||
static struct ptx_event *ptx_events;
|
||||
|
||||
static struct ptx_device **ptx_devices;
|
||||
|
||||
static struct ptx_image_data *ptx_images = NULL;
|
||||
static pthread_mutex_t ptx_image_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
#define _XSTR(s) _STR(s)
|
||||
#define _STR(s) #s
|
||||
|
||||
@ -590,6 +596,7 @@ select_stream_for_async (int async, pthread_t thread, bool create,
|
||||
|
||||
/* Initialize the device. Return TRUE on success, else FALSE. PTX_DEV_LOCK
|
||||
should be locked on entry and remains locked on exit. */
|
||||
|
||||
static bool
|
||||
nvptx_init (void)
|
||||
{
|
||||
@ -746,6 +753,9 @@ nvptx_open_device (int n)
|
||||
if (r != CUDA_SUCCESS)
|
||||
async_engines = 1;
|
||||
|
||||
ptx_dev->images = NULL;
|
||||
pthread_mutex_init (&ptx_dev->image_lock, NULL);
|
||||
|
||||
init_streams_for_device (ptx_dev, async_engines);
|
||||
|
||||
return ptx_dev;
|
||||
@ -760,6 +770,8 @@ nvptx_close_device (struct ptx_device *ptx_dev)
|
||||
return;
|
||||
|
||||
fini_streams_for_device (ptx_dev);
|
||||
|
||||
pthread_mutex_destroy (&ptx_dev->image_lock);
|
||||
|
||||
if (!ptx_dev->ctx_shared)
|
||||
{
|
||||
@ -1632,6 +1644,9 @@ typedef struct nvptx_tdata
|
||||
size_t fn_num;
|
||||
} nvptx_tdata_t;
|
||||
|
||||
/* Load the (partial) program described by TARGET_DATA to device
|
||||
number ORD. Allocate and return TARGET_TABLE. */
|
||||
|
||||
int
|
||||
GOMP_OFFLOAD_load_image (int ord, const void *target_data,
|
||||
struct addr_pair **target_table)
|
||||
@ -1641,23 +1656,19 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
|
||||
unsigned int fn_entries, var_entries, i, j;
|
||||
CUresult r;
|
||||
struct targ_fn_descriptor *targ_fns;
|
||||
struct addr_pair *targ_tbl;
|
||||
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
|
||||
struct ptx_image_data *new_image;
|
||||
struct ptx_device *dev;
|
||||
|
||||
GOMP_OFFLOAD_init_device (ord);
|
||||
|
||||
dev = ptx_devices[ord];
|
||||
|
||||
nvptx_attach_host_thread_to_device (ord);
|
||||
|
||||
link_ptx (&module, img_header->ptx_src);
|
||||
|
||||
pthread_mutex_lock (&ptx_image_lock);
|
||||
new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
|
||||
new_image->target_data = target_data;
|
||||
new_image->module = module;
|
||||
new_image->next = ptx_images;
|
||||
ptx_images = new_image;
|
||||
pthread_mutex_unlock (&ptx_image_lock);
|
||||
|
||||
/* The mkoffload utility emits a struct of pointers/integers at the
|
||||
start of each offload image. The array of kernel names and the
|
||||
functions addresses form a one-to-one correspondence. */
|
||||
@ -1667,12 +1678,24 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
|
||||
fn_entries = img_header->fn_num;
|
||||
fn_names = img_header->fn_names;
|
||||
|
||||
*target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
|
||||
* (fn_entries + var_entries));
|
||||
targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
|
||||
* (fn_entries + var_entries));
|
||||
targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
|
||||
* fn_entries);
|
||||
|
||||
for (i = 0; i < fn_entries; i++)
|
||||
*target_table = targ_tbl;
|
||||
|
||||
new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
|
||||
new_image->target_data = target_data;
|
||||
new_image->module = module;
|
||||
new_image->fns = targ_fns;
|
||||
|
||||
pthread_mutex_lock (&dev->image_lock);
|
||||
new_image->next = dev->images;
|
||||
dev->images = new_image;
|
||||
pthread_mutex_unlock (&dev->image_lock);
|
||||
|
||||
for (i = 0; i < fn_entries; i++, targ_fns++, targ_tbl++)
|
||||
{
|
||||
CUfunction function;
|
||||
|
||||
@ -1680,14 +1703,14 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
|
||||
|
||||
targ_fns[i].fn = function;
|
||||
targ_fns[i].name = (const char *) fn_names[i];
|
||||
targ_fns->fn = function;
|
||||
targ_fns->name = (const char *) fn_names[i];
|
||||
|
||||
(*target_table)[i].start = (uintptr_t) &targ_fns[i];
|
||||
(*target_table)[i].end = (*target_table)[i].start + 1;
|
||||
targ_tbl->start = (uintptr_t) targ_fns;
|
||||
targ_tbl->end = targ_tbl->start + 1;
|
||||
}
|
||||
|
||||
for (j = 0; j < var_entries; j++, i++)
|
||||
for (j = 0; j < var_entries; j++, targ_tbl++)
|
||||
{
|
||||
CUdeviceptr var;
|
||||
size_t bytes;
|
||||
@ -1696,47 +1719,33 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
|
||||
if (r != CUDA_SUCCESS)
|
||||
GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
|
||||
|
||||
(*target_table)[i].start = (uintptr_t) var;
|
||||
(*target_table)[i].end = (*target_table)[i].start + bytes;
|
||||
targ_tbl->start = (uintptr_t) var;
|
||||
targ_tbl->end = targ_tbl->start + bytes;
|
||||
}
|
||||
|
||||
return i;
|
||||
return fn_entries + var_entries;
|
||||
}
|
||||
|
||||
/* Unload the program described by TARGET_DATA from device number ORD,
|
||||
freeing the function descriptors allocated by GOMP_OFFLOAD_load_image. */
|
||||
|
||||
void
|
||||
GOMP_OFFLOAD_unload_image (int tid __attribute__((unused)),
|
||||
const void *target_data)
|
||||
GOMP_OFFLOAD_unload_image (int ord, const void *target_data)
|
||||
{
|
||||
const void *const *img_header = (const void *const *) target_data;
|
||||
struct targ_fn_descriptor *targ_fns
|
||||
= (struct targ_fn_descriptor *) img_header[0];
|
||||
struct ptx_image_data *image, *prev = NULL, *newhd = NULL;
|
||||
struct ptx_image_data *image, **prev_p;
|
||||
struct ptx_device *dev = ptx_devices[ord];
|
||||
|
||||
free (targ_fns);
|
||||
|
||||
pthread_mutex_lock (&ptx_image_lock);
|
||||
for (image = ptx_images; image != NULL;)
|
||||
{
|
||||
struct ptx_image_data *next = image->next;
|
||||
|
||||
if (image->target_data == target_data)
|
||||
{
|
||||
cuModuleUnload (image->module);
|
||||
free (image);
|
||||
if (prev)
|
||||
prev->next = next;
|
||||
}
|
||||
else
|
||||
{
|
||||
prev = image;
|
||||
if (!newhd)
|
||||
newhd = image;
|
||||
}
|
||||
|
||||
image = next;
|
||||
}
|
||||
ptx_images = newhd;
|
||||
pthread_mutex_unlock (&ptx_image_lock);
|
||||
pthread_mutex_lock (&dev->image_lock);
|
||||
for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next)
|
||||
if (image->target_data == target_data)
|
||||
{
|
||||
*prev_p = image->next;
|
||||
cuModuleUnload (image->module);
|
||||
free (image->fns);
|
||||
free (image);
|
||||
break;
|
||||
}
|
||||
pthread_mutex_unlock (&dev->image_lock);
|
||||
}
|
||||
|
||||
void *
|
||||
|
Loading…
Reference in New Issue
Block a user