2019-05-13 Chung-Lin Tang <cltang@codesourcery.com>
Reviewed-by: Thomas Schwinge <thomas@codesourcery.com> libgomp/ * libgomp-plugin.h (struct goacc_asyncqueue): Declare. (struct goacc_asyncqueue_list): Likewise. (goacc_aq): Likewise. (goacc_aq_list): Likewise. (GOMP_OFFLOAD_openacc_register_async_cleanup): Remove. (GOMP_OFFLOAD_openacc_async_test): Remove. (GOMP_OFFLOAD_openacc_async_test_all): Remove. (GOMP_OFFLOAD_openacc_async_wait): Remove. (GOMP_OFFLOAD_openacc_async_wait_async): Remove. (GOMP_OFFLOAD_openacc_async_wait_all): Remove. (GOMP_OFFLOAD_openacc_async_wait_all_async): Remove. (GOMP_OFFLOAD_openacc_async_set_async): Remove. (GOMP_OFFLOAD_openacc_exec): Adjust declaration. (GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise. (GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise. (GOMP_OFFLOAD_openacc_async_exec): Declare. (GOMP_OFFLOAD_openacc_async_construct): Declare. (GOMP_OFFLOAD_openacc_async_destruct): Declare. (GOMP_OFFLOAD_openacc_async_test): Declare. (GOMP_OFFLOAD_openacc_async_synchronize): Declare. (GOMP_OFFLOAD_openacc_async_serialize): Declare. (GOMP_OFFLOAD_openacc_async_queue_callback): Declare. (GOMP_OFFLOAD_openacc_async_host2dev): Declare. (GOMP_OFFLOAD_openacc_async_dev2host): Declare. * libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct. (gomp_acc_insert_pointer): Adjust declaration. (gomp_copy_host2dev): New declaration. (gomp_copy_dev2host): Likewise. (gomp_map_vars_async): Likewise. (gomp_unmap_tgt): Likewise. (gomp_unmap_vars_async): Likewise. (gomp_fini_device): Likewise. * oacc-async.c (get_goacc_thread): New function. (get_goacc_thread_device): New function. (lookup_goacc_asyncqueue): New function. (get_goacc_asyncqueue): New function. (acc_async_test): Adjust code to use new async design. (acc_async_test_all): Likewise. (acc_wait): Likewise. (acc_wait_async): Likewise. (acc_wait_all): Likewise. (acc_wait_all_async): Likewise. (goacc_async_free): New function. (goacc_init_asyncqueues): Likewise. (goacc_fini_asyncqueues): Likewise. 
* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async design. (acc_set_cuda_stream): Likewise. * oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'. (host_openacc_register_async_cleanup): Remove. (host_openacc_async_exec): New function. (host_openacc_async_test): Adjust parameters. (host_openacc_async_test_all): Remove. (host_openacc_async_wait): Remove. (host_openacc_async_wait_async): Remove. (host_openacc_async_wait_all): Remove. (host_openacc_async_wait_all_async): Remove. (host_openacc_async_set_async): Remove. (host_openacc_async_synchronize): New function. (host_openacc_async_serialize): New function. (host_openacc_async_host2dev): New function. (host_openacc_async_dev2host): New function. (host_openacc_async_queue_callback): New function. (host_openacc_async_construct): New function. (host_openacc_async_destruct): New function. (struct gomp_device_descr host_dispatch): Remove initialization of old interface, add initialization of new async sub-struct. * oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device. (goacc_attach_host_thread_to_device): Remove old async code usage. * oacc-int.h (goacc_init_asyncqueues): New declaration. (goacc_fini_asyncqueues): Likewise. (goacc_async_copyout_unmap_vars): Likewise. (goacc_async_free): Likewise. (get_goacc_asyncqueue): Likewise. (lookup_goacc_asyncqueue): Likewise. * oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async design. (present_create_copy): Adjust code to use new async design. (delete_copyout): Likewise. (update_dev_host): Likewise. (gomp_acc_insert_pointer): Add async parameter, adjust code to use new async design. (gomp_acc_remove_pointer): Adjust code to use new async design. * oacc-parallel.c (GOACC_parallel_keyed): Adjust code to use new async design. (GOACC_enter_exit_data): Likewise. (goacc_wait): Likewise. (GOACC_update): Likewise. * oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Change to assert fail when called, warn as obsolete in comment. 
* target.c (goacc_device_copy_async): New function. (gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter, add goacc_device_copy_async case. (gomp_copy_dev2host): Likewise. (gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code. (gomp_map_pointer): Likewise. (gomp_map_fields_existing): Likewise. (gomp_map_vars_internal): New always_inline function, renamed from gomp_map_vars. (gomp_map_vars): Implement by calling gomp_map_vars_internal. (gomp_map_vars_async): Implement by calling gomp_map_vars_internal, passing goacc_asyncqueue argument. (gomp_unmap_tgt): Remove static, add attribute_hidden. (gomp_unref_tgt): New function. (gomp_unmap_vars_internal): New always_inline function, renamed from gomp_unmap_vars. (gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal. (gomp_unmap_vars_async): Implement by calling gomp_unmap_vars_internal, passing goacc_asyncqueue argument. (gomp_fini_device): New function. (gomp_exit_data): Adjust gomp_copy_dev2host call. (gomp_load_plugin_for_device): Remove old interface, adjust to load new async interface. (gomp_target_fini): Adjust code to call gomp_fini_device. * plugin/plugin-nvptx.c (struct cuda_map): Remove. (struct ptx_stream): Remove. (struct nvptx_thread): Remove current_stream field. (cuda_map_create): Remove. (cuda_map_destroy): Remove. (map_init): Remove. (map_fini): Remove. (map_pop): Remove. (map_push): Remove. (struct goacc_asyncqueue): Define. (struct nvptx_callback): Define. (struct ptx_free_block): Define. (struct ptx_device): Remove null_stream, active_streams, async_streams, stream_lock, and next fields. (enum ptx_event_type): Remove. (struct ptx_event): Remove. (ptx_event_lock): Remove. (ptx_events): Remove. (init_streams_for_device): Remove. (fini_streams_for_device): Remove. (select_stream_for_async): Remove. (nvptx_init): Remove ptx_events and ptx_event_lock references. (nvptx_attach_host_thread_to_device): Remove CUDA_ERROR_NOT_PERMITTED case. 
(nvptx_open_device): Add free_blocks initialization, remove init_streams_for_device call. (nvptx_close_device): Remove fini_streams_for_device call, add free_blocks destruct code. (event_gc): Remove. (event_add): Remove. (nvptx_exec): Adjust parameters and code. (nvptx_free): Likewise. (nvptx_host2dev): Remove. (nvptx_dev2host): Remove. (nvptx_set_async): Remove. (nvptx_async_test): Remove. (nvptx_async_test_all): Remove. (nvptx_wait): Remove. (nvptx_wait_async): Remove. (nvptx_wait_all): Remove. (nvptx_wait_all_async): Remove. (nvptx_get_cuda_stream): Remove. (nvptx_set_cuda_stream): Remove. (GOMP_OFFLOAD_alloc): Adjust code. (GOMP_OFFLOAD_free): Likewise. (GOMP_OFFLOAD_openacc_register_async_cleanup): Remove. (GOMP_OFFLOAD_openacc_exec): Adjust parameters and code. (GOMP_OFFLOAD_openacc_async_test_all): Remove. (GOMP_OFFLOAD_openacc_async_wait): Remove. (GOMP_OFFLOAD_openacc_async_wait_async): Remove. (GOMP_OFFLOAD_openacc_async_wait_all): Remove. (GOMP_OFFLOAD_openacc_async_wait_all_async): Remove. (GOMP_OFFLOAD_openacc_async_set_async): Remove. (cuda_free_argmem): New function. (GOMP_OFFLOAD_openacc_async_exec): New plugin hook function. (GOMP_OFFLOAD_openacc_create_thread_data): Adjust code. (GOMP_OFFLOAD_openacc_cuda_get_stream): Adjust code. (GOMP_OFFLOAD_openacc_cuda_set_stream): Adjust code. (GOMP_OFFLOAD_openacc_async_construct): New plugin hook function. (GOMP_OFFLOAD_openacc_async_destruct): New plugin hook function. (GOMP_OFFLOAD_openacc_async_test): Remove and re-implement. (GOMP_OFFLOAD_openacc_async_synchronize): New plugin hook function. (GOMP_OFFLOAD_openacc_async_serialize): New plugin hook function. (GOMP_OFFLOAD_openacc_async_queue_callback): New plugin hook function. (cuda_callback_wrapper): New function. (cuda_memcpy_sanity_check): New function. (GOMP_OFFLOAD_host2dev): Remove and re-implement. (GOMP_OFFLOAD_dev2host): Remove and re-implement. (GOMP_OFFLOAD_openacc_async_host2dev): New plugin hook function. 
(GOMP_OFFLOAD_openacc_async_dev2host): New plugin hook function. From-SVN: r271128
This commit is contained in:
parent
f78f5d2392
commit
1f4c5b9bb2
@ -1,3 +1,193 @@
|
||||
2019-05-13 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
* libgomp-plugin.h (struct goacc_asyncqueue): Declare.
|
||||
(struct goacc_asyncqueue_list): Likewise.
|
||||
(goacc_aq): Likewise.
|
||||
(goacc_aq_list): Likewise.
|
||||
(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_test): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_test_all): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_set_async): Remove.
|
||||
(GOMP_OFFLOAD_openacc_exec): Adjust declaration.
|
||||
(GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise.
|
||||
(GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise.
|
||||
(GOMP_OFFLOAD_openacc_async_exec): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_construct): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_destruct): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_test): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_synchronize): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_serialize): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_queue_callback): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_host2dev): Declare.
|
||||
(GOMP_OFFLOAD_openacc_async_dev2host): Declare.
|
||||
|
||||
* libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct.
|
||||
(gomp_acc_insert_pointer): Adjust declaration.
|
||||
(gomp_copy_host2dev): New declaration.
|
||||
(gomp_copy_dev2host): Likewise.
|
||||
(gomp_map_vars_async): Likewise.
|
||||
(gomp_unmap_tgt): Likewise.
|
||||
(gomp_unmap_vars_async): Likewise.
|
||||
(gomp_fini_device): Likewise.
|
||||
|
||||
* oacc-async.c (get_goacc_thread): New function.
|
||||
(get_goacc_thread_device): New function.
|
||||
(lookup_goacc_asyncqueue): New function.
|
||||
(get_goacc_asyncqueue): New function.
|
||||
(acc_async_test): Adjust code to use new async design.
|
||||
(acc_async_test_all): Likewise.
|
||||
(acc_wait): Likewise.
|
||||
(acc_wait_async): Likewise.
|
||||
(acc_wait_all): Likewise.
|
||||
(acc_wait_all_async): Likewise.
|
||||
(goacc_async_free): New function.
|
||||
(goacc_init_asyncqueues): Likewise.
|
||||
(goacc_fini_asyncqueues): Likewise.
|
||||
* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async
|
||||
design.
|
||||
(acc_set_cuda_stream): Likewise.
|
||||
* oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'.
|
||||
(host_openacc_register_async_cleanup): Remove.
|
||||
(host_openacc_async_exec): New function.
|
||||
(host_openacc_async_test): Adjust parameters.
|
||||
(host_openacc_async_test_all): Remove.
|
||||
(host_openacc_async_wait): Remove.
|
||||
(host_openacc_async_wait_async): Remove.
|
||||
(host_openacc_async_wait_all): Remove.
|
||||
(host_openacc_async_wait_all_async): Remove.
|
||||
(host_openacc_async_set_async): Remove.
|
||||
(host_openacc_async_synchronize): New function.
|
||||
(host_openacc_async_serialize): New function.
|
||||
(host_openacc_async_host2dev): New function.
|
||||
(host_openacc_async_dev2host): New function.
|
||||
(host_openacc_async_queue_callback): New function.
|
||||
(host_openacc_async_construct): New function.
|
||||
(host_openacc_async_destruct): New function.
|
||||
(struct gomp_device_descr host_dispatch): Remove initialization of old
|
||||
interface, add initialization of new async sub-struct.
|
||||
* oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device.
|
||||
(goacc_attach_host_thread_to_device): Remove old async code usage.
|
||||
* oacc-int.h (goacc_init_asyncqueues): New declaration.
|
||||
(goacc_fini_asyncqueues): Likewise.
|
||||
(goacc_async_copyout_unmap_vars): Likewise.
|
||||
(goacc_async_free): Likewise.
|
||||
(get_goacc_asyncqueue): Likewise.
|
||||
(lookup_goacc_asyncqueue): Likewise.
|
||||
* oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async
|
||||
design.
|
||||
(present_create_copy): Adjust code to use new async design.
|
||||
(delete_copyout): Likewise.
|
||||
(update_dev_host): Likewise.
|
||||
(gomp_acc_insert_pointer): Add async parameter, adjust code to use new
|
||||
async design.
|
||||
(gomp_acc_remove_pointer): Adjust code to use new async design.
|
||||
* oacc-parallel.c (GOACC_parallel_keyed): Adjust code to use new async
|
||||
design.
|
||||
(GOACC_enter_exit_data): Likewise.
|
||||
(goacc_wait): Likewise.
|
||||
(GOACC_update): Likewise.
|
||||
* oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Change to assert fail
|
||||
when called, warn as obsolete in comment.
|
||||
* target.c (goacc_device_copy_async): New function.
|
||||
(gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter,
|
||||
add goacc_device_copy_async case.
|
||||
(gomp_copy_dev2host): Likewise.
|
||||
(gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code.
|
||||
(gomp_map_pointer): Likewise.
|
||||
(gomp_map_fields_existing): Likewise.
|
||||
(gomp_map_vars_internal): New always_inline function, renamed from
|
||||
gomp_map_vars.
|
||||
(gomp_map_vars): Implement by calling gomp_map_vars_internal.
|
||||
(gomp_map_vars_async): Implement by calling gomp_map_vars_internal,
|
||||
passing goacc_asyncqueue argument.
|
||||
(gomp_unmap_tgt): Remove static, add attribute_hidden.
|
||||
(gomp_unref_tgt): New function.
|
||||
(gomp_unmap_vars_internal): New always_inline function, renamed from
|
||||
gomp_unmap_vars.
|
||||
(gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal.
|
||||
(gomp_unmap_vars_async): Implement by calling
|
||||
gomp_unmap_vars_internal, passing goacc_asyncqueue argument.
|
||||
(gomp_fini_device): New function.
|
||||
(gomp_exit_data): Adjust gomp_copy_dev2host call.
|
||||
(gomp_load_plugin_for_device): Remove old interface, adjust to load
|
||||
new async interface.
|
||||
(gomp_target_fini): Adjust code to call gomp_fini_device.
|
||||
|
||||
* plugin/plugin-nvptx.c (struct cuda_map): Remove.
|
||||
(struct ptx_stream): Remove.
|
||||
(struct nvptx_thread): Remove current_stream field.
|
||||
(cuda_map_create): Remove.
|
||||
(cuda_map_destroy): Remove.
|
||||
(map_init): Remove.
|
||||
(map_fini): Remove.
|
||||
(map_pop): Remove.
|
||||
(map_push): Remove.
|
||||
(struct goacc_asyncqueue): Define.
|
||||
(struct nvptx_callback): Define.
|
||||
(struct ptx_free_block): Define.
|
||||
(struct ptx_device): Remove null_stream, active_streams, async_streams,
|
||||
stream_lock, and next fields.
|
||||
(enum ptx_event_type): Remove.
|
||||
(struct ptx_event): Remove.
|
||||
(ptx_event_lock): Remove.
|
||||
(ptx_events): Remove.
|
||||
(init_streams_for_device): Remove.
|
||||
(fini_streams_for_device): Remove.
|
||||
(select_stream_for_async): Remove.
|
||||
(nvptx_init): Remove ptx_events and ptx_event_lock references.
|
||||
(nvptx_attach_host_thread_to_device): Remove CUDA_ERROR_NOT_PERMITTED
|
||||
case.
|
||||
(nvptx_open_device): Add free_blocks initialization, remove
|
||||
init_streams_for_device call.
|
||||
(nvptx_close_device): Remove fini_streams_for_device call, add
|
||||
free_blocks destruct code.
|
||||
(event_gc): Remove.
|
||||
(event_add): Remove.
|
||||
(nvptx_exec): Adjust parameters and code.
|
||||
(nvptx_free): Likewise.
|
||||
(nvptx_host2dev): Remove.
|
||||
(nvptx_dev2host): Remove.
|
||||
(nvptx_set_async): Remove.
|
||||
(nvptx_async_test): Remove.
|
||||
(nvptx_async_test_all): Remove.
|
||||
(nvptx_wait): Remove.
|
||||
(nvptx_wait_async): Remove.
|
||||
(nvptx_wait_all): Remove.
|
||||
(nvptx_wait_all_async): Remove.
|
||||
(nvptx_get_cuda_stream): Remove.
|
||||
(nvptx_set_cuda_stream): Remove.
|
||||
(GOMP_OFFLOAD_alloc): Adjust code.
|
||||
(GOMP_OFFLOAD_free): Likewise.
|
||||
(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
|
||||
(GOMP_OFFLOAD_openacc_exec): Adjust parameters and code.
|
||||
(GOMP_OFFLOAD_openacc_async_test_all): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
|
||||
(GOMP_OFFLOAD_openacc_async_set_async): Remove.
|
||||
(cuda_free_argmem): New function.
|
||||
(GOMP_OFFLOAD_openacc_async_exec): New plugin hook function.
|
||||
(GOMP_OFFLOAD_openacc_create_thread_data): Adjust code.
|
||||
(GOMP_OFFLOAD_openacc_cuda_get_stream): Adjust code.
|
||||
(GOMP_OFFLOAD_openacc_cuda_set_stream): Adjust code.
|
||||
(GOMP_OFFLOAD_openacc_async_construct): New plugin hook function.
|
||||
(GOMP_OFFLOAD_openacc_async_destruct): New plugin hook function.
|
||||
(GOMP_OFFLOAD_openacc_async_test): Remove and re-implement.
|
||||
(GOMP_OFFLOAD_openacc_async_synchronize): New plugin hook function.
|
||||
(GOMP_OFFLOAD_openacc_async_serialize): New plugin hook function.
|
||||
(GOMP_OFFLOAD_openacc_async_queue_callback): New plugin hook function.
|
||||
(cuda_callback_wrapper): New function.
|
||||
(cuda_memcpy_sanity_check): New function.
|
||||
(GOMP_OFFLOAD_host2dev): Remove and re-implement.
|
||||
(GOMP_OFFLOAD_dev2host): Remove and re-implement.
|
||||
(GOMP_OFFLOAD_openacc_async_host2dev): New plugin hook function.
|
||||
(GOMP_OFFLOAD_openacc_async_dev2host): New plugin hook function.
|
||||
|
||||
2019-05-07 Thomas Schwinge <thomas@codesourcery.com>
|
||||
|
||||
PR target/87835
|
||||
|
@ -53,6 +53,20 @@ enum offload_target_type
|
||||
OFFLOAD_TARGET_TYPE_HSA = 7
|
||||
};
|
||||
|
||||
/* Opaque type to represent plugin-dependent implementation of an
|
||||
OpenACC asynchronous queue. */
|
||||
struct goacc_asyncqueue;
|
||||
|
||||
/* Used to keep a list of active asynchronous queues. */
|
||||
struct goacc_asyncqueue_list
|
||||
{
|
||||
struct goacc_asyncqueue *aq;
|
||||
struct goacc_asyncqueue_list *next;
|
||||
};
|
||||
|
||||
typedef struct goacc_asyncqueue *goacc_aq;
|
||||
typedef struct goacc_asyncqueue_list *goacc_aq_list;
|
||||
|
||||
/* Auxiliary struct, used for transferring pairs of addresses from plugin
|
||||
to libgomp. */
|
||||
struct addr_pair
|
||||
@ -93,22 +107,31 @@ extern bool GOMP_OFFLOAD_dev2dev (int, void *, const void *, size_t);
|
||||
extern bool GOMP_OFFLOAD_can_run (void *);
|
||||
extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
|
||||
extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
|
||||
|
||||
extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **,
|
||||
void **, int, unsigned *, void *);
|
||||
extern void GOMP_OFFLOAD_openacc_register_async_cleanup (void *, int);
|
||||
extern int GOMP_OFFLOAD_openacc_async_test (int);
|
||||
extern int GOMP_OFFLOAD_openacc_async_test_all (void);
|
||||
extern void GOMP_OFFLOAD_openacc_async_wait (int);
|
||||
extern void GOMP_OFFLOAD_openacc_async_wait_async (int, int);
|
||||
extern void GOMP_OFFLOAD_openacc_async_wait_all (void);
|
||||
extern void GOMP_OFFLOAD_openacc_async_wait_all_async (int);
|
||||
extern void GOMP_OFFLOAD_openacc_async_set_async (int);
|
||||
void **, unsigned *, void *);
|
||||
extern void *GOMP_OFFLOAD_openacc_create_thread_data (int);
|
||||
extern void GOMP_OFFLOAD_openacc_destroy_thread_data (void *);
|
||||
extern struct goacc_asyncqueue *GOMP_OFFLOAD_openacc_async_construct (void);
|
||||
extern bool GOMP_OFFLOAD_openacc_async_destruct (struct goacc_asyncqueue *);
|
||||
extern int GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *);
|
||||
extern bool GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *);
|
||||
extern bool GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *,
|
||||
struct goacc_asyncqueue *);
|
||||
extern void GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *,
|
||||
void (*)(void *), void *);
|
||||
extern void GOMP_OFFLOAD_openacc_async_exec (void (*) (void *), size_t, void **,
|
||||
void **, unsigned *, void *,
|
||||
struct goacc_asyncqueue *);
|
||||
extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size_t,
|
||||
struct goacc_asyncqueue *);
|
||||
extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
|
||||
struct goacc_asyncqueue *);
|
||||
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
|
||||
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
|
||||
extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (int);
|
||||
extern int GOMP_OFFLOAD_openacc_cuda_set_stream (int, void *);
|
||||
extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
|
||||
extern int GOMP_OFFLOAD_openacc_cuda_set_stream (struct goacc_asyncqueue *,
|
||||
void *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -949,24 +949,31 @@ typedef struct acc_dispatch_t
|
||||
/* Execute. */
|
||||
__typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
|
||||
|
||||
/* Async cleanup callback registration. */
|
||||
__typeof (GOMP_OFFLOAD_openacc_register_async_cleanup)
|
||||
*register_async_cleanup_func;
|
||||
|
||||
/* Asynchronous routines. */
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_test) *async_test_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_test_all) *async_test_all_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_wait) *async_wait_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_wait_async) *async_wait_async_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_wait_all) *async_wait_all_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_wait_all_async)
|
||||
*async_wait_all_async_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_set_async) *async_set_async_func;
|
||||
|
||||
/* Create/destroy TLS data. */
|
||||
__typeof (GOMP_OFFLOAD_openacc_create_thread_data) *create_thread_data_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_destroy_thread_data)
|
||||
*destroy_thread_data_func;
|
||||
|
||||
struct {
|
||||
/* Once created and put into the "active" list, asyncqueues are then never
|
||||
destructed and removed from the "active" list, other than if the TODO
|
||||
device is shut down. */
|
||||
gomp_mutex_t lock;
|
||||
int nasyncqueue;
|
||||
struct goacc_asyncqueue **asyncqueue;
|
||||
struct goacc_asyncqueue_list *active;
|
||||
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_construct) *construct_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_destruct) *destruct_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_test) *test_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_synchronize) *synchronize_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_serialize) *serialize_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_queue_callback) *queue_callback_func;
|
||||
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func;
|
||||
__typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
|
||||
} async;
|
||||
|
||||
/* NVIDIA target specific routines. */
|
||||
struct {
|
||||
@ -1053,17 +1060,33 @@ enum gomp_map_vars_kind
|
||||
GOMP_MAP_VARS_ENTER_DATA
|
||||
};
|
||||
|
||||
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
|
||||
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
|
||||
extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
|
||||
extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
|
||||
unsigned short *);
|
||||
struct gomp_coalesce_buf;
|
||||
extern void gomp_copy_host2dev (struct gomp_device_descr *,
|
||||
struct goacc_asyncqueue *, void *, const void *,
|
||||
size_t, struct gomp_coalesce_buf *);
|
||||
extern void gomp_copy_dev2host (struct gomp_device_descr *,
|
||||
struct goacc_asyncqueue *, void *, const void *,
|
||||
size_t);
|
||||
|
||||
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
|
||||
size_t, void **, void **,
|
||||
size_t *, void *, bool,
|
||||
enum gomp_map_vars_kind);
|
||||
extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
|
||||
struct goacc_asyncqueue *,
|
||||
size_t, void **, void **,
|
||||
size_t *, void *, bool,
|
||||
enum gomp_map_vars_kind);
|
||||
extern void gomp_unmap_tgt (struct target_mem_desc *);
|
||||
extern void gomp_unmap_vars (struct target_mem_desc *, bool);
|
||||
extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
|
||||
struct goacc_asyncqueue *);
|
||||
extern void gomp_init_device (struct gomp_device_descr *);
|
||||
extern bool gomp_fini_device (struct gomp_device_descr *);
|
||||
extern void gomp_free_memmap (struct splay_tree_s *);
|
||||
extern void gomp_unload_device (struct gomp_device_descr *);
|
||||
extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
|
||||
|
@ -27,47 +27,160 @@
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "openacc.h"
|
||||
#include "libgomp.h"
|
||||
#include "oacc-int.h"
|
||||
|
||||
int
|
||||
acc_async_test (int async)
|
||||
static struct goacc_thread *
|
||||
get_goacc_thread (void)
|
||||
{
|
||||
if (!async_valid_p (async))
|
||||
gomp_fatal ("invalid async argument: %d", async);
|
||||
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
return thr->dev->openacc.async_test_func (async);
|
||||
return thr;
|
||||
}
|
||||
|
||||
static struct gomp_device_descr *
|
||||
get_goacc_thread_device (void)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
return thr->dev;
|
||||
}
|
||||
|
||||
static int
|
||||
validate_async_val (int async)
|
||||
{
|
||||
if (!async_valid_p (async))
|
||||
gomp_fatal ("invalid async-argument: %d", async);
|
||||
|
||||
if (async == acc_async_sync)
|
||||
return -1;
|
||||
|
||||
if (async == acc_async_noval)
|
||||
return 0;
|
||||
|
||||
if (async >= 0)
|
||||
/* TODO: we reserve 0 for acc_async_noval before we can clarify the
|
||||
semantics of "default_async". */
|
||||
return 1 + async;
|
||||
else
|
||||
__builtin_unreachable ();
|
||||
}
|
||||
|
||||
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
|
||||
might return NULL if no asyncqueue is to be used. Otherwise, if CREATE,
|
||||
create the asyncqueue if it doesn't exist yet. */
|
||||
|
||||
attribute_hidden struct goacc_asyncqueue *
|
||||
lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
|
||||
{
|
||||
async = validate_async_val (async);
|
||||
if (async < 0)
|
||||
return NULL;
|
||||
|
||||
struct goacc_asyncqueue *ret_aq = NULL;
|
||||
struct gomp_device_descr *dev = thr->dev;
|
||||
|
||||
gomp_mutex_lock (&dev->openacc.async.lock);
|
||||
|
||||
if (!create
|
||||
&& (async >= dev->openacc.async.nasyncqueue
|
||||
|| !dev->openacc.async.asyncqueue[async]))
|
||||
goto end;
|
||||
|
||||
if (async >= dev->openacc.async.nasyncqueue)
|
||||
{
|
||||
int diff = async + 1 - dev->openacc.async.nasyncqueue;
|
||||
dev->openacc.async.asyncqueue
|
||||
= gomp_realloc (dev->openacc.async.asyncqueue,
|
||||
sizeof (goacc_aq) * (async + 1));
|
||||
memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
|
||||
0, sizeof (goacc_aq) * diff);
|
||||
dev->openacc.async.nasyncqueue = async + 1;
|
||||
}
|
||||
|
||||
if (!dev->openacc.async.asyncqueue[async])
|
||||
{
|
||||
dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func ();
|
||||
|
||||
if (!dev->openacc.async.asyncqueue[async])
|
||||
{
|
||||
gomp_mutex_unlock (&dev->openacc.async.lock);
|
||||
gomp_fatal ("async %d creation failed", async);
|
||||
}
|
||||
|
||||
/* Link new async queue into active list. */
|
||||
goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
|
||||
n->aq = dev->openacc.async.asyncqueue[async];
|
||||
n->next = dev->openacc.async.active;
|
||||
dev->openacc.async.active = n;
|
||||
}
|
||||
|
||||
ret_aq = dev->openacc.async.asyncqueue[async];
|
||||
|
||||
end:
|
||||
gomp_mutex_unlock (&dev->openacc.async.lock);
|
||||
return ret_aq;
|
||||
}
|
||||
|
||||
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
|
||||
might return NULL if no asyncqueue is to be used. Otherwise, create the
|
||||
asyncqueue if it doesn't exist yet. */
|
||||
|
||||
attribute_hidden struct goacc_asyncqueue *
|
||||
get_goacc_asyncqueue (int async)
|
||||
{
|
||||
struct goacc_thread *thr = get_goacc_thread ();
|
||||
return lookup_goacc_asyncqueue (thr, true, async);
|
||||
}
|
||||
|
||||
int
|
||||
acc_async_test (int async)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
|
||||
if (!aq)
|
||||
return 1;
|
||||
else
|
||||
return thr->dev->openacc.async.test_func (aq);
|
||||
}
|
||||
|
||||
int
|
||||
acc_async_test_all (void)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
struct goacc_thread *thr = get_goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
return thr->dev->openacc.async_test_all_func ();
|
||||
int ret = 1;
|
||||
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
||||
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
||||
if (!thr->dev->openacc.async.test_func (l->aq))
|
||||
{
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
acc_wait (int async)
|
||||
{
|
||||
if (!async_valid_p (async))
|
||||
gomp_fatal ("invalid async argument: %d", async);
|
||||
struct goacc_thread *thr = get_goacc_thread ();
|
||||
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
|
||||
thr->dev->openacc.async_wait_func (async);
|
||||
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
|
||||
if (aq && !thr->dev->openacc.async.synchronize_func (aq))
|
||||
gomp_fatal ("wait on %d failed", async);
|
||||
}
|
||||
|
||||
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */
|
||||
@ -84,23 +197,46 @@ acc_async_wait (int async)
|
||||
void
|
||||
acc_wait_async (int async1, int async2)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
struct goacc_thread *thr = get_goacc_thread ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
|
||||
/* TODO: Is this also correct for acc_async_sync, assuming that in this case,
|
||||
we'll always be synchronous anyways? */
|
||||
if (!aq1)
|
||||
return;
|
||||
|
||||
thr->dev->openacc.async_wait_async_func (async1, async2);
|
||||
goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
|
||||
/* An async queue is always synchronized with itself. */
|
||||
if (aq1 == aq2)
|
||||
return;
|
||||
|
||||
if (aq2)
|
||||
{
|
||||
if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
|
||||
gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* TODO: Local thread synchronization.
|
||||
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
|
||||
if (!thr->dev->openacc.async.synchronize_func (aq1))
|
||||
gomp_fatal ("wait on %d failed", async1);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
acc_wait_all (void)
|
||||
{
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
struct gomp_device_descr *dev = get_goacc_thread_device ();
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
bool ret = true;
|
||||
gomp_mutex_lock (&dev->openacc.async.lock);
|
||||
for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next)
|
||||
ret &= dev->openacc.async.synchronize_func (l->aq);
|
||||
gomp_mutex_unlock (&dev->openacc.async.lock);
|
||||
|
||||
thr->dev->openacc.async_wait_all_func ();
|
||||
if (!ret)
|
||||
gomp_fatal ("wait all failed");
|
||||
}
|
||||
|
||||
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */
|
||||
@ -117,13 +253,73 @@ acc_async_wait_all (void)
|
||||
void
|
||||
acc_wait_all_async (int async)
|
||||
{
|
||||
if (!async_valid_p (async))
|
||||
gomp_fatal ("invalid async argument: %d", async);
|
||||
struct goacc_thread *thr = get_goacc_thread ();
|
||||
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
|
||||
|
||||
if (!thr || !thr->dev)
|
||||
gomp_fatal ("no device active");
|
||||
bool ret = true;
|
||||
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
||||
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
|
||||
{
|
||||
if (waiting_queue)
|
||||
ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
|
||||
else
|
||||
/* TODO: Local thread synchronization.
|
||||
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
|
||||
ret &= thr->dev->openacc.async.synchronize_func (l->aq);
|
||||
}
|
||||
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
||||
|
||||
thr->dev->openacc.async_wait_all_async_func (async);
|
||||
if (!ret)
|
||||
gomp_fatal ("wait all async(%d) failed", async);
|
||||
}
|
||||
|
||||
attribute_hidden void
|
||||
goacc_async_free (struct gomp_device_descr *devicep,
|
||||
struct goacc_asyncqueue *aq, void *ptr)
|
||||
{
|
||||
if (!aq)
|
||||
free (ptr);
|
||||
else
|
||||
devicep->openacc.async.queue_callback_func (aq, free, ptr);
|
||||
}
|
||||
|
||||
/* This function initializes the asyncqueues for the device specified by
|
||||
DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
|
||||
return. */
|
||||
|
||||
attribute_hidden void
|
||||
goacc_init_asyncqueues (struct gomp_device_descr *devicep)
|
||||
{
|
||||
devicep->openacc.async.nasyncqueue = 0;
|
||||
devicep->openacc.async.asyncqueue = NULL;
|
||||
devicep->openacc.async.active = NULL;
|
||||
gomp_mutex_init (&devicep->openacc.async.lock);
|
||||
}
|
||||
|
||||
/* This function finalizes the asyncqueues for the device specified by DEVICEP.
|
||||
TODO DEVICEP must be locked on entry, and remains locked on return. */
|
||||
|
||||
attribute_hidden bool
|
||||
goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
|
||||
{
|
||||
bool ret = true;
|
||||
gomp_mutex_lock (&devicep->openacc.async.lock);
|
||||
if (devicep->openacc.async.nasyncqueue > 0)
|
||||
{
|
||||
goacc_aq_list next;
|
||||
for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
|
||||
{
|
||||
ret &= devicep->openacc.async.destruct_func (l->aq);
|
||||
next = l->next;
|
||||
free (l);
|
||||
}
|
||||
free (devicep->openacc.async.asyncqueue);
|
||||
devicep->openacc.async.nasyncqueue = 0;
|
||||
devicep->openacc.async.asyncqueue = NULL;
|
||||
devicep->openacc.async.active = NULL;
|
||||
}
|
||||
gomp_mutex_unlock (&devicep->openacc.async.lock);
|
||||
gomp_mutex_destroy (&devicep->openacc.async.lock);
|
||||
return ret;
|
||||
}
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "config.h"
|
||||
#include "libgomp.h"
|
||||
#include "oacc-int.h"
|
||||
#include <assert.h>
|
||||
|
||||
void *
|
||||
acc_get_current_cuda_device (void)
|
||||
@ -62,7 +63,11 @@ acc_get_cuda_stream (int async)
|
||||
return NULL;
|
||||
|
||||
if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
|
||||
return thr->dev->openacc.cuda.get_stream_func (async);
|
||||
{
|
||||
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
|
||||
if (aq)
|
||||
return thr->dev->openacc.cuda.get_stream_func (aq);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -79,8 +84,23 @@ acc_set_cuda_stream (int async, void *stream)
|
||||
|
||||
thr = goacc_thread ();
|
||||
|
||||
int ret = -1;
|
||||
if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func)
|
||||
return thr->dev->openacc.cuda.set_stream_func (async, stream);
|
||||
{
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
/* Due to not using an asyncqueue for "acc_async_sync", this cannot be
|
||||
used to change the CUDA stream associated with "acc_async_sync". */
|
||||
if (!aq)
|
||||
{
|
||||
assert (async == acc_async_sync);
|
||||
gomp_debug (0, "Refusing request to set CUDA stream associated"
|
||||
" with \"acc_async_sync\"\n");
|
||||
return 0;
|
||||
}
|
||||
gomp_mutex_lock (&thr->dev->openacc.async.lock);
|
||||
ret = thr->dev->openacc.cuda.set_stream_func (aq, stream);
|
||||
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
|
||||
}
|
||||
|
||||
return -1;
|
||||
return ret;
|
||||
}
|
||||
|
@ -140,55 +140,89 @@ host_openacc_exec (void (*fn) (void *),
|
||||
size_t mapnum __attribute__ ((unused)),
|
||||
void **hostaddrs,
|
||||
void **devaddrs __attribute__ ((unused)),
|
||||
int async __attribute__ ((unused)),
|
||||
unsigned *dims __attribute ((unused)),
|
||||
unsigned *dims __attribute__ ((unused)),
|
||||
void *targ_mem_desc __attribute__ ((unused)))
|
||||
{
|
||||
fn (hostaddrs);
|
||||
}
|
||||
|
||||
static void
|
||||
host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)),
|
||||
int async __attribute__ ((unused)))
|
||||
host_openacc_async_exec (void (*fn) (void *),
|
||||
size_t mapnum __attribute__ ((unused)),
|
||||
void **hostaddrs,
|
||||
void **devaddrs __attribute__ ((unused)),
|
||||
unsigned *dims __attribute__ ((unused)),
|
||||
void *targ_mem_desc __attribute__ ((unused)),
|
||||
struct goacc_asyncqueue *aq __attribute__ ((unused)))
|
||||
{
|
||||
fn (hostaddrs);
|
||||
}
|
||||
|
||||
static int
|
||||
host_openacc_async_test (int async __attribute__ ((unused)))
|
||||
host_openacc_async_test (struct goacc_asyncqueue *aq __attribute__ ((unused)))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
host_openacc_async_test_all (void)
|
||||
static bool
|
||||
host_openacc_async_synchronize (struct goacc_asyncqueue *aq
|
||||
__attribute__ ((unused)))
|
||||
{
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
host_openacc_async_serialize (struct goacc_asyncqueue *aq1
|
||||
__attribute__ ((unused)),
|
||||
struct goacc_asyncqueue *aq2
|
||||
__attribute__ ((unused)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
host_openacc_async_host2dev (int ord __attribute__ ((unused)),
|
||||
void *dst __attribute__ ((unused)),
|
||||
const void *src __attribute__ ((unused)),
|
||||
size_t n __attribute__ ((unused)),
|
||||
struct goacc_asyncqueue *aq
|
||||
__attribute__ ((unused)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
host_openacc_async_dev2host (int ord __attribute__ ((unused)),
|
||||
void *dst __attribute__ ((unused)),
|
||||
const void *src __attribute__ ((unused)),
|
||||
size_t n __attribute__ ((unused)),
|
||||
struct goacc_asyncqueue *aq
|
||||
__attribute__ ((unused)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
host_openacc_async_wait (int async __attribute__ ((unused)))
|
||||
host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
|
||||
__attribute__ ((unused)),
|
||||
void (*callback_fn)(void *)
|
||||
__attribute__ ((unused)),
|
||||
void *userptr __attribute__ ((unused)))
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
host_openacc_async_wait_async (int async1 __attribute__ ((unused)),
|
||||
int async2 __attribute__ ((unused)))
|
||||
static struct goacc_asyncqueue *
|
||||
host_openacc_async_construct (void)
|
||||
{
|
||||
/* Non-NULL 0xffff... value as opaque dummy. */
|
||||
return (struct goacc_asyncqueue *) -1;
|
||||
}
|
||||
|
||||
static void
|
||||
host_openacc_async_wait_all (void)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
host_openacc_async_wait_all_async (int async __attribute__ ((unused)))
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
host_openacc_async_set_async (int async __attribute__ ((unused)))
|
||||
static bool
|
||||
host_openacc_async_destruct (struct goacc_asyncqueue *aq
|
||||
__attribute__ ((unused)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *
|
||||
@ -235,19 +269,21 @@ static struct gomp_device_descr host_dispatch =
|
||||
|
||||
.exec_func = host_openacc_exec,
|
||||
|
||||
.register_async_cleanup_func = host_openacc_register_async_cleanup,
|
||||
|
||||
.async_test_func = host_openacc_async_test,
|
||||
.async_test_all_func = host_openacc_async_test_all,
|
||||
.async_wait_func = host_openacc_async_wait,
|
||||
.async_wait_async_func = host_openacc_async_wait_async,
|
||||
.async_wait_all_func = host_openacc_async_wait_all,
|
||||
.async_wait_all_async_func = host_openacc_async_wait_all_async,
|
||||
.async_set_async_func = host_openacc_async_set_async,
|
||||
|
||||
.create_thread_data_func = host_openacc_create_thread_data,
|
||||
.destroy_thread_data_func = host_openacc_destroy_thread_data,
|
||||
|
||||
.async = {
|
||||
.construct_func = host_openacc_async_construct,
|
||||
.destruct_func = host_openacc_async_destruct,
|
||||
.test_func = host_openacc_async_test,
|
||||
.synchronize_func = host_openacc_async_synchronize,
|
||||
.serialize_func = host_openacc_async_serialize,
|
||||
.queue_callback_func = host_openacc_async_queue_callback,
|
||||
.exec_func = host_openacc_async_exec,
|
||||
.dev2host_func = host_openacc_async_dev2host,
|
||||
.host2dev_func = host_openacc_async_host2dev,
|
||||
},
|
||||
|
||||
.cuda = {
|
||||
.get_current_device_func = NULL,
|
||||
.get_current_context_func = NULL,
|
||||
|
@ -309,7 +309,7 @@ acc_shutdown_1 (acc_device_t d)
|
||||
if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
|
||||
{
|
||||
devices_active = true;
|
||||
ret &= acc_dev->fini_device_func (acc_dev->target_id);
|
||||
ret &= gomp_fini_device (acc_dev);
|
||||
acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
|
||||
}
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
@ -426,8 +426,6 @@ goacc_attach_host_thread_to_device (int ord)
|
||||
|
||||
thr->target_tls
|
||||
= acc_dev->openacc.create_thread_data_func (ord);
|
||||
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
}
|
||||
|
||||
/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
|
||||
|
@ -99,6 +99,13 @@ void goacc_restore_bind (void);
|
||||
void goacc_lazy_initialize (void);
|
||||
void goacc_host_init (void);
|
||||
|
||||
void goacc_init_asyncqueues (struct gomp_device_descr *);
|
||||
bool goacc_fini_asyncqueues (struct gomp_device_descr *);
|
||||
void goacc_async_free (struct gomp_device_descr *, struct goacc_asyncqueue *,
|
||||
void *);
|
||||
struct goacc_asyncqueue *get_goacc_asyncqueue (int);
|
||||
struct goacc_asyncqueue *lookup_goacc_asyncqueue (struct goacc_thread *, bool,
|
||||
int);
|
||||
static inline bool
|
||||
async_valid_stream_id_p (int async)
|
||||
{
|
||||
|
@ -172,18 +172,11 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
|
||||
return;
|
||||
}
|
||||
|
||||
if (async > acc_async_sync)
|
||||
thr->dev->openacc.async_set_async_func (async);
|
||||
|
||||
bool ret = (from
|
||||
? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
|
||||
: thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
|
||||
|
||||
if (async > acc_async_sync)
|
||||
thr->dev->openacc.async_set_async_func (acc_async_sync);
|
||||
|
||||
if (!ret)
|
||||
gomp_fatal ("error in %s", libfnname);
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
if (from)
|
||||
gomp_copy_dev2host (thr->dev, aq, h, d, s);
|
||||
else
|
||||
gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
|
||||
}
|
||||
|
||||
void
|
||||
@ -509,17 +502,13 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
|
||||
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
|
||||
if (async > acc_async_sync)
|
||||
acc_dev->openacc.async_set_async_func (async);
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
|
||||
tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
|
||||
GOMP_MAP_VARS_OPENACC);
|
||||
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
|
||||
&kinds, true, GOMP_MAP_VARS_OPENACC);
|
||||
/* Initialize dynamic refcount. */
|
||||
tgt->list[0].key->dynamic_refcount = 1;
|
||||
|
||||
if (async > acc_async_sync)
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
|
||||
gomp_mutex_lock (&acc_dev->lock);
|
||||
|
||||
d = tgt->to_free;
|
||||
@ -676,13 +665,9 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
|
||||
|
||||
if (f & FLAG_COPYOUT)
|
||||
{
|
||||
if (async > acc_async_sync)
|
||||
acc_dev->openacc.async_set_async_func (async);
|
||||
acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
|
||||
if (async > acc_async_sync)
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
gomp_copy_dev2host (acc_dev, aq, h, d, s);
|
||||
}
|
||||
|
||||
gomp_remove_var (acc_dev, n);
|
||||
}
|
||||
|
||||
@ -765,16 +750,12 @@ update_dev_host (int is_dev, void *h, size_t s, int async)
|
||||
d = (void *) (n->tgt->tgt_start + n->tgt_offset
|
||||
+ (uintptr_t) h - n->host_start);
|
||||
|
||||
if (async > acc_async_sync)
|
||||
acc_dev->openacc.async_set_async_func (async);
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
|
||||
if (is_dev)
|
||||
acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
|
||||
gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
|
||||
else
|
||||
acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
|
||||
|
||||
if (async > acc_async_sync)
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
gomp_copy_dev2host (acc_dev, aq, h, d, s);
|
||||
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
}
|
||||
@ -805,7 +786,7 @@ acc_update_self_async (void *h, size_t s, int async)
|
||||
|
||||
void
|
||||
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
|
||||
void *kinds)
|
||||
void *kinds, int async)
|
||||
{
|
||||
struct target_mem_desc *tgt;
|
||||
struct goacc_thread *thr = goacc_thread ();
|
||||
@ -835,8 +816,9 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
|
||||
}
|
||||
|
||||
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
||||
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
|
||||
NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
|
||||
NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
|
||||
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
|
||||
|
||||
/* Initialize dynamic refcount. */
|
||||
@ -930,7 +912,10 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
|
||||
if (async < acc_async_noval)
|
||||
gomp_unmap_vars (t, true);
|
||||
else
|
||||
t->device_descr->openacc.register_async_cleanup_func (t, async);
|
||||
{
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
gomp_unmap_vars_async (t, true, aq);
|
||||
}
|
||||
}
|
||||
|
||||
gomp_mutex_unlock (&acc_dev->lock);
|
||||
|
@ -217,8 +217,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
||||
}
|
||||
va_end (ap);
|
||||
|
||||
acc_dev->openacc.async_set_async_func (async);
|
||||
|
||||
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
|
||||
{
|
||||
k.host_start = (uintptr_t) fn;
|
||||
@ -235,44 +233,29 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
|
||||
else
|
||||
tgt_fn = (void (*)) fn;
|
||||
|
||||
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
|
||||
GOMP_MAP_VARS_OPENACC);
|
||||
goacc_aq aq = get_goacc_asyncqueue (async);
|
||||
|
||||
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
|
||||
true, GOMP_MAP_VARS_OPENACC);
|
||||
|
||||
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
|
||||
for (i = 0; i < mapnum; i++)
|
||||
devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
|
||||
+ tgt->list[i].key->tgt_offset
|
||||
+ tgt->list[i].offset);
|
||||
|
||||
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
|
||||
async, dims, tgt);
|
||||
|
||||
/* If running synchronously, unmap immediately. */
|
||||
bool copyfrom = true;
|
||||
if (async_synchronous_p (async))
|
||||
gomp_unmap_vars (tgt, true);
|
||||
if (aq == NULL)
|
||||
{
|
||||
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
|
||||
dims, tgt);
|
||||
/* If running synchronously, unmap immediately. */
|
||||
gomp_unmap_vars (tgt, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
bool async_unmap = false;
|
||||
for (size_t i = 0; i < tgt->list_count; i++)
|
||||
{
|
||||
splay_tree_key k = tgt->list[i].key;
|
||||
if (k && k->refcount == 1)
|
||||
{
|
||||
async_unmap = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (async_unmap)
|
||||
tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
|
||||
else
|
||||
{
|
||||
copyfrom = false;
|
||||
gomp_unmap_vars (tgt, copyfrom);
|
||||
}
|
||||
acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
|
||||
dims, tgt, aq);
|
||||
gomp_unmap_vars_async (tgt, true, aq);
|
||||
}
|
||||
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
}
|
||||
|
||||
/* Legacy entry point (GCC 5). Only provide host fallback execution. */
|
||||
@ -383,8 +366,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
|
||||
finalize = true;
|
||||
}
|
||||
|
||||
acc_dev->openacc.async_set_async_func (async);
|
||||
|
||||
/* Determine if this is an "acc enter data". */
|
||||
for (i = 0; i < mapnum; ++i)
|
||||
{
|
||||
@ -437,11 +418,11 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
|
||||
{
|
||||
case GOMP_MAP_ALLOC:
|
||||
case GOMP_MAP_FORCE_ALLOC:
|
||||
acc_create (hostaddrs[i], sizes[i]);
|
||||
acc_create_async (hostaddrs[i], sizes[i], async);
|
||||
break;
|
||||
case GOMP_MAP_TO:
|
||||
case GOMP_MAP_FORCE_TO:
|
||||
acc_copyin (hostaddrs[i], sizes[i]);
|
||||
acc_copyin_async (hostaddrs[i], sizes[i], async);
|
||||
break;
|
||||
default:
|
||||
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
|
||||
@ -452,7 +433,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
|
||||
else
|
||||
{
|
||||
gomp_acc_insert_pointer (pointer, &hostaddrs[i],
|
||||
&sizes[i], &kinds[i]);
|
||||
&sizes[i], &kinds[i], async);
|
||||
/* Increment 'i' by two because OpenACC requires fortran
|
||||
arrays to be contiguous, so each PSET is associated with
|
||||
one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
|
||||
@ -477,17 +458,17 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
|
||||
if (acc_is_present (hostaddrs[i], sizes[i]))
|
||||
{
|
||||
if (finalize)
|
||||
acc_delete_finalize (hostaddrs[i], sizes[i]);
|
||||
acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
|
||||
else
|
||||
acc_delete (hostaddrs[i], sizes[i]);
|
||||
acc_delete_async (hostaddrs[i], sizes[i], async);
|
||||
}
|
||||
break;
|
||||
case GOMP_MAP_FROM:
|
||||
case GOMP_MAP_FORCE_FROM:
|
||||
if (finalize)
|
||||
acc_copyout_finalize (hostaddrs[i], sizes[i]);
|
||||
acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
|
||||
else
|
||||
acc_copyout (hostaddrs[i], sizes[i]);
|
||||
acc_copyout_async (hostaddrs[i], sizes[i], async);
|
||||
break;
|
||||
default:
|
||||
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
|
||||
@ -505,8 +486,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
|
||||
i += pointer - 1;
|
||||
}
|
||||
}
|
||||
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -532,9 +511,10 @@ goacc_wait (int async, int num_waits, va_list *ap)
|
||||
if (async == acc_async_sync)
|
||||
acc_wait (qid);
|
||||
else if (qid == async)
|
||||
;/* If we're waiting on the same asynchronous queue as we're
|
||||
launching on, the queue itself will order work as
|
||||
required, so there's no need to wait explicitly. */
|
||||
/* If we're waiting on the same asynchronous queue as we're
|
||||
launching on, the queue itself will order work as
|
||||
required, so there's no need to wait explicitly. */
|
||||
;
|
||||
else
|
||||
acc_wait_async (qid, async);
|
||||
}
|
||||
@ -567,8 +547,6 @@ GOACC_update (int flags_m, size_t mapnum,
|
||||
va_end (ap);
|
||||
}
|
||||
|
||||
acc_dev->openacc.async_set_async_func (async);
|
||||
|
||||
bool update_device = false;
|
||||
for (i = 0; i < mapnum; ++i)
|
||||
{
|
||||
@ -591,6 +569,8 @@ GOACC_update (int flags_m, size_t mapnum,
|
||||
the value of the allocated device memory in the
|
||||
previous pointer. */
|
||||
*(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
|
||||
/* TODO: verify that we really cannot use acc_update_device_async
|
||||
here. */
|
||||
acc_update_device (hostaddrs[i], sizeof (uintptr_t));
|
||||
|
||||
/* Restore the host pointer. */
|
||||
@ -608,7 +588,7 @@ GOACC_update (int flags_m, size_t mapnum,
|
||||
/* Fallthru */
|
||||
case GOMP_MAP_FORCE_TO:
|
||||
update_device = true;
|
||||
acc_update_device (hostaddrs[i], sizes[i]);
|
||||
acc_update_device_async (hostaddrs[i], sizes[i], async);
|
||||
break;
|
||||
|
||||
case GOMP_MAP_FROM:
|
||||
@ -620,7 +600,7 @@ GOACC_update (int flags_m, size_t mapnum,
|
||||
/* Fallthru */
|
||||
case GOMP_MAP_FORCE_FROM:
|
||||
update_device = false;
|
||||
acc_update_self (hostaddrs[i], sizes[i]);
|
||||
acc_update_self_async (hostaddrs[i], sizes[i], async);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -628,8 +608,6 @@ GOACC_update (int flags_m, size_t mapnum,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
acc_dev->openacc.async_set_async_func (acc_async_sync);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -30,15 +30,12 @@
|
||||
#include "oacc-plugin.h"
|
||||
#include "oacc-int.h"
|
||||
|
||||
/* This plugin function is now obsolete. */
|
||||
void
|
||||
GOMP_PLUGIN_async_unmap_vars (void *ptr, int async)
|
||||
GOMP_PLUGIN_async_unmap_vars (void *ptr __attribute__((unused)),
|
||||
int async __attribute__((unused)))
|
||||
{
|
||||
struct target_mem_desc *tgt = ptr;
|
||||
struct gomp_device_descr *devicep = tgt->device_descr;
|
||||
|
||||
devicep->openacc.async_set_async_func (async);
|
||||
gomp_unmap_vars (tgt, true);
|
||||
devicep->openacc.async_set_async_func (acc_async_sync);
|
||||
gomp_fatal ("invalid plugin function");
|
||||
}
|
||||
|
||||
/* Return the target-specific part of the TLS data for the current thread. */
|
||||
|
@ -42,6 +42,7 @@ CUDA_ONE_CALL (cuModuleLoad)
|
||||
CUDA_ONE_CALL (cuModuleLoadData)
|
||||
CUDA_ONE_CALL (cuModuleUnload)
|
||||
CUDA_ONE_CALL_MAYBE_NULL (cuOccupancyMaxPotentialBlockSize)
|
||||
CUDA_ONE_CALL (cuStreamAddCallback)
|
||||
CUDA_ONE_CALL (cuStreamCreate)
|
||||
CUDA_ONE_CALL (cuStreamDestroy)
|
||||
CUDA_ONE_CALL (cuStreamQuery)
|
||||
|
@ -54,7 +54,11 @@ typedef enum {
|
||||
CUDA_ERROR_INVALID_CONTEXT = 201,
|
||||
CUDA_ERROR_NOT_FOUND = 500,
|
||||
CUDA_ERROR_NOT_READY = 600,
|
||||
CUDA_ERROR_LAUNCH_FAILED = 719
|
||||
CUDA_ERROR_LAUNCH_FAILED = 719,
|
||||
CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
|
||||
CUDA_ERROR_NOT_PERMITTED = 800,
|
||||
CUDA_ERROR_NOT_SUPPORTED = 801,
|
||||
CUDA_ERROR_UNKNOWN = 999
|
||||
} CUresult;
|
||||
|
||||
typedef enum {
|
||||
@ -173,6 +177,8 @@ CUresult cuModuleLoadData (CUmodule *, const void *);
|
||||
CUresult cuModuleUnload (CUmodule);
|
||||
CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
|
||||
CUoccupancyB2DSize, size_t, int);
|
||||
typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
|
||||
CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int);
|
||||
CUresult cuStreamCreate (CUstream *, unsigned);
|
||||
#define cuStreamDestroy cuStreamDestroy_v2
|
||||
CUresult cuStreamDestroy (CUstream);
|
||||
|
File diff suppressed because it is too large
Load Diff
206
libgomp/target.c
206
libgomp/target.c
@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *devicep,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
goacc_device_copy_async (struct gomp_device_descr *devicep,
|
||||
bool (*copy_func) (int, void *, const void *, size_t,
|
||||
struct goacc_asyncqueue *),
|
||||
const char *dst, void *dstaddr,
|
||||
const char *src, const void *srcaddr,
|
||||
size_t size, struct goacc_asyncqueue *aq)
|
||||
{
|
||||
if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
|
||||
{
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
|
||||
src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
|
||||
host to device memory transfers. */
|
||||
|
||||
@ -269,8 +285,9 @@ gomp_to_device_kind_p (int kind)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
attribute_hidden void
|
||||
gomp_copy_host2dev (struct gomp_device_descr *devicep,
|
||||
struct goacc_asyncqueue *aq,
|
||||
void *d, const void *h, size_t sz,
|
||||
struct gomp_coalesce_buf *cbuf)
|
||||
{
|
||||
@ -299,14 +316,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
|
||||
}
|
||||
}
|
||||
}
|
||||
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
|
||||
if (__builtin_expect (aq != NULL, 0))
|
||||
goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
|
||||
"dev", d, "host", h, sz, aq);
|
||||
else
|
||||
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
|
||||
}
|
||||
|
||||
static void
|
||||
attribute_hidden void
|
||||
gomp_copy_dev2host (struct gomp_device_descr *devicep,
|
||||
struct goacc_asyncqueue *aq,
|
||||
void *h, const void *d, size_t sz)
|
||||
{
|
||||
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
|
||||
if (__builtin_expect (aq != NULL, 0))
|
||||
goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
|
||||
"host", h, "dev", d, sz, aq);
|
||||
else
|
||||
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -324,7 +350,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
|
||||
Helper function of gomp_map_vars. */
|
||||
|
||||
static inline void
|
||||
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
|
||||
gomp_map_vars_existing (struct gomp_device_descr *devicep,
|
||||
struct goacc_asyncqueue *aq, splay_tree_key oldn,
|
||||
splay_tree_key newn, struct target_var_desc *tgt_var,
|
||||
unsigned char kind, struct gomp_coalesce_buf *cbuf)
|
||||
{
|
||||
@ -346,7 +373,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
|
||||
}
|
||||
|
||||
if (GOMP_MAP_ALWAYS_TO_P (kind))
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
|
||||
+ newn->host_start - oldn->host_start),
|
||||
(void *) newn->host_start,
|
||||
@ -364,8 +391,8 @@ get_kind (bool short_mapkind, void *kinds, int idx)
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
|
||||
uintptr_t target_offset, uintptr_t bias,
|
||||
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
|
||||
uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
|
||||
struct gomp_coalesce_buf *cbuf)
|
||||
{
|
||||
struct gomp_device_descr *devicep = tgt->device_descr;
|
||||
@ -376,7 +403,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
|
||||
if (cur_node.host_start == (uintptr_t) NULL)
|
||||
{
|
||||
cur_node.tgt_offset = (uintptr_t) NULL;
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (tgt->tgt_start + target_offset),
|
||||
(void *) &cur_node.tgt_offset,
|
||||
sizeof (void *), cbuf);
|
||||
@ -398,12 +425,13 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
|
||||
array section. Now subtract bias to get what we want
|
||||
to initialize the pointer with. */
|
||||
cur_node.tgt_offset -= bias;
|
||||
gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
|
||||
gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
|
||||
(void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
|
||||
gomp_map_fields_existing (struct target_mem_desc *tgt,
|
||||
struct goacc_asyncqueue *aq, splay_tree_key n,
|
||||
size_t first, size_t i, void **hostaddrs,
|
||||
size_t *sizes, void *kinds,
|
||||
struct gomp_coalesce_buf *cbuf)
|
||||
@ -423,7 +451,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
|
||||
&& n2->tgt == n->tgt
|
||||
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
|
||||
{
|
||||
gomp_map_vars_existing (devicep, n2, &cur_node,
|
||||
gomp_map_vars_existing (devicep, aq, n2, &cur_node,
|
||||
&tgt->list[i], kind & typemask, cbuf);
|
||||
return;
|
||||
}
|
||||
@ -439,8 +467,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
|
||||
&& n2->host_start - n->host_start
|
||||
== n2->tgt_offset - n->tgt_offset)
|
||||
{
|
||||
gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
|
||||
kind & typemask, cbuf);
|
||||
gomp_map_vars_existing (devicep, aq, n2, &cur_node,
|
||||
&tgt->list[i], kind & typemask, cbuf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -451,7 +479,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
|
||||
&& n2->tgt == n->tgt
|
||||
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
|
||||
{
|
||||
gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
|
||||
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
|
||||
kind & typemask, cbuf);
|
||||
return;
|
||||
}
|
||||
@ -483,10 +511,12 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
|
||||
return tgt->tgt_start + tgt->list[i].offset;
|
||||
}
|
||||
|
||||
attribute_hidden struct target_mem_desc *
|
||||
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
|
||||
bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
|
||||
static inline __attribute__((always_inline)) struct target_mem_desc *
|
||||
gomp_map_vars_internal (struct gomp_device_descr *devicep,
|
||||
struct goacc_asyncqueue *aq, size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs, size_t *sizes,
|
||||
void *kinds, bool short_mapkind,
|
||||
enum gomp_map_vars_kind pragma_kind)
|
||||
{
|
||||
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
|
||||
bool has_firstprivate = false;
|
||||
@ -600,7 +630,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
continue;
|
||||
}
|
||||
for (i = first; i <= last; i++)
|
||||
gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
|
||||
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
|
||||
sizes, kinds, NULL);
|
||||
i--;
|
||||
continue;
|
||||
@ -645,7 +675,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
else
|
||||
n = splay_tree_lookup (mem_map, &cur_node);
|
||||
if (n && n->refcount != REFCOUNT_LINK)
|
||||
gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
|
||||
gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
|
||||
kind & typemask, NULL);
|
||||
else
|
||||
{
|
||||
@ -756,7 +786,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
tgt_size = (tgt_size + align - 1) & ~(align - 1);
|
||||
tgt->list[i].offset = tgt_size;
|
||||
len = sizes[i];
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (tgt->tgt_start + tgt_size),
|
||||
(void *) hostaddrs[i], len, cbufp);
|
||||
tgt_size += len;
|
||||
@ -790,7 +820,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
continue;
|
||||
}
|
||||
for (i = first; i <= last; i++)
|
||||
gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
|
||||
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
|
||||
sizes, kinds, cbufp);
|
||||
i--;
|
||||
continue;
|
||||
@ -810,7 +840,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
|
||||
if (cur_node.tgt_offset)
|
||||
cur_node.tgt_offset -= sizes[i];
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (n->tgt->tgt_start
|
||||
+ n->tgt_offset
|
||||
+ cur_node.host_start
|
||||
@ -831,7 +861,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
k->host_end = k->host_start + sizeof (void *);
|
||||
splay_tree_key n = splay_tree_lookup (mem_map, k);
|
||||
if (n && n->refcount != REFCOUNT_LINK)
|
||||
gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
|
||||
gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
|
||||
kind & typemask, cbufp);
|
||||
else
|
||||
{
|
||||
@ -884,18 +914,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
case GOMP_MAP_FORCE_TOFROM:
|
||||
case GOMP_MAP_ALWAYS_TO:
|
||||
case GOMP_MAP_ALWAYS_TOFROM:
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
k->host_end - k->host_start, cbufp);
|
||||
break;
|
||||
case GOMP_MAP_POINTER:
|
||||
gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
|
||||
gomp_map_pointer (tgt, aq,
|
||||
(uintptr_t) *(void **) k->host_start,
|
||||
k->tgt_offset, sizes[i], cbufp);
|
||||
break;
|
||||
case GOMP_MAP_TO_PSET:
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
@ -917,7 +948,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
tgt->list[j].always_copy_from = false;
|
||||
if (k->refcount != REFCOUNT_INFINITY)
|
||||
k->refcount++;
|
||||
gomp_map_pointer (tgt,
|
||||
gomp_map_pointer (tgt, aq,
|
||||
(uintptr_t) *(void **) hostaddrs[j],
|
||||
k->tgt_offset
|
||||
+ ((uintptr_t) hostaddrs[j]
|
||||
@ -946,7 +977,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
break;
|
||||
case GOMP_MAP_FORCE_DEVICEPTR:
|
||||
assert (k->host_end - k->host_start == sizeof (void *));
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (tgt->tgt_start
|
||||
+ k->tgt_offset),
|
||||
(void *) k->host_start,
|
||||
@ -965,7 +996,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
|
||||
/* We intentionally do not use coalescing here, as it's not
|
||||
data allocated by the current call to this function. */
|
||||
gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
|
||||
gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
|
||||
&tgt_addr, sizeof (void *), NULL);
|
||||
}
|
||||
array++;
|
||||
@ -978,7 +1009,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
for (i = 0; i < mapnum; i++)
|
||||
{
|
||||
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (tgt->tgt_start + i * sizeof (void *)),
|
||||
(void *) &cur_node.tgt_offset, sizeof (void *),
|
||||
cbufp);
|
||||
@ -989,7 +1020,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
{
|
||||
long c = 0;
|
||||
for (c = 0; c < cbuf.chunk_cnt; ++c)
|
||||
gomp_copy_host2dev (devicep,
|
||||
gomp_copy_host2dev (devicep, aq,
|
||||
(void *) (tgt->tgt_start + cbuf.chunks[c].start),
|
||||
(char *) cbuf.buf + (cbuf.chunks[c].start
|
||||
- cbuf.chunks[0].start),
|
||||
@ -1012,7 +1043,27 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
return tgt;
|
||||
}
|
||||
|
||||
static void
|
||||
attribute_hidden struct target_mem_desc *
|
||||
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
|
||||
bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
|
||||
{
|
||||
return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
|
||||
sizes, kinds, short_mapkind, pragma_kind);
|
||||
}
|
||||
|
||||
attribute_hidden struct target_mem_desc *
|
||||
gomp_map_vars_async (struct gomp_device_descr *devicep,
|
||||
struct goacc_asyncqueue *aq, size_t mapnum,
|
||||
void **hostaddrs, void **devaddrs, size_t *sizes,
|
||||
void *kinds, bool short_mapkind,
|
||||
enum gomp_map_vars_kind pragma_kind)
|
||||
{
|
||||
return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
|
||||
sizes, kinds, short_mapkind, pragma_kind);
|
||||
}
|
||||
|
||||
attribute_hidden void
|
||||
gomp_unmap_tgt (struct target_mem_desc *tgt)
|
||||
{
|
||||
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
|
||||
@ -1040,12 +1091,24 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
|
||||
return is_tgt_unmapped;
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_unref_tgt (void *ptr)
|
||||
{
|
||||
struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
|
||||
|
||||
if (tgt->refcount > 1)
|
||||
tgt->refcount--;
|
||||
else
|
||||
gomp_unmap_tgt (tgt);
|
||||
}
|
||||
|
||||
/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
|
||||
variables back from device to host: if it is false, it is assumed that this
|
||||
has been done already. */
|
||||
|
||||
attribute_hidden void
|
||||
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
|
||||
static inline __attribute__((always_inline)) void
|
||||
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
|
||||
struct goacc_asyncqueue *aq)
|
||||
{
|
||||
struct gomp_device_descr *devicep = tgt->device_descr;
|
||||
|
||||
@ -1082,7 +1145,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
|
||||
|
||||
if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
|
||||
|| tgt->list[i].always_copy_from)
|
||||
gomp_copy_dev2host (devicep,
|
||||
gomp_copy_dev2host (devicep, aq,
|
||||
(void *) (k->host_start + tgt->list[i].offset),
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset
|
||||
+ tgt->list[i].offset),
|
||||
@ -1091,14 +1154,28 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
|
||||
gomp_remove_var (devicep, k);
|
||||
}
|
||||
|
||||
if (tgt->refcount > 1)
|
||||
tgt->refcount--;
|
||||
if (aq)
|
||||
devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
|
||||
(void *) tgt);
|
||||
else
|
||||
gomp_unmap_tgt (tgt);
|
||||
gomp_unref_tgt ((void *) tgt);
|
||||
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
}
|
||||
|
||||
attribute_hidden void
|
||||
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
|
||||
{
|
||||
gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
|
||||
}
|
||||
|
||||
attribute_hidden void
|
||||
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
|
||||
struct goacc_asyncqueue *aq)
|
||||
{
|
||||
gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
|
||||
}
|
||||
|
||||
static void
|
||||
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
|
||||
size_t *sizes, void *kinds, bool short_mapkind)
|
||||
@ -1148,9 +1225,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
|
||||
size_t size = cur_node.host_end - cur_node.host_start;
|
||||
|
||||
if (GOMP_MAP_COPY_TO_P (kind & typemask))
|
||||
gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
|
||||
gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
|
||||
NULL);
|
||||
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
|
||||
gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
|
||||
gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
|
||||
}
|
||||
}
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
@ -1443,9 +1521,24 @@ gomp_init_device (struct gomp_device_descr *devicep)
|
||||
false);
|
||||
}
|
||||
|
||||
/* Initialize OpenACC asynchronous queues. */
|
||||
goacc_init_asyncqueues (devicep);
|
||||
|
||||
devicep->state = GOMP_DEVICE_INITIALIZED;
|
||||
}
|
||||
|
||||
/* This function finalizes the target device, specified by DEVICEP. DEVICEP
|
||||
must be locked on entry, and remains locked on return. */
|
||||
|
||||
attribute_hidden bool
|
||||
gomp_fini_device (struct gomp_device_descr *devicep)
|
||||
{
|
||||
bool ret = goacc_fini_asyncqueues (devicep);
|
||||
ret &= devicep->fini_device_func (devicep->target_id);
|
||||
devicep->state = GOMP_DEVICE_FINALIZED;
|
||||
return ret;
|
||||
}
|
||||
|
||||
attribute_hidden void
|
||||
gomp_unload_device (struct gomp_device_descr *devicep)
|
||||
{
|
||||
@ -1954,7 +2047,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
|
||||
|
||||
if ((kind == GOMP_MAP_FROM && k->refcount == 0)
|
||||
|| kind == GOMP_MAP_ALWAYS_FROM)
|
||||
gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
|
||||
gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
|
||||
(void *) (k->tgt->tgt_start + k->tgt_offset
|
||||
+ cur_node.host_start
|
||||
- k->host_start),
|
||||
@ -2636,20 +2729,20 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
|
||||
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
|
||||
{
|
||||
if (!DLSYM_OPT (openacc.exec, openacc_exec)
|
||||
|| !DLSYM_OPT (openacc.register_async_cleanup,
|
||||
openacc_register_async_cleanup)
|
||||
|| !DLSYM_OPT (openacc.async_test, openacc_async_test)
|
||||
|| !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
|
||||
|| !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
|
||||
|| !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
|
||||
|| !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
|
||||
|| !DLSYM_OPT (openacc.async_wait_all_async,
|
||||
openacc_async_wait_all_async)
|
||||
|| !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
|
||||
|| !DLSYM_OPT (openacc.create_thread_data,
|
||||
openacc_create_thread_data)
|
||||
|| !DLSYM_OPT (openacc.destroy_thread_data,
|
||||
openacc_destroy_thread_data))
|
||||
openacc_destroy_thread_data)
|
||||
|| !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
|
||||
|| !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
|
||||
|| !DLSYM_OPT (openacc.async.test, openacc_async_test)
|
||||
|| !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
|
||||
|| !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
|
||||
|| !DLSYM_OPT (openacc.async.queue_callback,
|
||||
openacc_async_queue_callback)
|
||||
|| !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
|
||||
|| !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
|
||||
|| !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
|
||||
{
|
||||
/* Require all the OpenACC handlers if we have
|
||||
GOMP_OFFLOAD_CAP_OPENACC_200. */
|
||||
@ -2700,10 +2793,7 @@ gomp_target_fini (void)
|
||||
struct gomp_device_descr *devicep = &devices[i];
|
||||
gomp_mutex_lock (&devicep->lock);
|
||||
if (devicep->state == GOMP_DEVICE_INITIALIZED)
|
||||
{
|
||||
ret = devicep->fini_device_func (devicep->target_id);
|
||||
devicep->state = GOMP_DEVICE_FINALIZED;
|
||||
}
|
||||
ret = gomp_fini_device (devicep);
|
||||
gomp_mutex_unlock (&devicep->lock);
|
||||
if (!ret)
|
||||
gomp_fatal ("device finalization failed");
|
||||
|
Loading…
Reference in New Issue
Block a user