2019-05-13 Chung-Lin Tang <cltang@codesourcery.com>

Reviewed-by: Thomas Schwinge <thomas@codesourcery.com>

	libgomp/
	* libgomp-plugin.h (struct goacc_asyncqueue): Declare.
	(struct goacc_asyncqueue_list): Likewise.
	(goacc_aq): Likewise.
	(goacc_aq_list): Likewise.
	(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
	(GOMP_OFFLOAD_openacc_async_test): Remove.
	(GOMP_OFFLOAD_openacc_async_test_all): Remove.
	(GOMP_OFFLOAD_openacc_async_wait): Remove.
	(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
	(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
	(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
	(GOMP_OFFLOAD_openacc_async_set_async): Remove.
	(GOMP_OFFLOAD_openacc_exec): Adjust declaration.
	(GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise.
	(GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise.
	(GOMP_OFFLOAD_openacc_async_exec): Declare.
	(GOMP_OFFLOAD_openacc_async_construct): Declare.
	(GOMP_OFFLOAD_openacc_async_destruct): Declare.
	(GOMP_OFFLOAD_openacc_async_test): Declare.
	(GOMP_OFFLOAD_openacc_async_synchronize): Declare.
	(GOMP_OFFLOAD_openacc_async_serialize): Declare.
	(GOMP_OFFLOAD_openacc_async_queue_callback): Declare.
	(GOMP_OFFLOAD_openacc_async_host2dev): Declare.
	(GOMP_OFFLOAD_openacc_async_dev2host): Declare.

	* libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct.
	(gomp_acc_insert_pointer): Adjust declaration.
	(gomp_copy_host2dev): New declaration.
	(gomp_copy_dev2host): Likewise.
	(gomp_map_vars_async): Likewise.
	(gomp_unmap_tgt): Likewise.
	(gomp_unmap_vars_async): Likewise.
	(gomp_fini_device): Likewise.

	* oacc-async.c (get_goacc_thread): New function.
	(get_goacc_thread_device): New function.
	(lookup_goacc_asyncqueue): New function.
	(get_goacc_asyncqueue): New function.
	(acc_async_test): Adjust code to use new async design.
	(acc_async_test_all): Likewise.
	(acc_wait): Likewise.
	(acc_wait_async): Likewise.
	(acc_wait_all): Likewise.
	(acc_wait_all_async): Likewise.
	(goacc_async_free): New function.
	(goacc_init_asyncqueues): Likewise.
	(goacc_fini_asyncqueues): Likewise.
	* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async
	design.
	(acc_set_cuda_stream): Likewise.
	* oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'.
	(host_openacc_register_async_cleanup): Remove.
	(host_openacc_async_exec): New function.
	(host_openacc_async_test): Adjust parameters.
	(host_openacc_async_test_all): Remove.
	(host_openacc_async_wait): Remove.
	(host_openacc_async_wait_async): Remove.
	(host_openacc_async_wait_all): Remove.
	(host_openacc_async_wait_all_async): Remove.
	(host_openacc_async_set_async): Remove.
	(host_openacc_async_synchronize): New function.
	(host_openacc_async_serialize): New function.
	(host_openacc_async_host2dev): New function.
	(host_openacc_async_dev2host): New function.
	(host_openacc_async_queue_callback): New function.
	(host_openacc_async_construct): New function.
	(host_openacc_async_destruct): New function.
	(struct gomp_device_descr host_dispatch): Remove initialization of old
	interface, add intialization of new async sub-struct.
	* oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device.
	(goacc_attach_host_thread_to_device): Remove old async code usage.
	* oacc-int.h (goacc_init_asyncqueues): New declaration.
	(goacc_fini_asyncqueues): Likewise.
	(goacc_async_copyout_unmap_vars): Likewise.
	(goacc_async_free): Likewise.
	(get_goacc_asyncqueue): Likewise.
	(lookup_goacc_asyncqueue): Likewise.

	* oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async
	design.
	(present_create_copy): Adjust code to use new async design.
	(delete_copyout): Likewise.
	(update_dev_host): Likewise.
	(gomp_acc_insert_pointer): Add async parameter, adjust code to use new
	async design.
	(gomp_acc_remove_pointer): Adjust code to use new async design.
	* oacc-parallel.c (GOACC_parallel_keyed): Adjust code to use new async
	design.
	(GOACC_enter_exit_data): Likewise.
	(goacc_wait): Likewise.
	(GOACC_update): Likewise.
	* oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Change to assert fail
	when called, warn as obsolete in comment.

	* target.c (goacc_device_copy_async): New function.
	(gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter,
	add goacc_device_copy_async case.
	(gomp_copy_dev2host): Likewise.
	(gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code.
	(gomp_map_pointer): Likewise.
	(gomp_map_fields_existing): Likewise.
	(gomp_map_vars_internal): New always_inline function, renamed from
	gomp_map_vars.
	(gomp_map_vars): Implement by calling gomp_map_vars_internal.
	(gomp_map_vars_async): Implement by calling gomp_map_vars_internal,
	passing goacc_asyncqueue argument.
	(gomp_unmap_tgt): Remove static, add attribute_hidden.
	(gomp_unref_tgt): New function.
	(gomp_unmap_vars_internal): New always_inline function, renamed from
	gomp_unmap_vars.
	(gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal.
	(gomp_unmap_vars_async): Implement by calling
	gomp_unmap_vars_internal, passing goacc_asyncqueue argument.
	(gomp_fini_device): New function.
	(gomp_exit_data): Adjust gomp_copy_dev2host call.
	(gomp_load_plugin_for_device): Remove old interface, adjust to load
	new async interface.
	(gomp_target_fini): Adjust code to call gomp_fini_device.

	* plugin/plugin-nvptx.c (struct cuda_map): Remove.
	(struct ptx_stream): Remove.
	(struct nvptx_thread): Remove current_stream field.
	(cuda_map_create): Remove.
	(cuda_map_destroy): Remove.
	(map_init): Remove.
	(map_fini): Remove.
	(map_pop): Remove.
	(map_push): Remove.
	(struct goacc_asyncqueue): Define.
	(struct nvptx_callback): Define.
	(struct ptx_free_block): Define.
	(struct ptx_device): Remove null_stream, active_streams, async_streams,
	stream_lock, and next fields.
	(enum ptx_event_type): Remove.
	(struct ptx_event): Remove.
	(ptx_event_lock): Remove.
	(ptx_events): Remove.
	(init_streams_for_device): Remove.
	(fini_streams_for_device): Remove.
	(select_stream_for_async): Remove.
	(nvptx_init): Remove ptx_events and ptx_event_lock references.
	(nvptx_attach_host_thread_to_device): Remove CUDA_ERROR_NOT_PERMITTED
	case.
	(nvptx_open_device): Add free_blocks initialization, remove
	init_streams_for_device call.
	(nvptx_close_device): Remove fini_streams_for_device call, add
	free_blocks destruct code.
	(event_gc): Remove.
	(event_add): Remove.
	(nvptx_exec): Adjust parameters and code.
	(nvptx_free): Likewise.
	(nvptx_host2dev): Remove.
	(nvptx_dev2host): Remove.
	(nvptx_set_async): Remove.
	(nvptx_async_test): Remove.
	(nvptx_async_test_all): Remove.
	(nvptx_wait): Remove.
	(nvptx_wait_async): Remove.
	(nvptx_wait_all): Remove.
	(nvptx_wait_all_async): Remove.
	(nvptx_get_cuda_stream): Remove.
	(nvptx_set_cuda_stream): Remove.
	(GOMP_OFFLOAD_alloc): Adjust code.
	(GOMP_OFFLOAD_free): Likewise.
	(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
	(GOMP_OFFLOAD_openacc_exec): Adjust parameters and code.
	(GOMP_OFFLOAD_openacc_async_test_all): Remove.
	(GOMP_OFFLOAD_openacc_async_wait): Remove.
	(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
	(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
	(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
	(GOMP_OFFLOAD_openacc_async_set_async): Remove.
	(cuda_free_argmem): New function.
	(GOMP_OFFLOAD_openacc_async_exec): New plugin hook function.
	(GOMP_OFFLOAD_openacc_create_thread_data): Adjust code.
	(GOMP_OFFLOAD_openacc_cuda_get_stream): Adjust code.
	(GOMP_OFFLOAD_openacc_cuda_set_stream): Adjust code.
	(GOMP_OFFLOAD_openacc_async_construct): New plugin hook function.
	(GOMP_OFFLOAD_openacc_async_destruct): New plugin hook function.
	(GOMP_OFFLOAD_openacc_async_test): Remove and re-implement.
	(GOMP_OFFLOAD_openacc_async_synchronize): New plugin hook function.
	(GOMP_OFFLOAD_openacc_async_serialize): New plugin hook function.
	(GOMP_OFFLOAD_openacc_async_queue_callback): New plugin hook function.
	(cuda_callback_wrapper): New function.
	(cuda_memcpy_sanity_check): New function.
	(GOMP_OFFLOAD_host2dev): Remove and re-implement.
	(GOMP_OFFLOAD_dev2host): Remove and re-implement.
	(GOMP_OFFLOAD_openacc_async_host2dev): New plugin hook function.
	(GOMP_OFFLOAD_openacc_async_dev2host): New plugin hook function.

From-SVN: r271128
This commit is contained in:
Chung-Lin Tang 2019-05-13 13:32:00 +00:00 committed by Chung-Lin Tang
parent f78f5d2392
commit 1f4c5b9bb2
15 changed files with 1117 additions and 1280 deletions

View File

@ -1,3 +1,193 @@
2019-05-13 Chung-Lin Tang <cltang@codesourcery.com>
* libgomp-plugin.h (struct goacc_asyncqueue): Declare.
(struct goacc_asyncqueue_list): Likewise.
(goacc_aq): Likewise.
(goacc_aq_list): Likewise.
(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
(GOMP_OFFLOAD_openacc_async_test): Remove.
(GOMP_OFFLOAD_openacc_async_test_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait): Remove.
(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
(GOMP_OFFLOAD_openacc_async_set_async): Remove.
(GOMP_OFFLOAD_openacc_exec): Adjust declaration.
(GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise.
(GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise.
(GOMP_OFFLOAD_openacc_async_exec): Declare.
(GOMP_OFFLOAD_openacc_async_construct): Declare.
(GOMP_OFFLOAD_openacc_async_destruct): Declare.
(GOMP_OFFLOAD_openacc_async_test): Declare.
(GOMP_OFFLOAD_openacc_async_synchronize): Declare.
(GOMP_OFFLOAD_openacc_async_serialize): Declare.
(GOMP_OFFLOAD_openacc_async_queue_callback): Declare.
(GOMP_OFFLOAD_openacc_async_host2dev): Declare.
(GOMP_OFFLOAD_openacc_async_dev2host): Declare.
* libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct.
(gomp_acc_insert_pointer): Adjust declaration.
(gomp_copy_host2dev): New declaration.
(gomp_copy_dev2host): Likewise.
(gomp_map_vars_async): Likewise.
(gomp_unmap_tgt): Likewise.
(gomp_unmap_vars_async): Likewise.
(gomp_fini_device): Likewise.
* oacc-async.c (get_goacc_thread): New function.
(get_goacc_thread_device): New function.
(lookup_goacc_asyncqueue): New function.
(get_goacc_asyncqueue): New function.
(acc_async_test): Adjust code to use new async design.
(acc_async_test_all): Likewise.
(acc_wait): Likewise.
(acc_wait_async): Likewise.
(acc_wait_all): Likewise.
(acc_wait_all_async): Likewise.
(goacc_async_free): New function.
(goacc_init_asyncqueues): Likewise.
(goacc_fini_asyncqueues): Likewise.
* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async
design.
(acc_set_cuda_stream): Likewise.
* oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'.
(host_openacc_register_async_cleanup): Remove.
(host_openacc_async_exec): New function.
(host_openacc_async_test): Adjust parameters.
(host_openacc_async_test_all): Remove.
(host_openacc_async_wait): Remove.
(host_openacc_async_wait_async): Remove.
(host_openacc_async_wait_all): Remove.
(host_openacc_async_wait_all_async): Remove.
(host_openacc_async_set_async): Remove.
(host_openacc_async_synchronize): New function.
(host_openacc_async_serialize): New function.
(host_openacc_async_host2dev): New function.
(host_openacc_async_dev2host): New function.
(host_openacc_async_queue_callback): New function.
(host_openacc_async_construct): New function.
(host_openacc_async_destruct): New function.
(struct gomp_device_descr host_dispatch): Remove initialization of old
interface, add intialization of new async sub-struct.
* oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device.
(goacc_attach_host_thread_to_device): Remove old async code usage.
* oacc-int.h (goacc_init_asyncqueues): New declaration.
(goacc_fini_asyncqueues): Likewise.
(goacc_async_copyout_unmap_vars): Likewise.
(goacc_async_free): Likewise.
(get_goacc_asyncqueue): Likewise.
(lookup_goacc_asyncqueue): Likewise.
* oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async
design.
(present_create_copy): Adjust code to use new async design.
(delete_copyout): Likewise.
(update_dev_host): Likewise.
(gomp_acc_insert_pointer): Add async parameter, adjust code to use new
async design.
(gomp_acc_remove_pointer): Adjust code to use new async design.
* oacc-parallel.c (GOACC_parallel_keyed): Adjust code to use new async
design.
(GOACC_enter_exit_data): Likewise.
(goacc_wait): Likewise.
(GOACC_update): Likewise.
* oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Change to assert fail
when called, warn as obsolete in comment.
* target.c (goacc_device_copy_async): New function.
(gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter,
add goacc_device_copy_async case.
(gomp_copy_dev2host): Likewise.
(gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code.
(gomp_map_pointer): Likewise.
(gomp_map_fields_existing): Likewise.
(gomp_map_vars_internal): New always_inline function, renamed from
gomp_map_vars.
(gomp_map_vars): Implement by calling gomp_map_vars_internal.
(gomp_map_vars_async): Implement by calling gomp_map_vars_internal,
passing goacc_asyncqueue argument.
(gomp_unmap_tgt): Remove static, add attribute_hidden.
(gomp_unref_tgt): New function.
(gomp_unmap_vars_internal): New always_inline function, renamed from
gomp_unmap_vars.
(gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal.
(gomp_unmap_vars_async): Implement by calling
gomp_unmap_vars_internal, passing goacc_asyncqueue argument.
(gomp_fini_device): New function.
(gomp_exit_data): Adjust gomp_copy_dev2host call.
(gomp_load_plugin_for_device): Remove old interface, adjust to load
new async interface.
(gomp_target_fini): Adjust code to call gomp_fini_device.
* plugin/plugin-nvptx.c (struct cuda_map): Remove.
(struct ptx_stream): Remove.
(struct nvptx_thread): Remove current_stream field.
(cuda_map_create): Remove.
(cuda_map_destroy): Remove.
(map_init): Remove.
(map_fini): Remove.
(map_pop): Remove.
(map_push): Remove.
(struct goacc_asyncqueue): Define.
(struct nvptx_callback): Define.
(struct ptx_free_block): Define.
(struct ptx_device): Remove null_stream, active_streams, async_streams,
stream_lock, and next fields.
(enum ptx_event_type): Remove.
(struct ptx_event): Remove.
(ptx_event_lock): Remove.
(ptx_events): Remove.
(init_streams_for_device): Remove.
(fini_streams_for_device): Remove.
(select_stream_for_async): Remove.
(nvptx_init): Remove ptx_events and ptx_event_lock references.
(nvptx_attach_host_thread_to_device): Remove CUDA_ERROR_NOT_PERMITTED
case.
(nvptx_open_device): Add free_blocks initialization, remove
init_streams_for_device call.
(nvptx_close_device): Remove fini_streams_for_device call, add
free_blocks destruct code.
(event_gc): Remove.
(event_add): Remove.
(nvptx_exec): Adjust parameters and code.
(nvptx_free): Likewise.
(nvptx_host2dev): Remove.
(nvptx_dev2host): Remove.
(nvptx_set_async): Remove.
(nvptx_async_test): Remove.
(nvptx_async_test_all): Remove.
(nvptx_wait): Remove.
(nvptx_wait_async): Remove.
(nvptx_wait_all): Remove.
(nvptx_wait_all_async): Remove.
(nvptx_get_cuda_stream): Remove.
(nvptx_set_cuda_stream): Remove.
(GOMP_OFFLOAD_alloc): Adjust code.
(GOMP_OFFLOAD_free): Likewise.
(GOMP_OFFLOAD_openacc_register_async_cleanup): Remove.
(GOMP_OFFLOAD_openacc_exec): Adjust parameters and code.
(GOMP_OFFLOAD_openacc_async_test_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait): Remove.
(GOMP_OFFLOAD_openacc_async_wait_async): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all): Remove.
(GOMP_OFFLOAD_openacc_async_wait_all_async): Remove.
(GOMP_OFFLOAD_openacc_async_set_async): Remove.
(cuda_free_argmem): New function.
(GOMP_OFFLOAD_openacc_async_exec): New plugin hook function.
(GOMP_OFFLOAD_openacc_create_thread_data): Adjust code.
(GOMP_OFFLOAD_openacc_cuda_get_stream): Adjust code.
(GOMP_OFFLOAD_openacc_cuda_set_stream): Adjust code.
(GOMP_OFFLOAD_openacc_async_construct): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_destruct): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_test): Remove and re-implement.
(GOMP_OFFLOAD_openacc_async_synchronize): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_serialize): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_queue_callback): New plugin hook function.
(cuda_callback_wrapper): New function.
(cuda_memcpy_sanity_check): New function.
(GOMP_OFFLOAD_host2dev): Remove and re-implement.
(GOMP_OFFLOAD_dev2host): Remove and re-implement.
(GOMP_OFFLOAD_openacc_async_host2dev): New plugin hook function.
(GOMP_OFFLOAD_openacc_async_dev2host): New plugin hook function.
2019-05-07 Thomas Schwinge <thomas@codesourcery.com>
PR target/87835

View File

@ -53,6 +53,20 @@ enum offload_target_type
OFFLOAD_TARGET_TYPE_HSA = 7
};
/* Opaque type to represent plugin-dependent implementation of an
OpenACC asynchronous queue. */
struct goacc_asyncqueue;
/* Used to keep a list of active asynchronous queues. */
struct goacc_asyncqueue_list
{
struct goacc_asyncqueue *aq;
struct goacc_asyncqueue_list *next;
};
typedef struct goacc_asyncqueue *goacc_aq;
typedef struct goacc_asyncqueue_list *goacc_aq_list;
/* Auxiliary struct, used for transferring pairs of addresses from plugin
to libgomp. */
struct addr_pair
@ -93,22 +107,31 @@ extern bool GOMP_OFFLOAD_dev2dev (int, void *, const void *, size_t);
extern bool GOMP_OFFLOAD_can_run (void *);
extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **,
void **, int, unsigned *, void *);
extern void GOMP_OFFLOAD_openacc_register_async_cleanup (void *, int);
extern int GOMP_OFFLOAD_openacc_async_test (int);
extern int GOMP_OFFLOAD_openacc_async_test_all (void);
extern void GOMP_OFFLOAD_openacc_async_wait (int);
extern void GOMP_OFFLOAD_openacc_async_wait_async (int, int);
extern void GOMP_OFFLOAD_openacc_async_wait_all (void);
extern void GOMP_OFFLOAD_openacc_async_wait_all_async (int);
extern void GOMP_OFFLOAD_openacc_async_set_async (int);
void **, unsigned *, void *);
extern void *GOMP_OFFLOAD_openacc_create_thread_data (int);
extern void GOMP_OFFLOAD_openacc_destroy_thread_data (void *);
extern struct goacc_asyncqueue *GOMP_OFFLOAD_openacc_async_construct (void);
extern bool GOMP_OFFLOAD_openacc_async_destruct (struct goacc_asyncqueue *);
extern int GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *);
extern bool GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *);
extern bool GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *,
struct goacc_asyncqueue *);
extern void GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *,
void (*)(void *), void *);
extern void GOMP_OFFLOAD_openacc_async_exec (void (*) (void *), size_t, void **,
void **, unsigned *, void *,
struct goacc_asyncqueue *);
extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size_t,
struct goacc_asyncqueue *);
extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
struct goacc_asyncqueue *);
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (int);
extern int GOMP_OFFLOAD_openacc_cuda_set_stream (int, void *);
extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
extern int GOMP_OFFLOAD_openacc_cuda_set_stream (struct goacc_asyncqueue *,
void *);
#ifdef __cplusplus
}

View File

@ -949,24 +949,31 @@ typedef struct acc_dispatch_t
/* Execute. */
__typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
/* Async cleanup callback registration. */
__typeof (GOMP_OFFLOAD_openacc_register_async_cleanup)
*register_async_cleanup_func;
/* Asynchronous routines. */
__typeof (GOMP_OFFLOAD_openacc_async_test) *async_test_func;
__typeof (GOMP_OFFLOAD_openacc_async_test_all) *async_test_all_func;
__typeof (GOMP_OFFLOAD_openacc_async_wait) *async_wait_func;
__typeof (GOMP_OFFLOAD_openacc_async_wait_async) *async_wait_async_func;
__typeof (GOMP_OFFLOAD_openacc_async_wait_all) *async_wait_all_func;
__typeof (GOMP_OFFLOAD_openacc_async_wait_all_async)
*async_wait_all_async_func;
__typeof (GOMP_OFFLOAD_openacc_async_set_async) *async_set_async_func;
/* Create/destroy TLS data. */
__typeof (GOMP_OFFLOAD_openacc_create_thread_data) *create_thread_data_func;
__typeof (GOMP_OFFLOAD_openacc_destroy_thread_data)
*destroy_thread_data_func;
struct {
/* Once created and put into the "active" list, asyncqueues are then never
destructed and removed from the "active" list, other than if the TODO
device is shut down. */
gomp_mutex_t lock;
int nasyncqueue;
struct goacc_asyncqueue **asyncqueue;
struct goacc_asyncqueue_list *active;
__typeof (GOMP_OFFLOAD_openacc_async_construct) *construct_func;
__typeof (GOMP_OFFLOAD_openacc_async_destruct) *destruct_func;
__typeof (GOMP_OFFLOAD_openacc_async_test) *test_func;
__typeof (GOMP_OFFLOAD_openacc_async_synchronize) *synchronize_func;
__typeof (GOMP_OFFLOAD_openacc_async_serialize) *serialize_func;
__typeof (GOMP_OFFLOAD_openacc_async_queue_callback) *queue_callback_func;
__typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func;
__typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func;
__typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
} async;
/* NVIDIA target specific routines. */
struct {
@ -1053,17 +1060,33 @@ enum gomp_map_vars_kind
GOMP_MAP_VARS_ENTER_DATA
};
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
unsigned short *);
struct gomp_coalesce_buf;
extern void gomp_copy_host2dev (struct gomp_device_descr *,
struct goacc_asyncqueue *, void *, const void *,
size_t, struct gomp_coalesce_buf *);
extern void gomp_copy_dev2host (struct gomp_device_descr *,
struct goacc_asyncqueue *, void *, const void *,
size_t);
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,
size_t *, void *, bool,
enum gomp_map_vars_kind);
extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
struct goacc_asyncqueue *,
size_t, void **, void **,
size_t *, void *, bool,
enum gomp_map_vars_kind);
extern void gomp_unmap_tgt (struct target_mem_desc *);
extern void gomp_unmap_vars (struct target_mem_desc *, bool);
extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
struct goacc_asyncqueue *);
extern void gomp_init_device (struct gomp_device_descr *);
extern bool gomp_fini_device (struct gomp_device_descr *);
extern void gomp_free_memmap (struct splay_tree_s *);
extern void gomp_unload_device (struct gomp_device_descr *);
extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);

View File

@ -27,47 +27,160 @@
<http://www.gnu.org/licenses/>. */
#include <assert.h>
#include <string.h>
#include "openacc.h"
#include "libgomp.h"
#include "oacc-int.h"
int
acc_async_test (int async)
static struct goacc_thread *
get_goacc_thread (void)
{
if (!async_valid_p (async))
gomp_fatal ("invalid async argument: %d", async);
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
return thr->dev->openacc.async_test_func (async);
return thr;
}
static struct gomp_device_descr *
get_goacc_thread_device (void)
{
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
return thr->dev;
}
static int
validate_async_val (int async)
{
if (!async_valid_p (async))
gomp_fatal ("invalid async-argument: %d", async);
if (async == acc_async_sync)
return -1;
if (async == acc_async_noval)
return 0;
if (async >= 0)
/* TODO: we reserve 0 for acc_async_noval before we can clarify the
semantics of "default_async". */
return 1 + async;
else
__builtin_unreachable ();
}
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
might return NULL if no asyncqueue is to be used. Otherwise, if CREATE,
create the asyncqueue if it doesn't exist yet. */
attribute_hidden struct goacc_asyncqueue *
lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
{
async = validate_async_val (async);
if (async < 0)
return NULL;
struct goacc_asyncqueue *ret_aq = NULL;
struct gomp_device_descr *dev = thr->dev;
gomp_mutex_lock (&dev->openacc.async.lock);
if (!create
&& (async >= dev->openacc.async.nasyncqueue
|| !dev->openacc.async.asyncqueue[async]))
goto end;
if (async >= dev->openacc.async.nasyncqueue)
{
int diff = async + 1 - dev->openacc.async.nasyncqueue;
dev->openacc.async.asyncqueue
= gomp_realloc (dev->openacc.async.asyncqueue,
sizeof (goacc_aq) * (async + 1));
memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
0, sizeof (goacc_aq) * diff);
dev->openacc.async.nasyncqueue = async + 1;
}
if (!dev->openacc.async.asyncqueue[async])
{
dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func ();
if (!dev->openacc.async.asyncqueue[async])
{
gomp_mutex_unlock (&dev->openacc.async.lock);
gomp_fatal ("async %d creation failed", async);
}
/* Link new async queue into active list. */
goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
n->aq = dev->openacc.async.asyncqueue[async];
n->next = dev->openacc.async.active;
dev->openacc.async.active = n;
}
ret_aq = dev->openacc.async.asyncqueue[async];
end:
gomp_mutex_unlock (&dev->openacc.async.lock);
return ret_aq;
}
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
might return NULL if no asyncqueue is to be used. Otherwise, create the
asyncqueue if it doesn't exist yet. */
attribute_hidden struct goacc_asyncqueue *
get_goacc_asyncqueue (int async)
{
struct goacc_thread *thr = get_goacc_thread ();
return lookup_goacc_asyncqueue (thr, true, async);
}
int
acc_async_test (int async)
{
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
if (!aq)
return 1;
else
return thr->dev->openacc.async.test_func (aq);
}
int
acc_async_test_all (void)
{
struct goacc_thread *thr = goacc_thread ();
struct goacc_thread *thr = get_goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
return thr->dev->openacc.async_test_all_func ();
int ret = 1;
gomp_mutex_lock (&thr->dev->openacc.async.lock);
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
if (!thr->dev->openacc.async.test_func (l->aq))
{
ret = 0;
break;
}
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
return ret;
}
void
acc_wait (int async)
{
if (!async_valid_p (async))
gomp_fatal ("invalid async argument: %d", async);
struct goacc_thread *thr = get_goacc_thread ();
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
thr->dev->openacc.async_wait_func (async);
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
if (aq && !thr->dev->openacc.async.synchronize_func (aq))
gomp_fatal ("wait on %d failed", async);
}
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */
@ -84,23 +197,46 @@ acc_async_wait (int async)
void
acc_wait_async (int async1, int async2)
{
struct goacc_thread *thr = goacc_thread ();
struct goacc_thread *thr = get_goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
/* TODO: Is this also correct for acc_async_sync, assuming that in this case,
we'll always be synchronous anyways? */
if (!aq1)
return;
thr->dev->openacc.async_wait_async_func (async1, async2);
goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
/* An async queue is always synchronized with itself. */
if (aq1 == aq2)
return;
if (aq2)
{
if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
}
else
{
/* TODO: Local thread synchronization.
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
if (!thr->dev->openacc.async.synchronize_func (aq1))
gomp_fatal ("wait on %d failed", async1);
}
}
void
acc_wait_all (void)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *dev = get_goacc_thread_device ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
bool ret = true;
gomp_mutex_lock (&dev->openacc.async.lock);
for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next)
ret &= dev->openacc.async.synchronize_func (l->aq);
gomp_mutex_unlock (&dev->openacc.async.lock);
thr->dev->openacc.async_wait_all_func ();
if (!ret)
gomp_fatal ("wait all failed");
}
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */
@ -117,13 +253,73 @@ acc_async_wait_all (void)
void
acc_wait_all_async (int async)
{
if (!async_valid_p (async))
gomp_fatal ("invalid async argument: %d", async);
struct goacc_thread *thr = get_goacc_thread ();
struct goacc_thread *thr = goacc_thread ();
goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
if (!thr || !thr->dev)
gomp_fatal ("no device active");
bool ret = true;
gomp_mutex_lock (&thr->dev->openacc.async.lock);
for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
{
if (waiting_queue)
ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
else
/* TODO: Local thread synchronization.
Necessary for the "async2 == acc_async_sync" case, or can just skip? */
ret &= thr->dev->openacc.async.synchronize_func (l->aq);
}
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
thr->dev->openacc.async_wait_all_async_func (async);
if (!ret)
gomp_fatal ("wait all async(%d) failed", async);
}
attribute_hidden void
goacc_async_free (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, void *ptr)
{
if (!aq)
free (ptr);
else
devicep->openacc.async.queue_callback_func (aq, free, ptr);
}
/* This function initializes the asyncqueues for the device specified by
DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
return. */
attribute_hidden void
goacc_init_asyncqueues (struct gomp_device_descr *devicep)
{
devicep->openacc.async.nasyncqueue = 0;
devicep->openacc.async.asyncqueue = NULL;
devicep->openacc.async.active = NULL;
gomp_mutex_init (&devicep->openacc.async.lock);
}
/* This function finalizes the asyncqueues for the device specified by DEVICEP.
TODO DEVICEP must be locked on entry, and remains locked on return. */
attribute_hidden bool
goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
{
bool ret = true;
gomp_mutex_lock (&devicep->openacc.async.lock);
if (devicep->openacc.async.nasyncqueue > 0)
{
goacc_aq_list next;
for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
{
ret &= devicep->openacc.async.destruct_func (l->aq);
next = l->next;
free (l);
}
free (devicep->openacc.async.asyncqueue);
devicep->openacc.async.nasyncqueue = 0;
devicep->openacc.async.asyncqueue = NULL;
devicep->openacc.async.active = NULL;
}
gomp_mutex_unlock (&devicep->openacc.async.lock);
gomp_mutex_destroy (&devicep->openacc.async.lock);
return ret;
}

View File

@ -30,6 +30,7 @@
#include "config.h"
#include "libgomp.h"
#include "oacc-int.h"
#include <assert.h>
void *
acc_get_current_cuda_device (void)
@ -62,7 +63,11 @@ acc_get_cuda_stream (int async)
return NULL;
if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
return thr->dev->openacc.cuda.get_stream_func (async);
{
goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
if (aq)
return thr->dev->openacc.cuda.get_stream_func (aq);
}
return NULL;
}
@ -79,8 +84,23 @@ acc_set_cuda_stream (int async, void *stream)
thr = goacc_thread ();
int ret = -1;
if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func)
return thr->dev->openacc.cuda.set_stream_func (async, stream);
{
goacc_aq aq = get_goacc_asyncqueue (async);
/* Due to not using an asyncqueue for "acc_async_sync", this cannot be
used to change the CUDA stream associated with "acc_async_sync". */
if (!aq)
{
assert (async == acc_async_sync);
gomp_debug (0, "Refusing request to set CUDA stream associated"
" with \"acc_async_sync\"\n");
return 0;
}
gomp_mutex_lock (&thr->dev->openacc.async.lock);
ret = thr->dev->openacc.cuda.set_stream_func (aq, stream);
gomp_mutex_unlock (&thr->dev->openacc.async.lock);
}
return -1;
return ret;
}

View File

@ -140,55 +140,89 @@ host_openacc_exec (void (*fn) (void *),
size_t mapnum __attribute__ ((unused)),
void **hostaddrs,
void **devaddrs __attribute__ ((unused)),
int async __attribute__ ((unused)),
unsigned *dims __attribute ((unused)),
unsigned *dims __attribute__ ((unused)),
void *targ_mem_desc __attribute__ ((unused)))
{
fn (hostaddrs);
}
static void
host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)),
int async __attribute__ ((unused)))
host_openacc_async_exec (void (*fn) (void *),
size_t mapnum __attribute__ ((unused)),
void **hostaddrs,
void **devaddrs __attribute__ ((unused)),
unsigned *dims __attribute__ ((unused)),
void *targ_mem_desc __attribute__ ((unused)),
struct goacc_asyncqueue *aq __attribute__ ((unused)))
{
fn (hostaddrs);
}
static int
host_openacc_async_test (int async __attribute__ ((unused)))
host_openacc_async_test (struct goacc_asyncqueue *aq __attribute__ ((unused)))
{
return 1;
}
static int
host_openacc_async_test_all (void)
static bool
host_openacc_async_synchronize (struct goacc_asyncqueue *aq
__attribute__ ((unused)))
{
return 1;
return true;
}
static bool
host_openacc_async_serialize (struct goacc_asyncqueue *aq1
__attribute__ ((unused)),
struct goacc_asyncqueue *aq2
__attribute__ ((unused)))
{
return true;
}
static bool
host_openacc_async_host2dev (int ord __attribute__ ((unused)),
void *dst __attribute__ ((unused)),
const void *src __attribute__ ((unused)),
size_t n __attribute__ ((unused)),
struct goacc_asyncqueue *aq
__attribute__ ((unused)))
{
return true;
}
static bool
host_openacc_async_dev2host (int ord __attribute__ ((unused)),
void *dst __attribute__ ((unused)),
const void *src __attribute__ ((unused)),
size_t n __attribute__ ((unused)),
struct goacc_asyncqueue *aq
__attribute__ ((unused)))
{
return true;
}
static void
host_openacc_async_wait (int async __attribute__ ((unused)))
host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
__attribute__ ((unused)),
void (*callback_fn)(void *)
__attribute__ ((unused)),
void *userptr __attribute__ ((unused)))
{
}
static void
host_openacc_async_wait_async (int async1 __attribute__ ((unused)),
int async2 __attribute__ ((unused)))
static struct goacc_asyncqueue *
host_openacc_async_construct (void)
{
/* Non-NULL 0xffff... value as opaque dummy. */
return (struct goacc_asyncqueue *) -1;
}
static void
host_openacc_async_wait_all (void)
{
}
static void
host_openacc_async_wait_all_async (int async __attribute__ ((unused)))
{
}
static void
host_openacc_async_set_async (int async __attribute__ ((unused)))
static bool
host_openacc_async_destruct (struct goacc_asyncqueue *aq
__attribute__ ((unused)))
{
return true;
}
static void *
@ -235,19 +269,21 @@ static struct gomp_device_descr host_dispatch =
.exec_func = host_openacc_exec,
.register_async_cleanup_func = host_openacc_register_async_cleanup,
.async_test_func = host_openacc_async_test,
.async_test_all_func = host_openacc_async_test_all,
.async_wait_func = host_openacc_async_wait,
.async_wait_async_func = host_openacc_async_wait_async,
.async_wait_all_func = host_openacc_async_wait_all,
.async_wait_all_async_func = host_openacc_async_wait_all_async,
.async_set_async_func = host_openacc_async_set_async,
.create_thread_data_func = host_openacc_create_thread_data,
.destroy_thread_data_func = host_openacc_destroy_thread_data,
.async = {
.construct_func = host_openacc_async_construct,
.destruct_func = host_openacc_async_destruct,
.test_func = host_openacc_async_test,
.synchronize_func = host_openacc_async_synchronize,
.serialize_func = host_openacc_async_serialize,
.queue_callback_func = host_openacc_async_queue_callback,
.exec_func = host_openacc_async_exec,
.dev2host_func = host_openacc_async_dev2host,
.host2dev_func = host_openacc_async_host2dev,
},
.cuda = {
.get_current_device_func = NULL,
.get_current_context_func = NULL,

View File

@ -309,7 +309,7 @@ acc_shutdown_1 (acc_device_t d)
if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
{
devices_active = true;
ret &= acc_dev->fini_device_func (acc_dev->target_id);
ret &= gomp_fini_device (acc_dev);
acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
}
gomp_mutex_unlock (&acc_dev->lock);
@ -426,8 +426,6 @@ goacc_attach_host_thread_to_device (int ord)
thr->target_tls
= acc_dev->openacc.create_thread_data_func (ord);
acc_dev->openacc.async_set_async_func (acc_async_sync);
}
/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of

View File

@ -99,6 +99,13 @@ void goacc_restore_bind (void);
void goacc_lazy_initialize (void);
void goacc_host_init (void);
void goacc_init_asyncqueues (struct gomp_device_descr *);
bool goacc_fini_asyncqueues (struct gomp_device_descr *);
void goacc_async_free (struct gomp_device_descr *, struct goacc_asyncqueue *,
void *);
struct goacc_asyncqueue *get_goacc_asyncqueue (int);
struct goacc_asyncqueue *lookup_goacc_asyncqueue (struct goacc_thread *, bool,
int);
static inline bool
async_valid_stream_id_p (int async)
{

View File

@ -172,18 +172,11 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
return;
}
if (async > acc_async_sync)
thr->dev->openacc.async_set_async_func (async);
bool ret = (from
? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
: thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
if (async > acc_async_sync)
thr->dev->openacc.async_set_async_func (acc_async_sync);
if (!ret)
gomp_fatal ("error in %s", libfnname);
goacc_aq aq = get_goacc_asyncqueue (async);
if (from)
gomp_copy_dev2host (thr->dev, aq, h, d, s);
else
gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
}
void
@ -509,17 +502,13 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
gomp_mutex_unlock (&acc_dev->lock);
if (async > acc_async_sync)
acc_dev->openacc.async_set_async_func (async);
goacc_aq aq = get_goacc_asyncqueue (async);
tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
GOMP_MAP_VARS_OPENACC);
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
&kinds, true, GOMP_MAP_VARS_OPENACC);
/* Initialize dynamic refcount. */
tgt->list[0].key->dynamic_refcount = 1;
if (async > acc_async_sync)
acc_dev->openacc.async_set_async_func (acc_async_sync);
gomp_mutex_lock (&acc_dev->lock);
d = tgt->to_free;
@ -676,13 +665,9 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
if (f & FLAG_COPYOUT)
{
if (async > acc_async_sync)
acc_dev->openacc.async_set_async_func (async);
acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
if (async > acc_async_sync)
acc_dev->openacc.async_set_async_func (acc_async_sync);
goacc_aq aq = get_goacc_asyncqueue (async);
gomp_copy_dev2host (acc_dev, aq, h, d, s);
}
gomp_remove_var (acc_dev, n);
}
@ -765,16 +750,12 @@ update_dev_host (int is_dev, void *h, size_t s, int async)
d = (void *) (n->tgt->tgt_start + n->tgt_offset
+ (uintptr_t) h - n->host_start);
if (async > acc_async_sync)
acc_dev->openacc.async_set_async_func (async);
goacc_aq aq = get_goacc_asyncqueue (async);
if (is_dev)
acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
else
acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
if (async > acc_async_sync)
acc_dev->openacc.async_set_async_func (acc_async_sync);
gomp_copy_dev2host (acc_dev, aq, h, d, s);
gomp_mutex_unlock (&acc_dev->lock);
}
@ -805,7 +786,7 @@ acc_update_self_async (void *h, size_t s, int async)
void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
void *kinds)
void *kinds, int async)
{
struct target_mem_desc *tgt;
struct goacc_thread *thr = goacc_thread ();
@ -835,8 +816,9 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
}
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
goacc_aq aq = get_goacc_asyncqueue (async);
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
/* Initialize dynamic refcount. */
@ -930,7 +912,10 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
if (async < acc_async_noval)
gomp_unmap_vars (t, true);
else
t->device_descr->openacc.register_async_cleanup_func (t, async);
{
goacc_aq aq = get_goacc_asyncqueue (async);
gomp_unmap_vars_async (t, true, aq);
}
}
gomp_mutex_unlock (&acc_dev->lock);

View File

@ -217,8 +217,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
}
va_end (ap);
acc_dev->openacc.async_set_async_func (async);
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
{
k.host_start = (uintptr_t) fn;
@ -235,44 +233,29 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
else
tgt_fn = (void (*)) fn;
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
GOMP_MAP_VARS_OPENACC);
goacc_aq aq = get_goacc_asyncqueue (async);
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
true, GOMP_MAP_VARS_OPENACC);
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
for (i = 0; i < mapnum; i++)
devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+ tgt->list[i].key->tgt_offset
+ tgt->list[i].offset);
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
async, dims, tgt);
/* If running synchronously, unmap immediately. */
bool copyfrom = true;
if (async_synchronous_p (async))
gomp_unmap_vars (tgt, true);
if (aq == NULL)
{
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
dims, tgt);
/* If running synchronously, unmap immediately. */
gomp_unmap_vars (tgt, true);
}
else
{
bool async_unmap = false;
for (size_t i = 0; i < tgt->list_count; i++)
{
splay_tree_key k = tgt->list[i].key;
if (k && k->refcount == 1)
{
async_unmap = true;
break;
}
}
if (async_unmap)
tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
else
{
copyfrom = false;
gomp_unmap_vars (tgt, copyfrom);
}
acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
dims, tgt, aq);
gomp_unmap_vars_async (tgt, true, aq);
}
acc_dev->openacc.async_set_async_func (acc_async_sync);
}
/* Legacy entry point (GCC 5). Only provide host fallback execution. */
@ -383,8 +366,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
finalize = true;
}
acc_dev->openacc.async_set_async_func (async);
/* Determine if this is an "acc enter data". */
for (i = 0; i < mapnum; ++i)
{
@ -437,11 +418,11 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
{
case GOMP_MAP_ALLOC:
case GOMP_MAP_FORCE_ALLOC:
acc_create (hostaddrs[i], sizes[i]);
acc_create_async (hostaddrs[i], sizes[i], async);
break;
case GOMP_MAP_TO:
case GOMP_MAP_FORCE_TO:
acc_copyin (hostaddrs[i], sizes[i]);
acc_copyin_async (hostaddrs[i], sizes[i], async);
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@ -452,7 +433,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
else
{
gomp_acc_insert_pointer (pointer, &hostaddrs[i],
&sizes[i], &kinds[i]);
&sizes[i], &kinds[i], async);
/* Increment 'i' by two because OpenACC requires fortran
arrays to be contiguous, so each PSET is associated with
one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
@ -477,17 +458,17 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
if (acc_is_present (hostaddrs[i], sizes[i]))
{
if (finalize)
acc_delete_finalize (hostaddrs[i], sizes[i]);
acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
else
acc_delete (hostaddrs[i], sizes[i]);
acc_delete_async (hostaddrs[i], sizes[i], async);
}
break;
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_FROM:
if (finalize)
acc_copyout_finalize (hostaddrs[i], sizes[i]);
acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
else
acc_copyout (hostaddrs[i], sizes[i]);
acc_copyout_async (hostaddrs[i], sizes[i], async);
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@ -505,8 +486,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
i += pointer - 1;
}
}
acc_dev->openacc.async_set_async_func (acc_async_sync);
}
static void
@ -532,9 +511,10 @@ goacc_wait (int async, int num_waits, va_list *ap)
if (async == acc_async_sync)
acc_wait (qid);
else if (qid == async)
;/* If we're waiting on the same asynchronous queue as we're
launching on, the queue itself will order work as
required, so there's no need to wait explicitly. */
/* If we're waiting on the same asynchronous queue as we're
launching on, the queue itself will order work as
required, so there's no need to wait explicitly. */
;
else
acc_wait_async (qid, async);
}
@ -567,8 +547,6 @@ GOACC_update (int flags_m, size_t mapnum,
va_end (ap);
}
acc_dev->openacc.async_set_async_func (async);
bool update_device = false;
for (i = 0; i < mapnum; ++i)
{
@ -591,6 +569,8 @@ GOACC_update (int flags_m, size_t mapnum,
the value of the allocated device memory in the
previous pointer. */
*(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
/* TODO: verify that we really cannot use acc_update_device_async
here. */
acc_update_device (hostaddrs[i], sizeof (uintptr_t));
/* Restore the host pointer. */
@ -608,7 +588,7 @@ GOACC_update (int flags_m, size_t mapnum,
/* Fallthru */
case GOMP_MAP_FORCE_TO:
update_device = true;
acc_update_device (hostaddrs[i], sizes[i]);
acc_update_device_async (hostaddrs[i], sizes[i], async);
break;
case GOMP_MAP_FROM:
@ -620,7 +600,7 @@ GOACC_update (int flags_m, size_t mapnum,
/* Fallthru */
case GOMP_MAP_FORCE_FROM:
update_device = false;
acc_update_self (hostaddrs[i], sizes[i]);
acc_update_self_async (hostaddrs[i], sizes[i], async);
break;
default:
@ -628,8 +608,6 @@ GOACC_update (int flags_m, size_t mapnum,
break;
}
}
acc_dev->openacc.async_set_async_func (acc_async_sync);
}
void

View File

@ -30,15 +30,12 @@
#include "oacc-plugin.h"
#include "oacc-int.h"
/* This plugin function is now obsolete. */
void
GOMP_PLUGIN_async_unmap_vars (void *ptr, int async)
GOMP_PLUGIN_async_unmap_vars (void *ptr __attribute__((unused)),
int async __attribute__((unused)))
{
struct target_mem_desc *tgt = ptr;
struct gomp_device_descr *devicep = tgt->device_descr;
devicep->openacc.async_set_async_func (async);
gomp_unmap_vars (tgt, true);
devicep->openacc.async_set_async_func (acc_async_sync);
gomp_fatal ("invalid plugin function");
}
/* Return the target-specific part of the TLS data for the current thread. */

View File

@ -42,6 +42,7 @@ CUDA_ONE_CALL (cuModuleLoad)
CUDA_ONE_CALL (cuModuleLoadData)
CUDA_ONE_CALL (cuModuleUnload)
CUDA_ONE_CALL_MAYBE_NULL (cuOccupancyMaxPotentialBlockSize)
CUDA_ONE_CALL (cuStreamAddCallback)
CUDA_ONE_CALL (cuStreamCreate)
CUDA_ONE_CALL (cuStreamDestroy)
CUDA_ONE_CALL (cuStreamQuery)

View File

@ -54,7 +54,11 @@ typedef enum {
CUDA_ERROR_INVALID_CONTEXT = 201,
CUDA_ERROR_NOT_FOUND = 500,
CUDA_ERROR_NOT_READY = 600,
CUDA_ERROR_LAUNCH_FAILED = 719
CUDA_ERROR_LAUNCH_FAILED = 719,
CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
CUDA_ERROR_NOT_PERMITTED = 800,
CUDA_ERROR_NOT_SUPPORTED = 801,
CUDA_ERROR_UNKNOWN = 999
} CUresult;
typedef enum {
@ -173,6 +177,8 @@ CUresult cuModuleLoadData (CUmodule *, const void *);
CUresult cuModuleUnload (CUmodule);
CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
CUoccupancyB2DSize, size_t, int);
typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int);
CUresult cuStreamCreate (CUstream *, unsigned);
#define cuStreamDestroy cuStreamDestroy_v2
CUresult cuStreamDestroy (CUstream);

File diff suppressed because it is too large Load Diff

View File

@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *devicep,
}
}
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
bool (*copy_func) (int, void *, const void *, size_t,
struct goacc_asyncqueue *),
const char *dst, void *dstaddr,
const char *src, const void *srcaddr,
size_t size, struct goacc_asyncqueue *aq)
{
if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
}
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
host to device memory transfers. */
@ -269,8 +285,9 @@ gomp_to_device_kind_p (int kind)
}
}
static void
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq,
void *d, const void *h, size_t sz,
struct gomp_coalesce_buf *cbuf)
{
@ -299,14 +316,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
}
}
}
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
if (__builtin_expect (aq != NULL, 0))
goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
"dev", d, "host", h, sz, aq);
else
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
static void
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq,
void *h, const void *d, size_t sz)
{
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
if (__builtin_expect (aq != NULL, 0))
goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
"host", h, "dev", d, sz, aq);
else
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
@ -324,7 +350,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
Helper function of gomp_map_vars. */
static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
gomp_map_vars_existing (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree_key oldn,
splay_tree_key newn, struct target_var_desc *tgt_var,
unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
@ -346,7 +373,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
}
if (GOMP_MAP_ALWAYS_TO_P (kind))
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ newn->host_start - oldn->host_start),
(void *) newn->host_start,
@ -364,8 +391,8 @@ get_kind (bool short_mapkind, void *kinds, int idx)
}
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
uintptr_t target_offset, uintptr_t bias,
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
struct gomp_coalesce_buf *cbuf)
{
struct gomp_device_descr *devicep = tgt->device_descr;
@ -376,7 +403,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
if (cur_node.host_start == (uintptr_t) NULL)
{
cur_node.tgt_offset = (uintptr_t) NULL;
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + target_offset),
(void *) &cur_node.tgt_offset,
sizeof (void *), cbuf);
@ -398,12 +425,13 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
array section. Now subtract bias to get what we want
to initialize the pointer with. */
cur_node.tgt_offset -= bias;
gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
(void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
gomp_map_fields_existing (struct target_mem_desc *tgt,
struct goacc_asyncqueue *aq, splay_tree_key n,
size_t first, size_t i, void **hostaddrs,
size_t *sizes, void *kinds,
struct gomp_coalesce_buf *cbuf)
@ -423,7 +451,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, n2, &cur_node,
gomp_map_vars_existing (devicep, aq, n2, &cur_node,
&tgt->list[i], kind & typemask, cbuf);
return;
}
@ -439,8 +467,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
&& n2->host_start - n->host_start
== n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
kind & typemask, cbuf);
gomp_map_vars_existing (devicep, aq, n2, &cur_node,
&tgt->list[i], kind & typemask, cbuf);
return;
}
}
@ -451,7 +479,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
kind & typemask, cbuf);
return;
}
@ -483,10 +511,12 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
return tgt->tgt_start + tgt->list[i].offset;
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes,
void *kinds, bool short_mapkind,
enum gomp_map_vars_kind pragma_kind)
{
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
bool has_firstprivate = false;
@ -600,7 +630,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
continue;
}
for (i = first; i <= last; i++)
gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
sizes, kinds, NULL);
i--;
continue;
@ -645,7 +675,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
else
n = splay_tree_lookup (mem_map, &cur_node);
if (n && n->refcount != REFCOUNT_LINK)
gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
kind & typemask, NULL);
else
{
@ -756,7 +786,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt->list[i].offset = tgt_size;
len = sizes[i];
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + tgt_size),
(void *) hostaddrs[i], len, cbufp);
tgt_size += len;
@ -790,7 +820,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
continue;
}
for (i = first; i <= last; i++)
gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
sizes, kinds, cbufp);
i--;
continue;
@ -810,7 +840,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
if (cur_node.tgt_offset)
cur_node.tgt_offset -= sizes[i];
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (n->tgt->tgt_start
+ n->tgt_offset
+ cur_node.host_start
@ -831,7 +861,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
k->host_end = k->host_start + sizeof (void *);
splay_tree_key n = splay_tree_lookup (mem_map, k);
if (n && n->refcount != REFCOUNT_LINK)
gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
kind & typemask, cbufp);
else
{
@ -884,18 +914,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
case GOMP_MAP_FORCE_TOFROM:
case GOMP_MAP_ALWAYS_TO:
case GOMP_MAP_ALWAYS_TOFROM:
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
k->host_end - k->host_start, cbufp);
break;
case GOMP_MAP_POINTER:
gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
gomp_map_pointer (tgt, aq,
(uintptr_t) *(void **) k->host_start,
k->tgt_offset, sizes[i], cbufp);
break;
case GOMP_MAP_TO_PSET:
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
@ -917,7 +948,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt->list[j].always_copy_from = false;
if (k->refcount != REFCOUNT_INFINITY)
k->refcount++;
gomp_map_pointer (tgt,
gomp_map_pointer (tgt, aq,
(uintptr_t) *(void **) hostaddrs[j],
k->tgt_offset
+ ((uintptr_t) hostaddrs[j]
@ -946,7 +977,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
break;
case GOMP_MAP_FORCE_DEVICEPTR:
assert (k->host_end - k->host_start == sizeof (void *));
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
@ -965,7 +996,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
/* We intentionally do not use coalescing here, as it's not
data allocated by the current call to this function. */
gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
&tgt_addr, sizeof (void *), NULL);
}
array++;
@ -978,7 +1009,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
for (i = 0; i < mapnum; i++)
{
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + i * sizeof (void *)),
(void *) &cur_node.tgt_offset, sizeof (void *),
cbufp);
@ -989,7 +1020,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
{
long c = 0;
for (c = 0; c < cbuf.chunk_cnt; ++c)
gomp_copy_host2dev (devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + cbuf.chunks[c].start),
(char *) cbuf.buf + (cbuf.chunks[c].start
- cbuf.chunks[0].start),
@ -1012,7 +1043,27 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
return tgt;
}
static void
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
sizes, kinds, short_mapkind, pragma_kind);
}
attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes,
void *kinds, bool short_mapkind,
enum gomp_map_vars_kind pragma_kind)
{
return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
sizes, kinds, short_mapkind, pragma_kind);
}
attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
@ -1040,12 +1091,24 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
return is_tgt_unmapped;
}
static void
gomp_unref_tgt (void *ptr)
{
struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
if (tgt->refcount > 1)
tgt->refcount--;
else
gomp_unmap_tgt (tgt);
}
/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
variables back from device to host: if it is false, it is assumed that this
has been done already. */
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
struct goacc_asyncqueue *aq)
{
struct gomp_device_descr *devicep = tgt->device_descr;
@ -1082,7 +1145,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
|| tgt->list[i].always_copy_from)
gomp_copy_dev2host (devicep,
gomp_copy_dev2host (devicep, aq,
(void *) (k->host_start + tgt->list[i].offset),
(void *) (k->tgt->tgt_start + k->tgt_offset
+ tgt->list[i].offset),
@ -1091,14 +1154,28 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
gomp_remove_var (devicep, k);
}
if (tgt->refcount > 1)
tgt->refcount--;
if (aq)
devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
(void *) tgt);
else
gomp_unmap_tgt (tgt);
gomp_unref_tgt ((void *) tgt);
gomp_mutex_unlock (&devicep->lock);
}
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}
attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
struct goacc_asyncqueue *aq)
{
gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
size_t *sizes, void *kinds, bool short_mapkind)
@ -1148,9 +1225,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
size_t size = cur_node.host_end - cur_node.host_start;
if (GOMP_MAP_COPY_TO_P (kind & typemask))
gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
NULL);
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
}
}
gomp_mutex_unlock (&devicep->lock);
@ -1443,9 +1521,24 @@ gomp_init_device (struct gomp_device_descr *devicep)
false);
}
/* Initialize OpenACC asynchronous queues. */
goacc_init_asyncqueues (devicep);
devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP. DEVICEP
must be locked on entry, and remains locked on return. */
attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
bool ret = goacc_fini_asyncqueues (devicep);
ret &= devicep->fini_device_func (devicep->target_id);
devicep->state = GOMP_DEVICE_FINALIZED;
return ret;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
@ -1954,7 +2047,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
if ((kind == GOMP_MAP_FROM && k->refcount == 0)
|| kind == GOMP_MAP_ALWAYS_FROM)
gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
(void *) (k->tgt->tgt_start + k->tgt_offset
+ cur_node.host_start
- k->host_start),
@ -2636,20 +2729,20 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
{
if (!DLSYM_OPT (openacc.exec, openacc_exec)
|| !DLSYM_OPT (openacc.register_async_cleanup,
openacc_register_async_cleanup)
|| !DLSYM_OPT (openacc.async_test, openacc_async_test)
|| !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
|| !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
|| !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
|| !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
|| !DLSYM_OPT (openacc.async_wait_all_async,
openacc_async_wait_all_async)
|| !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
|| !DLSYM_OPT (openacc.create_thread_data,
openacc_create_thread_data)
|| !DLSYM_OPT (openacc.destroy_thread_data,
openacc_destroy_thread_data))
openacc_destroy_thread_data)
|| !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
|| !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
|| !DLSYM_OPT (openacc.async.test, openacc_async_test)
|| !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
|| !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
|| !DLSYM_OPT (openacc.async.queue_callback,
openacc_async_queue_callback)
|| !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
|| !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
|| !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
{
/* Require all the OpenACC handlers if we have
GOMP_OFFLOAD_CAP_OPENACC_200. */
@ -2700,10 +2793,7 @@ gomp_target_fini (void)
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_INITIALIZED)
{
ret = devicep->fini_device_func (devicep->target_id);
devicep->state = GOMP_DEVICE_FINALIZED;
}
ret = gomp_fini_device (devicep);
gomp_mutex_unlock (&devicep->lock);
if (!ret)
gomp_fatal ("device finalization failed");