gcc/libgomp/libgomp.map
Marcel Vollweiler 6c420193e8 libgomp: Add new runtime routines omp_target_memcpy_async and omp_target_memcpy_rect_async
This patch adds two new OpenMP runtime routines: omp_target_memcpy_async and
omp_target_memcpy_rect_async. Both functions are introduced in OpenMP 5.1 as
asynchronous variants of omp_target_memcpy and omp_target_memcpy_rect.

In contrast to the synchronous variants, the asynchronous functions have two
additional function parameters to allow the specification of task dependences:

	int depobj_count
	omp_depend_t *depobj_list

	integer(c_int), value :: depobj_count
	integer(omp_depend_kind), optional :: depobj_list(*)

The implementation splits the synchronous functions into two parts: (a) check
and (b) copy. Then (a) is used in the asynchronous functions for the sequential
part, and the actual copy process (b) is executed in a newly created task. The
sequential part (a) takes into account the requirements for the return values:

"The routine returns zero if successful. Otherwise, it returns a non-zero
value." (omp_target_memcpy_async, OpenMP 5.1 spec, section 3.8.7)

"An application can determine the number of inclusive dimensions supported by an
implementation by passing NULL pointers (or C_NULL_PTR, for Fortran) for both
dst and src. The routine returns the number of dimensions supported by the
implementation for the specified device numbers. No copy operation is
performed." (omp_target_memcpy_rect_async, OpenMP 5.1 spec, section 3.8.8)

Because the copy runs asynchronously, an error is thrown if the asynchronous
memcpy is not successful (in contrast to the synchronous functions, which
report failure through a non-zero return value).

gcc/ChangeLog:

	* omp-low.cc (omp_runtime_api_call): Added target_memcpy_async and
	target_memcpy_rect_async to omp_runtime_apis array.

libgomp/ChangeLog:

	* libgomp.map: Added omp_target_memcpy_async and
	omp_target_memcpy_rect_async.
	* libgomp.texi: Both functions are now supported.
	* omp.h.in: Added omp_target_memcpy_async and
	omp_target_memcpy_rect_async.
	* omp_lib.f90.in: Added interfaces for both new functions.
	* omp_lib.h.in: Likewise.
	* target.c (ialias_redirect): Added for GOMP_task.
	(omp_target_memcpy): Restructured into check and copy part.
	(omp_target_memcpy_check): New helper function for omp_target_memcpy and
	omp_target_memcpy_async that checks requirements.
	(omp_target_memcpy_copy): New helper function for omp_target_memcpy and
	omp_target_memcpy_async that performs the memcpy.
	(omp_target_memcpy_async_helper): New helper function that is used in
	omp_target_memcpy_async for the asynchronous task.
	(omp_target_memcpy_async): Added.
	(omp_target_memcpy_rect): Restructured into check and copy part.
	(omp_target_memcpy_rect_check): New helper function for
	omp_target_memcpy_rect and omp_target_memcpy_rect_async that checks
	requirements.
	(omp_target_memcpy_rect_copy): New helper function for
	omp_target_memcpy_rect and omp_target_memcpy_rect_async that performs
	the memcpy.
	(omp_target_memcpy_rect_async_helper): New helper function that is used
	in omp_target_memcpy_rect_async for the asynchronous task.
	(omp_target_memcpy_rect_async): Added.
	* task.c (ialias): Added for GOMP_task.
	* testsuite/libgomp.c-c++-common/target-memcpy-async-1.c: New test.
	* testsuite/libgomp.c-c++-common/target-memcpy-async-2.c: New test.
	* testsuite/libgomp.c-c++-common/target-memcpy-rect-async-1.c: New test.
	* testsuite/libgomp.c-c++-common/target-memcpy-rect-async-2.c: New test.
	* testsuite/libgomp.fortran/target-memcpy-async-1.f90: New test.
	* testsuite/libgomp.fortran/target-memcpy-async-2.f90: New test.
	* testsuite/libgomp.fortran/target-memcpy-rect-async-1.f90: New test.
	* testsuite/libgomp.fortran/target-memcpy-rect-async-2.f90: New test.
2022-05-20 02:29:32 -07:00

620 lines
13 KiB
Plaintext

# OMP_1.0: first node of the OMP_* version chain; it names no parent node.
# The "local: *;" clause at the end of this node is the only catch-all
# local clause in this script.
# NOTE(review): names ending in "_" look like the Fortran entry points and
# the "_8_" forms like variants taking 8-byte integers - TODO confirm.
OMP_1.0 {
global:
omp_set_num_threads;
omp_get_num_threads;
omp_get_max_threads;
omp_get_thread_num;
omp_get_num_procs;
omp_in_parallel;
omp_set_dynamic;
omp_get_dynamic;
omp_set_nested;
omp_get_nested;
#ifdef HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT
# If the assembler used lacks the .symver directive or the linker
# doesn't support GNU symbol versioning, we have the same symbol in
# two versions, which Sun ld chokes on.
# The lock routines below are therefore listed in this node only when the
# macro above is defined; the same names are listed again, without any
# condition, in the OMP_3.0 node.
omp_init_lock;
omp_init_nest_lock;
omp_destroy_lock;
omp_destroy_nest_lock;
omp_set_lock;
omp_set_nest_lock;
omp_unset_lock;
omp_unset_nest_lock;
omp_test_lock;
omp_test_nest_lock;
omp_destroy_lock_;
omp_destroy_nest_lock_;
omp_init_lock_;
omp_init_nest_lock_;
omp_set_lock_;
omp_set_nest_lock_;
omp_test_lock_;
omp_test_nest_lock_;
omp_unset_lock_;
omp_unset_nest_lock_;
#endif
omp_get_dynamic_;
omp_get_max_threads_;
omp_get_nested_;
omp_get_num_procs_;
omp_get_num_threads_;
omp_get_thread_num_;
omp_in_parallel_;
omp_set_dynamic_;
omp_set_dynamic_8_;
omp_set_nested_;
omp_set_nested_8_;
omp_set_num_threads_;
omp_set_num_threads_8_;
local:
*;
};
# OMP_2.0: inherits from OMP_1.0 and adds omp_get_wtick and omp_get_wtime,
# each together with its underscore-suffixed counterpart.
OMP_2.0 {
global:
omp_get_wtick;
omp_get_wtime;
omp_get_wtick_;
omp_get_wtime_;
} OMP_1.0;
# OMP_3.0: inherits from OMP_2.0.
# Besides the routines that are new in this node, it lists the lock
# routines (omp_*_lock, omp_*_nest_lock and their "_" forms) that OMP_1.0
# lists only under HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT.
OMP_3.0 {
global:
omp_set_schedule;
omp_set_schedule_;
omp_set_schedule_8_;
omp_get_schedule;
omp_get_schedule_;
omp_get_schedule_8_;
omp_get_thread_limit;
omp_get_thread_limit_;
omp_set_max_active_levels;
omp_set_max_active_levels_;
omp_set_max_active_levels_8_;
omp_get_max_active_levels;
omp_get_max_active_levels_;
omp_get_level;
omp_get_level_;
omp_get_ancestor_thread_num;
omp_get_ancestor_thread_num_;
omp_get_ancestor_thread_num_8_;
omp_get_team_size;
omp_get_team_size_;
omp_get_team_size_8_;
omp_get_active_level;
omp_get_active_level_;
omp_init_lock;
omp_init_nest_lock;
omp_destroy_lock;
omp_destroy_nest_lock;
omp_set_lock;
omp_set_nest_lock;
omp_unset_lock;
omp_unset_nest_lock;
omp_test_lock;
omp_test_nest_lock;
omp_destroy_lock_;
omp_destroy_nest_lock_;
omp_init_lock_;
omp_init_nest_lock_;
omp_set_lock_;
omp_set_nest_lock_;
omp_test_lock_;
omp_test_nest_lock_;
omp_unset_lock_;
omp_unset_nest_lock_;
} OMP_2.0;
# OMP_3.1: inherits from OMP_3.0 and adds omp_in_final plus its
# underscore-suffixed counterpart.
OMP_3.1 {
global:
omp_in_final;
omp_in_final_;
} OMP_3.0;
# OMP_4.0: inherits from OMP_3.1.
# Every routine is listed together with an underscore-suffixed form;
# omp_set_default_device additionally has an "_8_" form.
OMP_4.0 {
global:
omp_get_cancellation;
omp_get_cancellation_;
omp_get_proc_bind;
omp_get_proc_bind_;
omp_set_default_device;
omp_set_default_device_;
omp_set_default_device_8_;
omp_get_default_device;
omp_get_default_device_;
omp_get_num_devices;
omp_get_num_devices_;
omp_get_num_teams;
omp_get_num_teams_;
omp_get_team_num;
omp_get_team_num_;
omp_is_initial_device;
omp_is_initial_device_;
} OMP_3.1;
# OMP_4.5: inherits from OMP_4.0.
# The omp_target_* entries at the end of the list have no
# underscore-suffixed counterpart in this node, unlike the entries
# before them.
OMP_4.5 {
global:
omp_get_max_task_priority;
omp_get_max_task_priority_;
omp_get_num_places;
omp_get_num_places_;
omp_get_place_num_procs;
omp_get_place_num_procs_;
omp_get_place_num_procs_8_;
omp_get_place_proc_ids;
omp_get_place_proc_ids_;
omp_get_place_proc_ids_8_;
omp_get_place_num;
omp_get_place_num_;
omp_get_partition_num_places;
omp_get_partition_num_places_;
omp_get_partition_place_nums;
omp_get_partition_place_nums_;
omp_get_partition_place_nums_8_;
omp_get_initial_device;
omp_get_initial_device_;
omp_target_alloc;
omp_target_free;
omp_target_is_present;
omp_target_memcpy;
omp_target_memcpy_rect;
omp_target_associate_ptr;
omp_target_disassociate_ptr;
} OMP_4.0;
# OMP_5.0: inherits from OMP_4.5.
# Each routine is listed together with its underscore-suffixed form.
OMP_5.0 {
global:
omp_capture_affinity;
omp_capture_affinity_;
omp_display_affinity;
omp_display_affinity_;
omp_get_affinity_format;
omp_get_affinity_format_;
omp_set_affinity_format;
omp_set_affinity_format_;
omp_pause_resource;
omp_pause_resource_;
omp_pause_resource_all;
omp_pause_resource_all_;
} OMP_4.5;
# OMP_5.0.1: inherits from OMP_5.0.
# omp_alloc and omp_free are listed without an underscore-suffixed form;
# the other routines in this node each have one.
# NOTE(review): the three-part version number presumably marks a later
# addition on top of the OMP_5.0 node - TODO confirm.
OMP_5.0.1 {
global:
omp_set_default_allocator;
omp_set_default_allocator_;
omp_get_default_allocator;
omp_get_default_allocator_;
omp_init_allocator;
omp_init_allocator_;
omp_init_allocator_8_;
omp_destroy_allocator;
omp_destroy_allocator_;
omp_alloc;
omp_free;
omp_get_supported_active_levels;
omp_get_supported_active_levels_;
omp_fulfill_event;
omp_fulfill_event_;
} OMP_5.0;
# OMP_5.0.2: inherits from OMP_5.0.1.
# Only omp_get_device_num has an underscore-suffixed form here; the
# allocation routines are listed under their plain names only.
OMP_5.0.2 {
global:
omp_get_device_num;
omp_get_device_num_;
omp_aligned_alloc;
omp_calloc;
omp_aligned_calloc;
omp_realloc;
} OMP_5.0.1;
# OMP_5.1: inherits from OMP_5.0.2.
# The omp_display_env and omp_set_* routines come in plain, "_" and "_8_"
# forms; the omp_get_* routines in plain and "_" forms only.
OMP_5.1 {
global:
omp_display_env;
omp_display_env_;
omp_display_env_8_;
omp_set_num_teams;
omp_set_num_teams_;
omp_set_num_teams_8_;
omp_get_max_teams;
omp_get_max_teams_;
omp_set_teams_thread_limit;
omp_set_teams_thread_limit_;
omp_set_teams_thread_limit_8_;
omp_get_teams_thread_limit;
omp_get_teams_thread_limit_;
} OMP_5.0.2;
# OMP_5.1.1: inherits from OMP_5.1; last node of the OMP_* chain in this
# script.
# omp_target_memcpy_async and omp_target_memcpy_rect_async are the
# OpenMP 5.1 asynchronous variants of omp_target_memcpy and
# omp_target_memcpy_rect, which are listed in the OMP_4.5 node.
# None of the four entries has an underscore-suffixed form in this node.
OMP_5.1.1 {
global:
omp_get_mapped_ptr;
omp_target_is_accessible;
omp_target_memcpy_async;
omp_target_memcpy_rect_async;
} OMP_5.1;
# GOMP_1.0: first node of the GOMP_* version chain; it names no parent
# node and has no "local:" clause of its own (the catch-all "local: *;"
# is in OMP_1.0).
# NOTE(review): the GOMP_ entry points are presumably called by
# compiler-generated code rather than by user code - TODO confirm.
GOMP_1.0 {
global:
GOMP_atomic_end;
GOMP_atomic_start;
GOMP_barrier;
GOMP_critical_end;
GOMP_critical_name_end;
GOMP_critical_name_start;
GOMP_critical_start;
GOMP_loop_dynamic_next;
GOMP_loop_dynamic_start;
GOMP_loop_end;
GOMP_loop_end_nowait;
GOMP_loop_guided_next;
GOMP_loop_guided_start;
GOMP_loop_ordered_dynamic_next;
GOMP_loop_ordered_dynamic_start;
GOMP_loop_ordered_guided_next;
GOMP_loop_ordered_guided_start;
GOMP_loop_ordered_runtime_next;
GOMP_loop_ordered_runtime_start;
GOMP_loop_ordered_static_next;
GOMP_loop_ordered_static_start;
GOMP_loop_runtime_next;
GOMP_loop_runtime_start;
GOMP_loop_static_next;
GOMP_loop_static_start;
GOMP_ordered_end;
GOMP_ordered_start;
GOMP_parallel_end;
GOMP_parallel_loop_dynamic_start;
GOMP_parallel_loop_guided_start;
GOMP_parallel_loop_runtime_start;
GOMP_parallel_loop_static_start;
GOMP_parallel_sections_start;
GOMP_parallel_start;
GOMP_sections_end;
GOMP_sections_end_nowait;
GOMP_sections_next;
GOMP_sections_start;
GOMP_single_copy_end;
GOMP_single_copy_start;
GOMP_single_start;
};
# GOMP_2.0: inherits from GOMP_1.0.
# Adds GOMP_task and GOMP_taskwait, and GOMP_loop_ull_* entries whose
# names mirror the GOMP_loop_* *_next/*_start entries of GOMP_1.0.
# NOTE(review): "ull" presumably stands for unsigned long long iteration
# variables - TODO confirm.
GOMP_2.0 {
global:
GOMP_task;
GOMP_taskwait;
GOMP_loop_ull_dynamic_next;
GOMP_loop_ull_dynamic_start;
GOMP_loop_ull_guided_next;
GOMP_loop_ull_guided_start;
GOMP_loop_ull_ordered_dynamic_next;
GOMP_loop_ull_ordered_dynamic_start;
GOMP_loop_ull_ordered_guided_next;
GOMP_loop_ull_ordered_guided_start;
GOMP_loop_ull_ordered_runtime_next;
GOMP_loop_ull_ordered_runtime_start;
GOMP_loop_ull_ordered_static_next;
GOMP_loop_ull_ordered_static_start;
GOMP_loop_ull_runtime_next;
GOMP_loop_ull_runtime_start;
GOMP_loop_ull_static_next;
GOMP_loop_ull_static_start;
} GOMP_1.0;
# GOMP_3.0: inherits from GOMP_2.0 and adds the single entry
# GOMP_taskyield.
GOMP_3.0 {
global:
GOMP_taskyield;
} GOMP_2.0;
# GOMP_4.0: inherits from GOMP_3.0.
# Lists GOMP_parallel, GOMP_parallel_loop_* and GOMP_parallel_sections
# without the "_start" suffix that the GOMP_1.0 entries carry, alongside
# the cancellation, taskgroup, target and teams entry points.
GOMP_4.0 {
global:
GOMP_barrier_cancel;
GOMP_cancel;
GOMP_cancellation_point;
GOMP_loop_end_cancel;
GOMP_parallel_loop_dynamic;
GOMP_parallel_loop_guided;
GOMP_parallel_loop_runtime;
GOMP_parallel_loop_static;
GOMP_parallel_sections;
GOMP_parallel;
GOMP_sections_end_cancel;
GOMP_taskgroup_start;
GOMP_taskgroup_end;
GOMP_target;
GOMP_target_data;
GOMP_target_end_data;
GOMP_target_update;
GOMP_teams;
} GOMP_3.0;
# GOMP_4.0.1: inherits from GOMP_4.0 and adds GOMP_offload_register and
# GOMP_offload_unregister.
GOMP_4.0.1 {
global:
GOMP_offload_register;
GOMP_offload_unregister;
} GOMP_4.0;
# GOMP_4.5: inherits from GOMP_4.0.1.
# Lists "_ext" forms of the GOMP_4.0 target entries and "_ver" forms of
# the GOMP_4.0.1 offload registration entries, plus the taskloop,
# doacross and nonmonotonic loop entry points.
# NOTE(review): the "_ext"/"_ver" forms presumably supersede the older
# entries, which stay exported from their original nodes - TODO confirm.
GOMP_4.5 {
global:
GOMP_target_ext;
GOMP_target_data_ext;
GOMP_target_update_ext;
GOMP_target_enter_exit_data;
GOMP_taskloop;
GOMP_taskloop_ull;
GOMP_offload_register_ver;
GOMP_offload_unregister_ver;
GOMP_loop_doacross_dynamic_start;
GOMP_loop_doacross_guided_start;
GOMP_loop_doacross_runtime_start;
GOMP_loop_doacross_static_start;
GOMP_doacross_post;
GOMP_doacross_wait;
GOMP_loop_ull_doacross_dynamic_start;
GOMP_loop_ull_doacross_guided_start;
GOMP_loop_ull_doacross_runtime_start;
GOMP_loop_ull_doacross_static_start;
GOMP_doacross_ull_post;
GOMP_doacross_ull_wait;
GOMP_loop_nonmonotonic_dynamic_next;
GOMP_loop_nonmonotonic_dynamic_start;
GOMP_loop_nonmonotonic_guided_next;
GOMP_loop_nonmonotonic_guided_start;
GOMP_loop_ull_nonmonotonic_dynamic_next;
GOMP_loop_ull_nonmonotonic_dynamic_start;
GOMP_loop_ull_nonmonotonic_guided_next;
GOMP_loop_ull_nonmonotonic_guided_start;
GOMP_parallel_loop_nonmonotonic_dynamic;
GOMP_parallel_loop_nonmonotonic_guided;
} GOMP_4.0.1;
# GOMP_5.0: inherits from GOMP_4.5.
# Lists further loop entry points (generic, ordered, doacross and
# (maybe_)nonmonotonic runtime forms, each also as "_ull_") together with
# the reduction, sections2, taskwait_depend and teams_reg entry points.
GOMP_5.0 {
global:
GOMP_loop_doacross_start;
GOMP_loop_maybe_nonmonotonic_runtime_next;
GOMP_loop_maybe_nonmonotonic_runtime_start;
GOMP_loop_nonmonotonic_runtime_next;
GOMP_loop_nonmonotonic_runtime_start;
GOMP_loop_ordered_start;
GOMP_loop_start;
GOMP_loop_ull_doacross_start;
GOMP_loop_ull_maybe_nonmonotonic_runtime_next;
GOMP_loop_ull_maybe_nonmonotonic_runtime_start;
GOMP_loop_ull_nonmonotonic_runtime_next;
GOMP_loop_ull_nonmonotonic_runtime_start;
GOMP_loop_ull_ordered_start;
GOMP_loop_ull_start;
GOMP_parallel_loop_maybe_nonmonotonic_runtime;
GOMP_parallel_loop_nonmonotonic_runtime;
GOMP_parallel_reductions;
GOMP_sections2_start;
GOMP_taskgroup_reduction_register;
GOMP_taskgroup_reduction_unregister;
GOMP_task_reduction_remap;
GOMP_taskwait_depend;
GOMP_teams_reg;
GOMP_workshare_task_reduction_unregister;
} GOMP_4.5;
# GOMP_5.0.1: inherits from GOMP_5.0 and adds GOMP_alloc and GOMP_free.
GOMP_5.0.1 {
global:
GOMP_alloc;
GOMP_free;
} GOMP_5.0;
# GOMP_5.1: inherits from GOMP_5.0.1; last node of the GOMP_* chain in
# this script.
GOMP_5.1 {
global:
GOMP_error;
GOMP_scope_start;
GOMP_warning;
GOMP_teams4;
} GOMP_5.0.1;
# OACC_2.0: first node of the OACC_* version chain; it names no parent
# node.
# Most acc_* routines are listed together with "_h_" forms; the data
# routines use "_32_h_", "_64_h_" and "_array_h_" forms instead.
# NOTE(review): the "_h_" names are presumably the Fortran-callable
# wrappers - TODO confirm.
OACC_2.0 {
global:
acc_get_num_devices;
acc_get_num_devices_h_;
acc_set_device_type;
acc_set_device_type_h_;
acc_get_device_type;
acc_get_device_type_h_;
acc_set_device_num;
acc_set_device_num_h_;
acc_get_device_num;
acc_get_device_num_h_;
acc_async_test;
acc_async_test_h_;
acc_async_test_all;
acc_async_test_all_h_;
acc_wait;
acc_wait_h_;
acc_wait_async;
acc_wait_async_h_;
acc_wait_all;
acc_wait_all_h_;
acc_wait_all_async;
acc_wait_all_async_h_;
acc_init;
acc_init_h_;
acc_shutdown;
acc_shutdown_h_;
acc_on_device;
acc_on_device_h_;
acc_malloc;
acc_free;
acc_copyin;
acc_copyin_32_h_;
acc_copyin_64_h_;
acc_copyin_array_h_;
acc_present_or_copyin;
acc_present_or_copyin_32_h_;
acc_present_or_copyin_64_h_;
acc_present_or_copyin_array_h_;
acc_create;
acc_create_32_h_;
acc_create_64_h_;
acc_create_array_h_;
acc_present_or_create;
acc_present_or_create_32_h_;
acc_present_or_create_64_h_;
acc_present_or_create_array_h_;
acc_copyout;
acc_copyout_32_h_;
acc_copyout_64_h_;
acc_copyout_array_h_;
acc_delete;
acc_delete_32_h_;
acc_delete_64_h_;
acc_delete_array_h_;
acc_update_device;
acc_update_device_32_h_;
acc_update_device_64_h_;
acc_update_device_array_h_;
acc_update_self;
acc_update_self_32_h_;
acc_update_self_64_h_;
acc_update_self_array_h_;
acc_map_data;
acc_unmap_data;
acc_deviceptr;
acc_hostptr;
acc_is_present;
acc_is_present_32_h_;
acc_is_present_64_h_;
acc_is_present_array_h_;
acc_memcpy_to_device;
acc_memcpy_from_device;
acc_get_current_cuda_device;
acc_get_current_cuda_context;
acc_get_cuda_stream;
acc_set_cuda_stream;
};
# OACC_2.0.1: inherits from OACC_2.0.
# NOTE(review): these names look like alternative spellings of routines
# already listed in OACC_2.0 (acc_wait, acc_wait_all,
# acc_present_or_copyin, acc_present_or_create) - TODO confirm.
OACC_2.0.1 {
global:
acc_async_wait;
acc_async_wait_all;
acc_pcopyin;
acc_pcreate;
} OACC_2.0;
# OACC_2.5: inherits from OACC_2.0.1.
# Lists "_async", "_finalize" and "_finalize_async" forms of data routines
# from OACC_2.0, each with "_32_h_", "_64_h_" and "_array_h_" forms, plus
# the two acc_memcpy_*_device_async routines under their plain names only.
OACC_2.5 {
global:
acc_copyin_async;
acc_copyin_async_32_h_;
acc_copyin_async_64_h_;
acc_copyin_async_array_h_;
acc_copyout_async;
acc_copyout_async_32_h_;
acc_copyout_async_64_h_;
acc_copyout_async_array_h_;
acc_copyout_finalize;
acc_copyout_finalize_32_h_;
acc_copyout_finalize_64_h_;
acc_copyout_finalize_array_h_;
acc_copyout_finalize_async;
acc_copyout_finalize_async_32_h_;
acc_copyout_finalize_async_64_h_;
acc_copyout_finalize_async_array_h_;
acc_create_async;
acc_create_async_32_h_;
acc_create_async_64_h_;
acc_create_async_array_h_;
acc_delete_async;
acc_delete_async_32_h_;
acc_delete_async_64_h_;
acc_delete_async_array_h_;
acc_delete_finalize;
acc_delete_finalize_32_h_;
acc_delete_finalize_64_h_;
acc_delete_finalize_array_h_;
acc_delete_finalize_async;
acc_delete_finalize_async_32_h_;
acc_delete_finalize_async_64_h_;
acc_delete_finalize_async_array_h_;
acc_memcpy_from_device_async;
acc_memcpy_to_device_async;
acc_update_device_async;
acc_update_device_async_32_h_;
acc_update_device_async_64_h_;
acc_update_device_async_array_h_;
acc_update_self_async;
acc_update_self_async_32_h_;
acc_update_self_async_64_h_;
acc_update_self_async_array_h_;
} OACC_2.0.1;
# OACC_2.5.1: inherits from OACC_2.5 and adds the acc_prof_* entries and
# acc_register_library.
OACC_2.5.1 {
global:
acc_prof_lookup;
acc_prof_register;
acc_prof_unregister;
acc_register_library;
} OACC_2.5;
# OACC_2.6: inherits from OACC_2.5.1; last node of the OACC_* chain in
# this script.
# Only the acc_get_property* routines have "_h_" forms in this node.
OACC_2.6 {
global:
acc_attach;
acc_attach_async;
acc_detach;
acc_detach_async;
acc_detach_finalize;
acc_detach_finalize_async;
acc_get_property;
acc_get_property_h_;
acc_get_property_string;
acc_get_property_string_h_;
} OACC_2.5.1;
# GOACC_2.0: first node of the GOACC_* version chain; it names no parent
# node.
# NOTE(review): by analogy with GOMP_*, these are presumably the entry
# points used by compiler-generated OpenACC code - TODO confirm.
GOACC_2.0 {
global:
GOACC_data_end;
GOACC_data_start;
GOACC_enter_exit_data;
GOACC_parallel;
GOACC_update;
GOACC_wait;
GOACC_get_thread_num;
GOACC_get_num_threads;
};
# GOACC_2.0.1: inherits from GOACC_2.0 and adds GOACC_declare and
# GOACC_parallel_keyed.
GOACC_2.0.1 {
global:
GOACC_declare;
GOACC_parallel_keyed;
} GOACC_2.0;
# GOACC_2.0.2: inherits from GOACC_2.0.1; last node of the GOACC_* chain
# in this script. Adds GOACC_enter_data and GOACC_exit_data as separate
# entries (GOACC_2.0 lists a combined GOACC_enter_exit_data).
GOACC_2.0.2 {
global:
GOACC_enter_data;
GOACC_exit_data;
} GOACC_2.0.1;
# GOMP_PLUGIN_1.0: first node of the GOMP_PLUGIN_* version chain; it names
# no parent node.
# NOTE(review): these are presumably the helper routines that offload
# plugins call back into the library - TODO confirm.
GOMP_PLUGIN_1.0 {
global:
GOMP_PLUGIN_malloc;
GOMP_PLUGIN_malloc_cleared;
GOMP_PLUGIN_realloc;
GOMP_PLUGIN_debug;
GOMP_PLUGIN_error;
GOMP_PLUGIN_fatal;
GOMP_PLUGIN_async_unmap_vars;
GOMP_PLUGIN_acc_thread;
};
# GOMP_PLUGIN_1.1: inherits from GOMP_PLUGIN_1.0 and adds the single entry
# GOMP_PLUGIN_target_task_completion.
GOMP_PLUGIN_1.1 {
global:
GOMP_PLUGIN_target_task_completion;
} GOMP_PLUGIN_1.0;
# GOMP_PLUGIN_1.2: inherits from GOMP_PLUGIN_1.1 and adds the single entry
# GOMP_PLUGIN_acc_default_dim.
GOMP_PLUGIN_1.2 {
global:
GOMP_PLUGIN_acc_default_dim;
} GOMP_PLUGIN_1.1;
# GOMP_PLUGIN_1.3: inherits from GOMP_PLUGIN_1.2; last node of the
# GOMP_PLUGIN_* chain in this script.
GOMP_PLUGIN_1.3 {
global:
GOMP_PLUGIN_goacc_profiling_dispatch;
GOMP_PLUGIN_goacc_thread;
} GOMP_PLUGIN_1.2;