gcc/libgomp/libgomp.map
Jakub Jelinek 7d6da11fce openmp: Honor OpenMP 5.1 num_teams lower bound
The following patch implements what I've been talking about earlier,
honor that for explicit num_teams clause we create at least the
lower-bound (if not specified, upper-bound) teams in the league.
For host fallback, it still means we only have one thread doing all the
teams, sequentially one after another.
For PTX and GCN, I think the new teams-2.c test and maybe teams-4.c too
will or might fail.
For these offloads, I think it is ok to remove symbols no longer used
from libgomp.a.
If num_teams_lower is bigger than the provided num_blocks or num_workgroups,
we should arrange for gomp_num_teams_var to be num_teams_lower - 1,
stop using the %ctaid.x or __builtin_gcn_dim_pos (0) for omp_get_team_num ()
and instead use for it some .shared var that GOMP_teams4 initializes to
%ctaid.x or __builtin_gcn_dim_pos (0) when first and for !first
increment that by num_blocks or num_workgroups each time and only
return false when we are above num_teams_lower.
Any help with actually implementing this for the 2 architectures highly
appreciated.

2021-11-12  Jakub Jelinek  <jakub@redhat.com>

gcc/
	* omp-builtins.def (BUILT_IN_GOMP_TEAMS): Remove.
	(BUILT_IN_GOMP_TEAMS4): New.
	* builtin-types.def (BT_FN_VOID_UINT_UINT): Remove.
	(BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
	* omp-low.c (lower_omp_teams): Use GOMP_teams4 instead of
	GOMP_teams, pass to it also num_teams lower-bound expression
	or a dup of upper-bound if it is missing and a flag whether
	it is the first call or not.
gcc/fortran/
	* types.def (BT_FN_VOID_UINT_UINT): Remove.
	(BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
libgomp/
	* libgomp_g.h (GOMP_teams4): Declare.
	* libgomp.map (GOMP_5.1): Export GOMP_teams4.
	* target.c (GOMP_teams4): New function.
	* config/nvptx/target.c (GOMP_teams): Remove.
	(GOMP_teams4): New function.
	* config/gcn/target.c (GOMP_teams): Remove.
	(GOMP_teams4): New function.
	* testsuite/libgomp.c/teams-4.c (main): Expect exactly 2
	teams instead of <= 2.
	* testsuite/libgomp.c-c++-common/teams-2.c: New test.
2021-11-12 12:41:22 +01:00

612 lines
13 KiB
Plaintext

OMP_1.0 {
global:
omp_set_num_threads;
omp_get_num_threads;
omp_get_max_threads;
omp_get_thread_num;
omp_get_num_procs;
omp_in_parallel;
omp_set_dynamic;
omp_get_dynamic;
omp_set_nested;
omp_get_nested;
#ifdef HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT
# If the assembler used lacks the .symver directive or the linker
# doesn't support GNU symbol versioning, we have the same symbol in
# two versions, which Sun ld chokes on.
omp_init_lock;
omp_init_nest_lock;
omp_destroy_lock;
omp_destroy_nest_lock;
omp_set_lock;
omp_set_nest_lock;
omp_unset_lock;
omp_unset_nest_lock;
omp_test_lock;
omp_test_nest_lock;
omp_destroy_lock_;
omp_destroy_nest_lock_;
omp_init_lock_;
omp_init_nest_lock_;
omp_set_lock_;
omp_set_nest_lock_;
omp_test_lock_;
omp_test_nest_lock_;
omp_unset_lock_;
omp_unset_nest_lock_;
#endif
omp_get_dynamic_;
omp_get_max_threads_;
omp_get_nested_;
omp_get_num_procs_;
omp_get_num_threads_;
omp_get_thread_num_;
omp_in_parallel_;
omp_set_dynamic_;
omp_set_dynamic_8_;
omp_set_nested_;
omp_set_nested_8_;
omp_set_num_threads_;
omp_set_num_threads_8_;
local:
*;
};
OMP_2.0 {
global:
omp_get_wtick;
omp_get_wtime;
omp_get_wtick_;
omp_get_wtime_;
} OMP_1.0;
OMP_3.0 {
global:
omp_set_schedule;
omp_set_schedule_;
omp_set_schedule_8_;
omp_get_schedule;
omp_get_schedule_;
omp_get_schedule_8_;
omp_get_thread_limit;
omp_get_thread_limit_;
omp_set_max_active_levels;
omp_set_max_active_levels_;
omp_set_max_active_levels_8_;
omp_get_max_active_levels;
omp_get_max_active_levels_;
omp_get_level;
omp_get_level_;
omp_get_ancestor_thread_num;
omp_get_ancestor_thread_num_;
omp_get_ancestor_thread_num_8_;
omp_get_team_size;
omp_get_team_size_;
omp_get_team_size_8_;
omp_get_active_level;
omp_get_active_level_;
omp_init_lock;
omp_init_nest_lock;
omp_destroy_lock;
omp_destroy_nest_lock;
omp_set_lock;
omp_set_nest_lock;
omp_unset_lock;
omp_unset_nest_lock;
omp_test_lock;
omp_test_nest_lock;
omp_destroy_lock_;
omp_destroy_nest_lock_;
omp_init_lock_;
omp_init_nest_lock_;
omp_set_lock_;
omp_set_nest_lock_;
omp_test_lock_;
omp_test_nest_lock_;
omp_unset_lock_;
omp_unset_nest_lock_;
} OMP_2.0;
OMP_3.1 {
global:
omp_in_final;
omp_in_final_;
} OMP_3.0;
OMP_4.0 {
global:
omp_get_cancellation;
omp_get_cancellation_;
omp_get_proc_bind;
omp_get_proc_bind_;
omp_set_default_device;
omp_set_default_device_;
omp_set_default_device_8_;
omp_get_default_device;
omp_get_default_device_;
omp_get_num_devices;
omp_get_num_devices_;
omp_get_num_teams;
omp_get_num_teams_;
omp_get_team_num;
omp_get_team_num_;
omp_is_initial_device;
omp_is_initial_device_;
} OMP_3.1;
OMP_4.5 {
global:
omp_get_max_task_priority;
omp_get_max_task_priority_;
omp_get_num_places;
omp_get_num_places_;
omp_get_place_num_procs;
omp_get_place_num_procs_;
omp_get_place_num_procs_8_;
omp_get_place_proc_ids;
omp_get_place_proc_ids_;
omp_get_place_proc_ids_8_;
omp_get_place_num;
omp_get_place_num_;
omp_get_partition_num_places;
omp_get_partition_num_places_;
omp_get_partition_place_nums;
omp_get_partition_place_nums_;
omp_get_partition_place_nums_8_;
omp_get_initial_device;
omp_get_initial_device_;
omp_target_alloc;
omp_target_free;
omp_target_is_present;
omp_target_memcpy;
omp_target_memcpy_rect;
omp_target_associate_ptr;
omp_target_disassociate_ptr;
} OMP_4.0;
OMP_5.0 {
global:
omp_capture_affinity;
omp_capture_affinity_;
omp_display_affinity;
omp_display_affinity_;
omp_get_affinity_format;
omp_get_affinity_format_;
omp_set_affinity_format;
omp_set_affinity_format_;
omp_pause_resource;
omp_pause_resource_;
omp_pause_resource_all;
omp_pause_resource_all_;
} OMP_4.5;
OMP_5.0.1 {
global:
omp_set_default_allocator;
omp_set_default_allocator_;
omp_get_default_allocator;
omp_get_default_allocator_;
omp_init_allocator;
omp_init_allocator_;
omp_init_allocator_8_;
omp_destroy_allocator;
omp_destroy_allocator_;
omp_alloc;
omp_free;
omp_get_supported_active_levels;
omp_get_supported_active_levels_;
omp_fulfill_event;
omp_fulfill_event_;
} OMP_5.0;
OMP_5.0.2 {
global:
omp_get_device_num;
omp_get_device_num_;
omp_aligned_alloc;
omp_calloc;
omp_aligned_calloc;
omp_realloc;
} OMP_5.0.1;
OMP_5.1 {
global:
omp_display_env;
omp_display_env_;
omp_display_env_8_;
omp_set_num_teams;
omp_set_num_teams_;
omp_set_num_teams_8_;
omp_get_max_teams;
omp_get_max_teams_;
omp_set_teams_thread_limit;
omp_set_teams_thread_limit_;
omp_set_teams_thread_limit_8_;
omp_get_teams_thread_limit;
omp_get_teams_thread_limit_;
} OMP_5.0.2;
GOMP_1.0 {
global:
GOMP_atomic_end;
GOMP_atomic_start;
GOMP_barrier;
GOMP_critical_end;
GOMP_critical_name_end;
GOMP_critical_name_start;
GOMP_critical_start;
GOMP_loop_dynamic_next;
GOMP_loop_dynamic_start;
GOMP_loop_end;
GOMP_loop_end_nowait;
GOMP_loop_guided_next;
GOMP_loop_guided_start;
GOMP_loop_ordered_dynamic_next;
GOMP_loop_ordered_dynamic_start;
GOMP_loop_ordered_guided_next;
GOMP_loop_ordered_guided_start;
GOMP_loop_ordered_runtime_next;
GOMP_loop_ordered_runtime_start;
GOMP_loop_ordered_static_next;
GOMP_loop_ordered_static_start;
GOMP_loop_runtime_next;
GOMP_loop_runtime_start;
GOMP_loop_static_next;
GOMP_loop_static_start;
GOMP_ordered_end;
GOMP_ordered_start;
GOMP_parallel_end;
GOMP_parallel_loop_dynamic_start;
GOMP_parallel_loop_guided_start;
GOMP_parallel_loop_runtime_start;
GOMP_parallel_loop_static_start;
GOMP_parallel_sections_start;
GOMP_parallel_start;
GOMP_sections_end;
GOMP_sections_end_nowait;
GOMP_sections_next;
GOMP_sections_start;
GOMP_single_copy_end;
GOMP_single_copy_start;
GOMP_single_start;
};
GOMP_2.0 {
global:
GOMP_task;
GOMP_taskwait;
GOMP_loop_ull_dynamic_next;
GOMP_loop_ull_dynamic_start;
GOMP_loop_ull_guided_next;
GOMP_loop_ull_guided_start;
GOMP_loop_ull_ordered_dynamic_next;
GOMP_loop_ull_ordered_dynamic_start;
GOMP_loop_ull_ordered_guided_next;
GOMP_loop_ull_ordered_guided_start;
GOMP_loop_ull_ordered_runtime_next;
GOMP_loop_ull_ordered_runtime_start;
GOMP_loop_ull_ordered_static_next;
GOMP_loop_ull_ordered_static_start;
GOMP_loop_ull_runtime_next;
GOMP_loop_ull_runtime_start;
GOMP_loop_ull_static_next;
GOMP_loop_ull_static_start;
} GOMP_1.0;
GOMP_3.0 {
global:
GOMP_taskyield;
} GOMP_2.0;
GOMP_4.0 {
global:
GOMP_barrier_cancel;
GOMP_cancel;
GOMP_cancellation_point;
GOMP_loop_end_cancel;
GOMP_parallel_loop_dynamic;
GOMP_parallel_loop_guided;
GOMP_parallel_loop_runtime;
GOMP_parallel_loop_static;
GOMP_parallel_sections;
GOMP_parallel;
GOMP_sections_end_cancel;
GOMP_taskgroup_start;
GOMP_taskgroup_end;
GOMP_target;
GOMP_target_data;
GOMP_target_end_data;
GOMP_target_update;
GOMP_teams;
} GOMP_3.0;
GOMP_4.0.1 {
global:
GOMP_offload_register;
GOMP_offload_unregister;
} GOMP_4.0;
GOMP_4.5 {
global:
GOMP_target_ext;
GOMP_target_data_ext;
GOMP_target_update_ext;
GOMP_target_enter_exit_data;
GOMP_taskloop;
GOMP_taskloop_ull;
GOMP_offload_register_ver;
GOMP_offload_unregister_ver;
GOMP_loop_doacross_dynamic_start;
GOMP_loop_doacross_guided_start;
GOMP_loop_doacross_runtime_start;
GOMP_loop_doacross_static_start;
GOMP_doacross_post;
GOMP_doacross_wait;
GOMP_loop_ull_doacross_dynamic_start;
GOMP_loop_ull_doacross_guided_start;
GOMP_loop_ull_doacross_runtime_start;
GOMP_loop_ull_doacross_static_start;
GOMP_doacross_ull_post;
GOMP_doacross_ull_wait;
GOMP_loop_nonmonotonic_dynamic_next;
GOMP_loop_nonmonotonic_dynamic_start;
GOMP_loop_nonmonotonic_guided_next;
GOMP_loop_nonmonotonic_guided_start;
GOMP_loop_ull_nonmonotonic_dynamic_next;
GOMP_loop_ull_nonmonotonic_dynamic_start;
GOMP_loop_ull_nonmonotonic_guided_next;
GOMP_loop_ull_nonmonotonic_guided_start;
GOMP_parallel_loop_nonmonotonic_dynamic;
GOMP_parallel_loop_nonmonotonic_guided;
} GOMP_4.0.1;
GOMP_5.0 {
global:
GOMP_loop_doacross_start;
GOMP_loop_maybe_nonmonotonic_runtime_next;
GOMP_loop_maybe_nonmonotonic_runtime_start;
GOMP_loop_nonmonotonic_runtime_next;
GOMP_loop_nonmonotonic_runtime_start;
GOMP_loop_ordered_start;
GOMP_loop_start;
GOMP_loop_ull_doacross_start;
GOMP_loop_ull_maybe_nonmonotonic_runtime_next;
GOMP_loop_ull_maybe_nonmonotonic_runtime_start;
GOMP_loop_ull_nonmonotonic_runtime_next;
GOMP_loop_ull_nonmonotonic_runtime_start;
GOMP_loop_ull_ordered_start;
GOMP_loop_ull_start;
GOMP_parallel_loop_maybe_nonmonotonic_runtime;
GOMP_parallel_loop_nonmonotonic_runtime;
GOMP_parallel_reductions;
GOMP_sections2_start;
GOMP_taskgroup_reduction_register;
GOMP_taskgroup_reduction_unregister;
GOMP_task_reduction_remap;
GOMP_taskwait_depend;
GOMP_teams_reg;
GOMP_workshare_task_reduction_unregister;
} GOMP_4.5;
GOMP_5.0.1 {
global:
GOMP_alloc;
GOMP_free;
} GOMP_5.0;
GOMP_5.1 {
global:
GOMP_error;
GOMP_scope_start;
GOMP_warning;
GOMP_teams4;
} GOMP_5.0.1;
OACC_2.0 {
global:
acc_get_num_devices;
acc_get_num_devices_h_;
acc_set_device_type;
acc_set_device_type_h_;
acc_get_device_type;
acc_get_device_type_h_;
acc_set_device_num;
acc_set_device_num_h_;
acc_get_device_num;
acc_get_device_num_h_;
acc_async_test;
acc_async_test_h_;
acc_async_test_all;
acc_async_test_all_h_;
acc_wait;
acc_wait_h_;
acc_wait_async;
acc_wait_async_h_;
acc_wait_all;
acc_wait_all_h_;
acc_wait_all_async;
acc_wait_all_async_h_;
acc_init;
acc_init_h_;
acc_shutdown;
acc_shutdown_h_;
acc_on_device;
acc_on_device_h_;
acc_malloc;
acc_free;
acc_copyin;
acc_copyin_32_h_;
acc_copyin_64_h_;
acc_copyin_array_h_;
acc_present_or_copyin;
acc_present_or_copyin_32_h_;
acc_present_or_copyin_64_h_;
acc_present_or_copyin_array_h_;
acc_create;
acc_create_32_h_;
acc_create_64_h_;
acc_create_array_h_;
acc_present_or_create;
acc_present_or_create_32_h_;
acc_present_or_create_64_h_;
acc_present_or_create_array_h_;
acc_copyout;
acc_copyout_32_h_;
acc_copyout_64_h_;
acc_copyout_array_h_;
acc_delete;
acc_delete_32_h_;
acc_delete_64_h_;
acc_delete_array_h_;
acc_update_device;
acc_update_device_32_h_;
acc_update_device_64_h_;
acc_update_device_array_h_;
acc_update_self;
acc_update_self_32_h_;
acc_update_self_64_h_;
acc_update_self_array_h_;
acc_map_data;
acc_unmap_data;
acc_deviceptr;
acc_hostptr;
acc_is_present;
acc_is_present_32_h_;
acc_is_present_64_h_;
acc_is_present_array_h_;
acc_memcpy_to_device;
acc_memcpy_from_device;
acc_get_current_cuda_device;
acc_get_current_cuda_context;
acc_get_cuda_stream;
acc_set_cuda_stream;
};
OACC_2.0.1 {
global:
acc_async_wait;
acc_async_wait_all;
acc_pcopyin;
acc_pcreate;
} OACC_2.0;
OACC_2.5 {
global:
acc_copyin_async;
acc_copyin_async_32_h_;
acc_copyin_async_64_h_;
acc_copyin_async_array_h_;
acc_copyout_async;
acc_copyout_async_32_h_;
acc_copyout_async_64_h_;
acc_copyout_async_array_h_;
acc_copyout_finalize;
acc_copyout_finalize_32_h_;
acc_copyout_finalize_64_h_;
acc_copyout_finalize_array_h_;
acc_copyout_finalize_async;
acc_copyout_finalize_async_32_h_;
acc_copyout_finalize_async_64_h_;
acc_copyout_finalize_async_array_h_;
acc_create_async;
acc_create_async_32_h_;
acc_create_async_64_h_;
acc_create_async_array_h_;
acc_delete_async;
acc_delete_async_32_h_;
acc_delete_async_64_h_;
acc_delete_async_array_h_;
acc_delete_finalize;
acc_delete_finalize_32_h_;
acc_delete_finalize_64_h_;
acc_delete_finalize_array_h_;
acc_delete_finalize_async;
acc_delete_finalize_async_32_h_;
acc_delete_finalize_async_64_h_;
acc_delete_finalize_async_array_h_;
acc_memcpy_from_device_async;
acc_memcpy_to_device_async;
acc_update_device_async;
acc_update_device_async_32_h_;
acc_update_device_async_64_h_;
acc_update_device_async_array_h_;
acc_update_self_async;
acc_update_self_async_32_h_;
acc_update_self_async_64_h_;
acc_update_self_async_array_h_;
} OACC_2.0.1;
OACC_2.5.1 {
global:
acc_prof_lookup;
acc_prof_register;
acc_prof_unregister;
acc_register_library;
} OACC_2.5;
OACC_2.6 {
global:
acc_attach;
acc_attach_async;
acc_detach;
acc_detach_async;
acc_detach_finalize;
acc_detach_finalize_async;
acc_get_property;
acc_get_property_h_;
acc_get_property_string;
acc_get_property_string_h_;
} OACC_2.5.1;
GOACC_2.0 {
global:
GOACC_data_end;
GOACC_data_start;
GOACC_enter_exit_data;
GOACC_parallel;
GOACC_update;
GOACC_wait;
GOACC_get_thread_num;
GOACC_get_num_threads;
};
GOACC_2.0.1 {
global:
GOACC_declare;
GOACC_parallel_keyed;
} GOACC_2.0;
GOACC_2.0.2 {
global:
GOACC_enter_data;
GOACC_exit_data;
} GOACC_2.0.1;
GOMP_PLUGIN_1.0 {
global:
GOMP_PLUGIN_malloc;
GOMP_PLUGIN_malloc_cleared;
GOMP_PLUGIN_realloc;
GOMP_PLUGIN_debug;
GOMP_PLUGIN_error;
GOMP_PLUGIN_fatal;
GOMP_PLUGIN_async_unmap_vars;
GOMP_PLUGIN_acc_thread;
};
GOMP_PLUGIN_1.1 {
global:
GOMP_PLUGIN_target_task_completion;
} GOMP_PLUGIN_1.0;
GOMP_PLUGIN_1.2 {
global:
GOMP_PLUGIN_acc_default_dim;
} GOMP_PLUGIN_1.1;
GOMP_PLUGIN_1.3 {
global:
GOMP_PLUGIN_goacc_profiling_dispatch;
GOMP_PLUGIN_goacc_thread;
} GOMP_PLUGIN_1.2;