openmp: Honor OpenMP 5.1 num_teams lower bound
The following patch implements what I've been talking about earlier, honor that for explicit num_teams clause we create at least the lower-bound (if not specified, upper-bound) teams in the league. For host fallback, it still means we only have one thread doing all the teams, sequentially one after another. For PTX and GCN, I think the new teams-2.c test and maybe teams-4.c too will or might fail. For these offloads, I think it is ok to remove symbols no longer used from libgomp.a. If num_teams_lower is bigger than the provided num_blocks or num_workgroups, we should arrange for gomp_num_teams_var to be num_teams_lower - 1, stop using the %ctaid.x or __builtin_gcn_dim_pos (0) for omp_get_team_num () and instead use for it some .shared var that GOMP_teams4 initializes to %ctaid.x or __builtin_gcn_dim_pos (0) when first and for !first increment that by num_blocks or num_workgroups each time and only return false when we are above num_teams_lower. Any help with actually implementing this for the 2 architectures highly appreciated. 2021-11-12 Jakub Jelinek <jakub@redhat.com> gcc/ * omp-builtins.def (BUILT_IN_GOMP_TEAMS): Remove. (BUILT_IN_GOMP_TEAMS4): New. * builtin-types.def (BT_FN_VOID_UINT_UINT): Remove. (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New. * omp-low.c (lower_omp_teams): Use GOMP_teams4 instead of GOMP_teams, pass to it also num_teams lower-bound expression or a dup of upper-bound if it is missing and a flag whether it is the first call or not. gcc/fortran/ * types.def (BT_FN_VOID_UINT_UINT): Remove. (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New. libgomp/ * libgomp_g.h (GOMP_teams4): Declare. * libgomp.map (GOMP_5.1): Export GOMP_teams4. * target.c (GOMP_teams4): New function. * config/nvptx/target.c (GOMP_teams): Remove. (GOMP_teams4): New function. * config/gcn/target.c (GOMP_teams): Remove. (GOMP_teams4): New function. * testsuite/libgomp.c/teams-4.c (main): Expect exactly 2 teams instead of <= 2. * testsuite/libgomp.c-c++-common/teams-2.c: New test.
This commit is contained in:
parent
5f516a6a5d
commit
7d6da11fce
|
@ -489,7 +489,6 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
|
|||
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
|
||||
BT_CONST_VOLATILE_PTR)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_PTR, BT_UINT, BT_UINT, BT_PTR)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_CONST_PTR, BT_UINT, BT_UINT, BT_CONST_PTR)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_SIZE, BT_PTR, BT_CONST_PTR, BT_SIZE)
|
||||
|
@ -680,6 +679,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
|
|||
BT_PTR_ULONGLONG)
|
||||
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
|
||||
BT_INT, BT_PTR)
|
||||
DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_UINT_UINT_BOOL,
|
||||
BT_BOOL, BT_UINT, BT_UINT, BT_UINT, BT_BOOL)
|
||||
|
||||
DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
|
||||
BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
|
||||
|
|
|
@ -117,7 +117,6 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
|
|||
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
|
||||
BT_CONST_VOLATILE_PTR)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTRMODE,
|
||||
BT_VOID, BT_PTR, BT_PTRMODE)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_CONST_PTR_SIZE, BT_VOID, BT_CONST_PTR, BT_SIZE)
|
||||
|
@ -173,6 +172,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
|
|||
BT_PTR_ULONGLONG)
|
||||
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
|
||||
BT_INT, BT_PTR)
|
||||
DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_UINT_UINT_BOOL,
|
||||
BT_BOOL, BT_UINT, BT_UINT, BT_UINT, BT_BOOL)
|
||||
|
||||
DEF_FUNCTION_TYPE_5 (BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT,
|
||||
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT,
|
||||
|
|
|
@ -442,8 +442,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_UPDATE, "GOMP_target_update_ext",
|
|||
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA,
|
||||
"GOMP_target_enter_exit_data",
|
||||
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, ATTR_NOTHROW_LIST)
|
||||
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS, "GOMP_teams",
|
||||
BT_FN_VOID_UINT_UINT, ATTR_NOTHROW_LIST)
|
||||
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS4, "GOMP_teams4",
|
||||
BT_FN_BOOL_UINT_UINT_UINT_BOOL, ATTR_NOTHROW_LIST)
|
||||
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg",
|
||||
BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_NOTHROW_LIST)
|
||||
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER,
|
||||
|
|
|
@ -13902,14 +13902,24 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
|
||||
tree num_teams = omp_find_clause (gimple_omp_teams_clauses (teams_stmt),
|
||||
OMP_CLAUSE_NUM_TEAMS);
|
||||
tree num_teams_lower = NULL_TREE;
|
||||
if (num_teams == NULL_TREE)
|
||||
num_teams = build_int_cst (unsigned_type_node, 0);
|
||||
else
|
||||
{
|
||||
num_teams_lower = OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (num_teams);
|
||||
if (num_teams_lower)
|
||||
{
|
||||
num_teams_lower = fold_convert (unsigned_type_node, num_teams_lower);
|
||||
gimplify_expr (&num_teams_lower, &bind_body, NULL, is_gimple_val,
|
||||
fb_rvalue);
|
||||
}
|
||||
num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
|
||||
num_teams = fold_convert (unsigned_type_node, num_teams);
|
||||
gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue);
|
||||
}
|
||||
if (num_teams_lower == NULL_TREE)
|
||||
num_teams_lower = num_teams;
|
||||
tree thread_limit = omp_find_clause (gimple_omp_teams_clauses (teams_stmt),
|
||||
OMP_CLAUSE_THREAD_LIMIT);
|
||||
if (thread_limit == NULL_TREE)
|
||||
|
@ -13921,6 +13931,30 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
gimplify_expr (&thread_limit, &bind_body, NULL, is_gimple_val,
|
||||
fb_rvalue);
|
||||
}
|
||||
location_t loc = gimple_location (teams_stmt);
|
||||
tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS4);
|
||||
tree rettype = TREE_TYPE (TREE_TYPE (decl));
|
||||
tree first = create_tmp_var (rettype);
|
||||
gimple_seq_add_stmt (&bind_body,
|
||||
gimple_build_assign (first, build_one_cst (rettype)));
|
||||
tree llabel = create_artificial_label (loc);
|
||||
gimple_seq_add_stmt (&bind_body, gimple_build_label (llabel));
|
||||
gimple *call
|
||||
= gimple_build_call (decl, 4, num_teams_lower, num_teams, thread_limit,
|
||||
first);
|
||||
gimple_set_location (call, loc);
|
||||
tree temp = create_tmp_var (rettype);
|
||||
gimple_call_set_lhs (call, temp);
|
||||
gimple_seq_add_stmt (&bind_body, call);
|
||||
|
||||
tree tlabel = create_artificial_label (loc);
|
||||
tree flabel = create_artificial_label (loc);
|
||||
gimple *cond = gimple_build_cond (NE_EXPR, temp, build_zero_cst (rettype),
|
||||
tlabel, flabel);
|
||||
gimple_seq_add_stmt (&bind_body, cond);
|
||||
gimple_seq_add_stmt (&bind_body, gimple_build_label (tlabel));
|
||||
gimple_seq_add_stmt (&bind_body,
|
||||
gimple_build_assign (first, build_zero_cst (rettype)));
|
||||
|
||||
lower_rec_input_clauses (gimple_omp_teams_clauses (teams_stmt),
|
||||
&bind_body, &dlist, ctx, NULL);
|
||||
|
@ -13929,17 +13963,13 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
|
|||
NULL, ctx);
|
||||
gimple_seq_add_stmt (&bind_body, teams_stmt);
|
||||
|
||||
location_t loc = gimple_location (teams_stmt);
|
||||
tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
|
||||
gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
|
||||
gimple_set_location (call, loc);
|
||||
gimple_seq_add_stmt (&bind_body, call);
|
||||
|
||||
gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
|
||||
gimple_omp_set_body (teams_stmt, NULL);
|
||||
gimple_seq_add_seq (&bind_body, olist);
|
||||
gimple_seq_add_seq (&bind_body, dlist);
|
||||
gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
|
||||
gimple_seq_add_stmt (&bind_body, gimple_build_goto (llabel));
|
||||
gimple_seq_add_stmt (&bind_body, gimple_build_label (flabel));
|
||||
gimple_bind_set_body (bind, bind_body);
|
||||
|
||||
pop_gimplify_context (bind);
|
||||
|
|
|
@ -26,9 +26,12 @@
|
|||
#include "libgomp.h"
|
||||
#include <limits.h>
|
||||
|
||||
void
|
||||
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
|
||||
bool
|
||||
GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
|
||||
unsigned int thread_limit, bool first)
|
||||
{
|
||||
if (!first)
|
||||
return false;
|
||||
if (thread_limit)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
|
@ -38,14 +41,15 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
|
|||
unsigned int num_workgroups, workgroup_id;
|
||||
num_workgroups = __builtin_gcn_dim_size (0);
|
||||
workgroup_id = __builtin_gcn_dim_pos (0);
|
||||
if (!num_teams || num_teams >= num_workgroups)
|
||||
num_teams = num_workgroups;
|
||||
else if (workgroup_id >= num_teams)
|
||||
{
|
||||
gomp_free_thread (gcn_thrs ());
|
||||
exit (0);
|
||||
}
|
||||
gomp_num_teams_var = num_teams - 1;
|
||||
/* FIXME: If num_teams_lower > num_workgroups, we want to loop
|
||||
multiple times at least for some workgroups. */
|
||||
(void) num_teams_lower;
|
||||
if (!num_teams_upper || num_teams_upper >= num_workgroups)
|
||||
num_teams_upper = num_workgroups;
|
||||
else if (workgroup_id >= num_teams_upper)
|
||||
return false;
|
||||
gomp_num_teams_var = num_teams_upper - 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -26,9 +26,12 @@
|
|||
#include "libgomp.h"
|
||||
#include <limits.h>
|
||||
|
||||
void
|
||||
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
|
||||
bool
|
||||
GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
|
||||
unsigned int thread_limit, bool first)
|
||||
{
|
||||
if (!first)
|
||||
return false;
|
||||
if (thread_limit)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
|
@ -38,14 +41,15 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
|
|||
unsigned int num_blocks, block_id;
|
||||
asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
|
||||
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
|
||||
if (!num_teams || num_teams >= num_blocks)
|
||||
num_teams = num_blocks;
|
||||
else if (block_id >= num_teams)
|
||||
{
|
||||
gomp_free_thread (nvptx_thrs);
|
||||
asm ("exit;");
|
||||
}
|
||||
gomp_num_teams_var = num_teams - 1;
|
||||
/* FIXME: If num_teams_lower > num_blocks, we want to loop multiple
|
||||
times for some CTAs. */
|
||||
(void) num_teams_lower;
|
||||
if (!num_teams_upper || num_teams_upper >= num_blocks)
|
||||
num_teams_upper = num_blocks;
|
||||
else if (block_id >= num_teams_upper)
|
||||
return false;
|
||||
gomp_num_teams_var = num_teams_upper - 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -399,6 +399,7 @@ GOMP_5.1 {
|
|||
GOMP_error;
|
||||
GOMP_scope_start;
|
||||
GOMP_warning;
|
||||
GOMP_teams4;
|
||||
} GOMP_5.0.1;
|
||||
|
||||
OACC_2.0 {
|
||||
|
|
|
@ -355,6 +355,7 @@ extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *,
|
|||
unsigned short *, unsigned int,
|
||||
void **);
|
||||
extern void GOMP_teams (unsigned int, unsigned int);
|
||||
extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
|
||||
|
||||
/* teams.c */
|
||||
|
||||
|
|
|
@ -3088,6 +3088,32 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
|
|||
(void) num_teams;
|
||||
}
|
||||
|
||||
bool
|
||||
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
|
||||
unsigned int thread_limit, bool first)
|
||||
{
|
||||
struct gomp_thread *thr = gomp_thread ();
|
||||
if (first)
|
||||
{
|
||||
if (thread_limit)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
icv->thread_limit_var
|
||||
= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
|
||||
}
|
||||
(void) num_teams_high;
|
||||
if (num_teams_low == 0)
|
||||
num_teams_low = 1;
|
||||
thr->num_teams = num_teams_low - 1;
|
||||
thr->team_num = 0;
|
||||
}
|
||||
else if (thr->team_num == thr->num_teams)
|
||||
return false;
|
||||
else
|
||||
++thr->team_num;
|
||||
return true;
|
||||
}
|
||||
|
||||
void *
|
||||
omp_target_alloc (size_t size, int device_num)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int
|
||||
foo ()
|
||||
{
|
||||
return 934;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int a[934] = {};
|
||||
int k, e;
|
||||
#pragma omp target map(a)
|
||||
#pragma omp teams num_teams (foo ())
|
||||
{
|
||||
int i = omp_get_team_num ();
|
||||
if (omp_get_num_teams () != 934
|
||||
|| (unsigned) i >= 934U
|
||||
|| a[i] != 0)
|
||||
abort ();
|
||||
++a[i];
|
||||
}
|
||||
#pragma omp target map(a)
|
||||
#pragma omp teams num_teams (foo () - 50 : foo ())
|
||||
{
|
||||
int i = omp_get_team_num ();
|
||||
int j = omp_get_num_teams ();
|
||||
if (j < 884
|
||||
|| j > 934
|
||||
|| (unsigned) i >= (unsigned) j
|
||||
|| a[i] != 1)
|
||||
abort ();
|
||||
++a[i];
|
||||
}
|
||||
#pragma omp target teams map(a) num_teams (foo () / 2)
|
||||
{
|
||||
int i = omp_get_team_num ();
|
||||
if (omp_get_num_teams () != 467
|
||||
|| (unsigned) i >= 467U
|
||||
|| a[i] != 2)
|
||||
abort ();
|
||||
++a[i];
|
||||
}
|
||||
#pragma omp target teams map(a) num_teams (foo () / 2 - 50 : foo () / 2)
|
||||
{
|
||||
int i = omp_get_team_num ();
|
||||
int j = omp_get_num_teams ();
|
||||
if (j < 417
|
||||
|| j > 467
|
||||
|| (unsigned) i >= (unsigned) j
|
||||
|| a[i] != 3)
|
||||
abort ();
|
||||
++a[i];
|
||||
}
|
||||
e = 4;
|
||||
for (k = 0; k < 934; k++)
|
||||
{
|
||||
if (k >= 417 && k < 467 && a[k] == 3)
|
||||
e = 3;
|
||||
else if (k == 467)
|
||||
e = 2;
|
||||
else if (k >= 884 && a[k] == 1)
|
||||
e = 1;
|
||||
if (a[k] != e)
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -20,7 +20,7 @@ main ()
|
|||
#pragma omp parallel if (0)
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams (2)
|
||||
if (omp_get_num_teams () > 2
|
||||
if (omp_get_num_teams () != 2
|
||||
|| (unsigned) omp_get_team_num () >= 2U)
|
||||
abort ();
|
||||
if (omp_get_num_teams () != 4 || (unsigned) team >= 4U)
|
||||
|
|
Loading…
Reference in New Issue