7d6da11fce
The following patch implements what I talked about earlier: for an explicit num_teams clause, honor the requirement that we create at least the lower-bound (or, if that is not specified, the upper-bound) number of teams in the league. For host fallback, this still means we have only one thread doing all the teams, sequentially one after another. For PTX and GCN, I think the new teams-2.c test, and maybe teams-4.c too, will or might fail. For these offload targets, I think it is OK to remove symbols that are no longer used from libgomp.a. If num_teams_lower is bigger than the provided num_blocks or num_workgroups, we should arrange for gomp_num_teams_var to be num_teams_lower - 1, stop using %ctaid.x or __builtin_gcn_dim_pos (0) for omp_get_team_num (), and instead use some .shared variable for it, which GOMP_teams4 initializes to %ctaid.x or __builtin_gcn_dim_pos (0) on the first call and, for !first, increments by num_blocks or num_workgroups each time, returning false only once we are above num_teams_lower. Any help with actually implementing this for the two architectures would be highly appreciated. 2021-11-12 Jakub Jelinek <jakub@redhat.com> gcc/ * omp-builtins.def (BUILT_IN_GOMP_TEAMS): Remove. (BUILT_IN_GOMP_TEAMS4): New. * builtin-types.def (BT_FN_VOID_UINT_UINT): Remove. (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New. * omp-low.c (lower_omp_teams): Use GOMP_teams4 instead of GOMP_teams, pass to it also num_teams lower-bound expression or a dup of upper-bound if it is missing and a flag whether it is the first call or not. gcc/fortran/ * types.def (BT_FN_VOID_UINT_UINT): Remove. (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New. libgomp/ * libgomp_g.h (GOMP_teams4): Declare. * libgomp.map (GOMP_5.1): Export GOMP_teams4. * target.c (GOMP_teams4): New function. * config/nvptx/target.c (GOMP_teams): Remove. (GOMP_teams4): New function. * config/gcn/target.c (GOMP_teams): Remove. (GOMP_teams4): New function. * testsuite/libgomp.c/teams-4.c (main): Expect exactly 2 teams instead of <= 2. * testsuite/libgomp.c-c++-common/teams-2.c: New test.
71 lines
1.2 KiB
C
71 lines
1.2 KiB
C
#include <omp.h>
|
|
#include <stdlib.h>
|
|
|
|
/* Return the team count (934) used by the num_teams clauses in main.
   The value is obtained through a call, so the clause arguments are
   call expressions rather than literals.  Declared with a (void)
   prototype so that calls passing arguments are diagnosed.  */
int
foo (void)
{
  return 934;
}
|
|
|
|
/* Run four target teams regions with different num_teams clauses and
   check the reported league size and team numbers in each.  Every team
   increments its own slot of A, so after the regions A[k] holds the
   number of regions that had a team numbered k.  Calls abort () on any
   mismatch, returns 0 otherwise.  */
int
main ()
{
  int a[934] = {};

  /* Region 1: num_teams (934), single value.  Exactly 934 teams are
     required, each with a distinct in-range number and an untouched
     slot.  */
  #pragma omp target map(a)
  #pragma omp teams num_teams (foo ())
  {
    int team = omp_get_team_num ();
    if (omp_get_num_teams () != 934)
      abort ();
    /* The unsigned compare also rejects negative team numbers.  */
    if ((unsigned) team >= 934U)
      abort ();
    if (a[team] != 0)
      abort ();
    a[team] += 1;
  }

  /* Region 2: num_teams (884 : 934).  The league size may be anything
     within the lower and upper bound, inclusive.  */
  #pragma omp target map(a)
  #pragma omp teams num_teams (foo () - 50 : foo ())
  {
    int team = omp_get_team_num ();
    int nteams = omp_get_num_teams ();
    if (nteams < 884)
      abort ();
    if (nteams > 934)
      abort ();
    if ((unsigned) team >= (unsigned) nteams)
      abort ();
    if (a[team] != 1)
      abort ();
    a[team] += 1;
  }

  /* Region 3: combined construct, num_teams (467), single value.  */
  #pragma omp target teams map(a) num_teams (foo () / 2)
  {
    int team = omp_get_team_num ();
    if (omp_get_num_teams () != 467)
      abort ();
    if ((unsigned) team >= 467U)
      abort ();
    if (a[team] != 2)
      abort ();
    a[team] += 1;
  }

  /* Region 4: combined construct, num_teams (417 : 467).  */
  #pragma omp target teams map(a) num_teams (foo () / 2 - 50 : foo () / 2)
  {
    int team = omp_get_team_num ();
    int nteams = omp_get_num_teams ();
    if (nteams < 417)
      abort ();
    if (nteams > 467)
      abort ();
    if ((unsigned) team >= (unsigned) nteams)
      abort ();
    if (a[team] != 3)
      abort ();
    a[team] += 1;
  }

  /* Sweep A with a non-increasing expected count.  It starts at 4 and
     may drop to 3 inside [417, 467), where region 4 was allowed to
     stop.  It is forced to 2 at index 467, past the last team of
     regions 3 and 4, and may drop to 1 from index 884 on, where
     region 2 was allowed to stop.  */
  int expected = 4;
  int idx;
  for (idx = 0; idx < 934; idx++)
    {
      if (idx >= 417 && idx < 467 && a[idx] == 3)
	expected = 3;
      else if (idx == 467)
	expected = 2;
      else if (idx >= 884 && a[idx] == 1)
	expected = 1;

      if (a[idx] != expected)
	abort ();
    }

  return 0;
}
|