libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound
Here is a PTX implementation of what I was talking about. When num_teams_upper is 0, or whenever num_teams_lower <= num_blocks, the current implementation is fine. But if the user explicitly asks for more teams than we can provide in hardware, we need to stop assuming that omp_get_team_num () is equal to the hw team id. Instead we need to use some team-specific memory (it is .shared for PTX) or, if none is provided, an array indexed by the hw team id, and run some teams serially within the same hw thread.

2021-11-15  Jakub Jelinek  <jakub@redhat.com>

    * config/nvptx/team.c (__gomp_team_num): Define as __attribute__((shared)) var.
    (gomp_nvptx_main): Initialize __gomp_team_num to 0.
    * config/nvptx/target.c (__gomp_team_num): Declare as extern __attribute__((shared)) var.
    (GOMP_teams4): Use __gomp_team_num as the team number instead of %ctaid.x. If first, initialize it to %ctaid.x. If num_teams_lower is bigger than num_blocks, use num_teams_lower teams and arrange for bumping of __gomp_team_num if !first and returning false once we run out of teams.
    * config/nvptx/teams.c (__gomp_team_num): Declare as extern __attribute__((shared)) var.
    (omp_get_team_num): Return __gomp_team_num value instead of %ctaid.x.
This commit is contained in:
parent
d294459720
commit
9fa72756d9
@ -26,28 +26,41 @@
|
||||
#include "libgomp.h"
|
||||
#include <limits.h>
|
||||
|
||||
extern int __gomp_team_num __attribute__((shared));
|
||||
|
||||
bool
|
||||
GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
|
||||
unsigned int thread_limit, bool first)
|
||||
{
|
||||
unsigned int num_blocks, block_id;
|
||||
asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
|
||||
if (!first)
|
||||
return false;
|
||||
{
|
||||
unsigned int team_num;
|
||||
if (num_blocks > gomp_num_teams_var)
|
||||
return false;
|
||||
team_num = __gomp_team_num;
|
||||
if (team_num > gomp_num_teams_var - num_blocks)
|
||||
return false;
|
||||
__gomp_team_num = team_num + num_blocks;
|
||||
return true;
|
||||
}
|
||||
if (thread_limit)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
icv->thread_limit_var
|
||||
= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
|
||||
}
|
||||
unsigned int num_blocks, block_id;
|
||||
asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
|
||||
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
|
||||
/* FIXME: If num_teams_lower > num_blocks, we want to loop multiple
|
||||
times for some CTAs. */
|
||||
(void) num_teams_lower;
|
||||
if (!num_teams_upper || num_teams_upper >= num_blocks)
|
||||
if (!num_teams_upper)
|
||||
num_teams_upper = num_blocks;
|
||||
else if (block_id >= num_teams_upper)
|
||||
else if (num_blocks < num_teams_lower)
|
||||
num_teams_upper = num_teams_lower;
|
||||
else if (num_blocks < num_teams_upper)
|
||||
num_teams_upper = num_blocks;
|
||||
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
|
||||
if (block_id >= num_teams_upper)
|
||||
return false;
|
||||
__gomp_team_num = block_id;
|
||||
gomp_num_teams_var = num_teams_upper - 1;
|
||||
return true;
|
||||
}
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include <string.h>
|
||||
|
||||
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
|
||||
int __gomp_team_num __attribute__((shared,nocommon));
|
||||
|
||||
static void gomp_thread_start (struct gomp_thread_pool *);
|
||||
|
||||
@ -57,6 +58,7 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
|
||||
/* Starting additional threads is not supported. */
|
||||
gomp_global_icv.dyn_var = true;
|
||||
|
||||
__gomp_team_num = 0;
|
||||
nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
|
||||
memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
|
||||
|
||||
|
@ -28,6 +28,8 @@
|
||||
|
||||
#include "libgomp.h"
|
||||
|
||||
extern int __gomp_team_num __attribute__((shared));
|
||||
|
||||
void
|
||||
GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
|
||||
unsigned int thread_limit, unsigned int flags)
|
||||
@ -48,9 +50,7 @@ omp_get_num_teams (void)
|
||||
int
|
||||
omp_get_team_num (void)
|
||||
{
|
||||
int ctaid;
|
||||
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
|
||||
return ctaid;
|
||||
return __gomp_team_num;
|
||||
}
|
||||
|
||||
ialias (omp_get_num_teams)
|
||||
|
Loading…
x
Reference in New Issue
Block a user