From fa4107fcf1766a78d5a43bd0d6075564f94d9e74 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 15 Nov 2021 13:20:53 +0100 Subject: [PATCH] openmp: Add support for thread_limit clause on target OpenMP 5.1 says that thread_limit clause can also appear on target, and similarly to teams should affect the thread-limit-var ICV. On combined target teams, the clause goes to both. We actually passed thread_limit internally on target already before, but only used it for gcn/ptx offloading to hint how many threads should be created and for ptx didn't set thread_limit_var in that case. Similarly for host fallback. Also, I found that we weren't copying the args array that contains encoded thread_limit and num_teams clause for target (etc.) for async target. 2021-11-15 Jakub Jelinek gcc/ * gimplify.c (optimize_target_teams): Only add OMP_CLAUSE_THREAD_LIMIT to OMP_TARGET_CLAUSES if it isn't there already. gcc/c-family/ * c-omp.c (c_omp_split_clauses) : Duplicate to both OMP_TARGET and OMP_TEAMS. gcc/c/ * c-parser.c (OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_THREAD_LIMIT. gcc/cp/ * parser.c (OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_THREAD_LIMIT. libgomp/ * task.c (gomp_create_target_task): Copy args array as well. * target.c (gomp_target_fallback): Add args argument. Set gomp_icv (true)->thread_limit_var if thread_limit is present. (GOMP_target): Adjust gomp_target_fallback caller. (GOMP_target_ext): Likewise. (gomp_target_task_fn): Likewise. * config/nvptx/team.c (gomp_nvptx_main): Set gomp_global_icv.thread_limit_var. * testsuite/libgomp.c-c++-common/thread-limit-1.c: New test. (cherry picked from commit aea72386831c0c5672f55983034cc709b968daea) --- gcc/ChangeLog.omp | 8 ++++++ gcc/c-family/ChangeLog.omp | 8 ++++++ gcc/c-family/c-omp.c | 25 ++++++++++++++++- gcc/c/ChangeLog.omp | 8 ++++++ gcc/c/c-parser.c | 1 + gcc/cp/ChangeLog.omp | 8 ++++++ gcc/cp/parser.c | 1 + gcc/gimplify.c | 11 +++++--- libgomp/ChangeLog.omp | 15 ++++++++++ libgomp/config/nvptx/team.c | 1 + libgomp/target.c | 28 ++++++++++++++++--- libgomp/task.c | 26 +++++++++++++++-- .../libgomp.c-c++-common/thread-limit-1.c | 23 +++++++++++++++ 13 files changed, 152 insertions(+), 11 deletions(-) create mode 100644 libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index 33c521b3876..a8e810ee746 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,11 @@ +2022-02-27 Tobias Burnus + + Backported from master: + 2021-11-15 Jakub Jelinek + + * gimplify.c (optimize_target_teams): Only add OMP_CLAUSE_THREAD_LIMIT + to OMP_TARGET_CLAUSES if it isn't there already. + 2022-02-27 Tobias Burnus Backported from master: diff --git a/gcc/c-family/ChangeLog.omp b/gcc/c-family/ChangeLog.omp index 5290e72bd07..3828d6caab7 100644 --- a/gcc/c-family/ChangeLog.omp +++ b/gcc/c-family/ChangeLog.omp @@ -1,3 +1,11 @@ +2022-02-27 Tobias Burnus + + Backported from master: + 2021-11-15 Jakub Jelinek + + * c-omp.c (c_omp_split_clauses) : + Duplicate to both OMP_TARGET and OMP_TEAMS. + 2022-01-25 Kwok Cheung Yeung * c-omp.c (c_omp_expand_metadirective_r): New. diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c index 920e9847784..fed909e1da3 100644 --- a/gcc/c-family/c-omp.c +++ b/gcc/c-family/c-omp.c @@ -2012,7 +2012,6 @@ c_omp_split_clauses (location_t loc, enum tree_code code, s = C_OMP_CLAUSE_SPLIT_TARGET; break; case OMP_CLAUSE_NUM_TEAMS: - case OMP_CLAUSE_THREAD_LIMIT: s = C_OMP_CLAUSE_SPLIT_TEAMS; break; case OMP_CLAUSE_DIST_SCHEDULE: @@ -2676,6 +2675,30 @@ c_omp_split_clauses (location_t loc, enum tree_code code, else s = C_OMP_CLAUSE_SPLIT_FOR; break; + /* thread_limit is allowed on target and teams. Distribute it + to all. */ + case OMP_CLAUSE_THREAD_LIMIT: + if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_MAP)) + != 0) + { + if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_TEAMS)) + != 0) + { + c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses), + OMP_CLAUSE_THREAD_LIMIT); + OMP_CLAUSE_THREAD_LIMIT_EXPR (c) + = OMP_CLAUSE_THREAD_LIMIT_EXPR (clauses); + OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_TARGET]; + cclauses[C_OMP_CLAUSE_SPLIT_TARGET] = c; + } + else + { + s = C_OMP_CLAUSE_SPLIT_TARGET; + break; + } + } + s = C_OMP_CLAUSE_SPLIT_TEAMS; + break; /* Allocate clause is allowed on target, teams, distribute, parallel, for, sections and taskloop. Distribute it to all. */ case OMP_CLAUSE_ALLOCATE: diff --git a/gcc/c/ChangeLog.omp b/gcc/c/ChangeLog.omp index 919a4733299..3838d18e571 100644 --- a/gcc/c/ChangeLog.omp +++ b/gcc/c/ChangeLog.omp @@ -1,3 +1,11 @@ +2022-02-27 Tobias Burnus + + Backported from master: + 2021-11-15 Jakub Jelinek + + * c-parser.c (OMP_TARGET_CLAUSE_MASK): Add + PRAGMA_OMP_CLAUSE_THREAD_LIMIT. + 2022-02-27 Tobias Burnus Backported from master: diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 466435a62b2..1dab35b19ad 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -21020,6 +21020,7 @@ c_parser_omp_target_exit_data (location_t loc, c_parser *parser, | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR)) static bool diff --git a/gcc/cp/ChangeLog.omp b/gcc/cp/ChangeLog.omp index 0b041e834f7..79221b0eece 100644 --- a/gcc/cp/ChangeLog.omp +++ b/gcc/cp/ChangeLog.omp @@ -1,3 +1,11 @@ +2022-02-27 Tobias Burnus + + Backported from master: + 2021-11-15 Jakub Jelinek + + * parser.c (OMP_TARGET_CLAUSE_MASK): Add + PRAGMA_OMP_CLAUSE_THREAD_LIMIT. + 2022-02-27 Tobias Burnus Backported from master: diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 368bcb39f23..15aa72ea6ad 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -43810,6 +43810,7 @@ cp_parser_omp_target_update (cp_parser *parser, cp_token *pragma_tok, | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR)) static bool diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 2af3c10e76b..bafa3f1312f 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -14450,10 +14450,13 @@ optimize_target_teams (tree target, gimple_seq *pre_p) if (!DECL_P (expr) && TREE_CODE (expr) != TARGET_EXPR) OMP_CLAUSE_OPERAND (c, 0) = *p; } - c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT); - OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit; - OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target); - OMP_TARGET_CLAUSES (target) = c; + if (!omp_find_clause (OMP_TARGET_CLAUSES (target), OMP_CLAUSE_THREAD_LIMIT)) + { + c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT); + OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit; + OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target); + OMP_TARGET_CLAUSES (target) = c; + } c = build_omp_clause (num_teams_loc, OMP_CLAUSE_NUM_TEAMS); OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c) = num_teams_upper; OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (c) = num_teams_lower; diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index b8ebc9bde3f..3149d78027b 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,18 @@ +2022-02-27 Tobias Burnus + + Backported from master: + 2021-11-15 Jakub Jelinek + + * task.c (gomp_create_target_task): Copy args array as well. + * target.c (gomp_target_fallback): Add args argument. + Set gomp_icv (true)->thread_limit_var if thread_limit is present. + (GOMP_target): Adjust gomp_target_fallback caller. + (GOMP_target_ext): Likewise. + (gomp_target_task_fn): Likewise. + * config/nvptx/team.c (gomp_nvptx_main): Set + gomp_global_icv.thread_limit_var. + * testsuite/libgomp.c-c++-common/thread-limit-1.c: New test.2022-01-25 Kwok Cheung Yeung + 2022-02-27 Tobias Burnus Backported from master: diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index 9ae7a470a19..ae3cccc206c 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -58,6 +58,7 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data) if (tid == 0) { gomp_global_icv.nthreads_var = ntids; + gomp_global_icv.thread_limit_var = ntids; /* Starting additional threads is not supported. */ gomp_global_icv.dyn_var = true; diff --git a/libgomp/target.c b/libgomp/target.c index fd2a68e53e5..ef30e6a2963 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -2589,7 +2589,7 @@ gomp_unload_device (struct gomp_device_descr *devicep) static void gomp_target_fallback (void (*fn) (void *), void **hostaddrs, - struct gomp_device_descr *devicep) + struct gomp_device_descr *devicep, void **args) { struct gomp_thread old_thr, *thr = gomp_thread (); @@ -2605,6 +2605,25 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs, thr->place = old_thr.place; thr->ts.place_partition_len = gomp_places_list_len; } + if (args) + while (*args) + { + intptr_t id = (intptr_t) *args++, val; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + val = (intptr_t) *args++; + else + val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; + if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) + continue; + id &= GOMP_TARGET_ARG_ID_MASK; + if (id != GOMP_TARGET_ARG_THREAD_LIMIT) + continue; + val = val > INT_MAX ? INT_MAX : val; + if (val) + gomp_icv (true)->thread_limit_var = val; + break; + } + fn (hostaddrs); gomp_free_thread (thr); *thr = old_thr; @@ -2705,7 +2724,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, /* All shared memory devices should use the GOMP_target_ext function. */ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) - return gomp_target_fallback (fn, hostaddrs, devicep); + return gomp_target_fallback (fn, hostaddrs, devicep, NULL); htab_t refcount_set = htab_create (mapnum); struct target_mem_desc *tgt_vars @@ -2844,7 +2863,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, tgt_align, tgt_size); } } - gomp_target_fallback (fn, hostaddrs, devicep); + gomp_target_fallback (fn, hostaddrs, devicep, args); return; } @@ -3299,7 +3318,8 @@ gomp_target_task_fn (void *data) || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { ttask->state = GOMP_TARGET_TASK_FALLBACK; - gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep); + gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep, + ttask->args); return false; } diff --git a/libgomp/task.c b/libgomp/task.c index feb4796a3ac..414ca6e89ae 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -745,6 +745,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep, size_t depend_size = 0; uintptr_t depend_cnt = 0; size_t tgt_align = 0, tgt_size = 0; + uintptr_t args_cnt = 0; if (depend != NULL) { @@ -769,10 +770,22 @@ gomp_create_target_task (struct gomp_device_descr *devicep, tgt_size += tgt_align - 1; else tgt_size = 0; + if (args) + { + void **cargs = args; + while (*cargs) + { + intptr_t id = (intptr_t) *cargs++; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + cargs++; + } + args_cnt = cargs + 1 - args; + } } task = gomp_malloc (sizeof (*task) + depend_size + sizeof (*ttask) + + args_cnt * sizeof (void *) + mapnum * (sizeof (void *) + sizeof (size_t) + sizeof (unsigned short)) + tgt_size); @@ -785,9 +798,18 @@ gomp_create_target_task (struct gomp_device_descr *devicep, ttask->devicep = devicep; ttask->fn = fn; ttask->mapnum = mapnum; - ttask->args = args; memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); - ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; + if (args_cnt) + { + ttask->args = (void **) &ttask->hostaddrs[mapnum]; + memcpy (ttask->args, args, args_cnt * sizeof (void *)); + ttask->sizes = (size_t *) &ttask->args[args_cnt]; + } + else + { + ttask->args = args; + ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; + } memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); diff --git a/libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c b/libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c new file mode 100644 index 00000000000..cac220246d8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c @@ -0,0 +1,23 @@ +#include +#include + +void +foo () +{ + { + #pragma omp target parallel nowait thread_limit (4) num_threads (1) + if (omp_get_thread_limit () > 4) + abort (); + } + #pragma omp taskwait +} + +int +main () +{ + #pragma omp target thread_limit (6) + if (omp_get_thread_limit () > 6) + abort (); + foo (); + return 0; +}