7619d33471
This patch adds support for in_reduction clause on target construct, though for now only for synchronous targets (without nowait clause). The encountering thread in that case runs the target task and blocks until the target region ends, so it is implemented by remapping it before entering the target, initializing the private copy if not yet initialized for the current thread and then using the remapped addresses for the mapping addresses. For nowait combined with in_reduction the patch contains a hack where the nowait clause is ignored. To implement it correctly, I think we would need to create a new private variable for the in_reduction and initialize it before doing the async target and adjust the map addresses to that private variable and then pass a function pointer to the library routine with code where the callback would remap the address to the current thread's private variable and use in_reduction combiner to combine the private variable we've created into the thread's copy. The library would then need to make sure that the routine is called in some thread participating in the parallel (and not in an unshackled thread). 2021-06-24 Jakub Jelinek <jakub@redhat.com> gcc/ * tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): Document meaning for OpenMP. * gimplify.c (gimplify_scan_omp_clauses): For OpenMP map clauses with OMP_CLAUSE_MAP_IN_REDUCTION flag partially defer gimplification of non-decl OMP_CLAUSE_DECL. For OMP_CLAUSE_IN_REDUCTION on OMP_TARGET use outer_ctx instead of ctx for placeholders and initializer/combiner gimplification. * omp-low.c (scan_sharing_clauses): Handle OMP_CLAUSE_MAP_IN_REDUCTION on target constructs. (lower_rec_input_clauses): Likewise. (lower_omp_target): Likewise. * omp-expand.c (expand_omp_target): Temporarily ignore nowait clause on target if in_reduction is present. gcc/c-family/ * c-common.h (enum c_omp_region_type): Add C_ORT_TARGET and C_ORT_OMP_TARGET. 
* c-omp.c (c_omp_split_clauses): For OMP_CLAUSE_IN_REDUCTION on combined target constructs also add map (always, tofrom:) clause. gcc/c/ * c-parser.c (omp_split_clauses): Pass C_ORT_OMP_TARGET instead of C_ORT_OMP for clauses on target construct. (OMP_TARGET_CLAUSE_MASK): Add in_reduction clause. (c_parser_omp_target): For non-combined target add map (always, tofrom:) clauses for OMP_CLAUSE_IN_REDUCTION. Pass C_ORT_OMP_TARGET to c_finish_omp_clauses. * c-typeck.c (handle_omp_array_sections): Adjust ort handling for addition of C_ORT_OMP_TARGET and simplify, mapping clauses are never present on C_ORT_*DECLARE_SIMD. (c_finish_omp_clauses): Likewise. Handle OMP_CLAUSE_IN_REDUCTION on C_ORT_OMP_TARGET, set OMP_CLAUSE_MAP_IN_REDUCTION on corresponding map clauses. gcc/cp/ * parser.c (cp_omp_split_clauses): Pass C_ORT_OMP_TARGET instead of C_ORT_OMP for clauses on target construct. (OMP_TARGET_CLAUSE_MASK): Add in_reduction clause. (cp_parser_omp_target): For non-combined target add map (always, tofrom:) clauses for OMP_CLAUSE_IN_REDUCTION. Pass C_ORT_OMP_TARGET to finish_omp_clauses. * semantics.c (handle_omp_array_sections_1): Adjust ort handling for addition of C_ORT_OMP_TARGET and simplify, mapping clauses are never present on C_ORT_*DECLARE_SIMD. (handle_omp_array_sections): Likewise. (finish_omp_clauses): Likewise. Handle OMP_CLAUSE_IN_REDUCTION on C_ORT_OMP_TARGET, set OMP_CLAUSE_MAP_IN_REDUCTION on corresponding map clauses. * pt.c (tsubst_expr): Pass C_ORT_OMP_TARGET instead of C_ORT_OMP for clauses on target construct. gcc/testsuite/ * c-c++-common/gomp/target-in-reduction-1.c: New test. * c-c++-common/gomp/clauses-1.c: Add in_reduction clauses on target or combined target constructs. libgomp/ * testsuite/libgomp.c-c++-common/target-in-reduction-1.c: New test. * testsuite/libgomp.c-c++-common/target-in-reduction-2.c: New test. * testsuite/libgomp.c++/target-in-reduction-1.C: New test. * testsuite/libgomp.c++/target-in-reduction-2.C: New test.
114 lines
2.3 KiB
C
114 lines
2.3 KiB
C
/* Exercise in_reduction on target constructs nested in a taskgroup that
   carries the matching task_reduction clause.  The reduction list mixes a
   scalar reference (x), a section of a pointer reference (y), a section of
   a reference to a fixed-size array (z), and a whole array plus a section
   of references to variable-length arrays (u, w).

   X and Y are updated in place for the caller; N sizes the local
   variable-length arrays and V is the length used in some of the array
   sections.  The function aborts if any total is wrong.  The expected
   totals below assume the arguments main passes on the first call:
   x == 42, y[0] == 43, y[1] == 44, n == 3 and v == 2.  */
void
foo (int &x, int *&y, int n, int v)
{
  int zu[3] = { 45, 46, 47 };
  int uu[n], wu[n], i;
  /* Access the local arrays only through references, so the clauses
     below operate on reference-typed list items.  */
  int (&z)[3] = zu;
  int (&u)[n] = uu;
  int (&w)[n] = wu;
  for (i = 0; i < n; i++)
    w[i] = u[i] = n + i;
  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
  {
    /* First contribution: an ordinary explicit task.  */
    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
    {
      x++;
      y[0] += 2;
      y[1] += 3;
      z[1] += 4;
      u[0] += 5;
      w[1] += 6;
    }
    /* Second contribution: a target region using the same section
       spelling as the taskgroup.  */
    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
    {
      x += 4;
      y[0] += 5;
      y[1] += 6;
      z[2] += 7;
      u[1] += 8;
      w[2] += 7;
    }
    /* Third contribution: a target region where the y and z sections are
       spelled with V and the w section with a constant length.  */
    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
    {
      x += 9;
      y[0] += 10;
      y[1] += 11;
      z[1] += 12;
      u[2] += 13;
      w[1] += 14;
    }
  }
  /* x: 42 + 1 + 4 + 9; y[0]: 43 + 2 + 5 + 10; y[1]: 44 + 3 + 6 + 11.  */
  if (x != 56 || y[0] != 60 || y[1] != 64)
    __builtin_abort ();
  /* z[0] is untouched; z[1]: 46 + 4 + 12; z[2]: 47 + 7.  */
  if (z[0] != 45 || z[1] != 62 || z[2] != 54)
    __builtin_abort ();
  /* u[i] starts at n + i: 3 + 5, 4 + 8, 5 + 13.  */
  if (u[0] != 8 || u[1] != 12 || u[2] != 18)
    __builtin_abort ();
  /* w[i] starts at n + i: w[0] is untouched, 4 + 6 + 14, 5 + 7.  */
  if (w[0] != 3 || w[1] != 24 || w[2] != 12)
    __builtin_abort ();
}
/* Same scenario as foo, except that this function opens its own
   "parallel master" region around the taskgroup.

   X and Y are updated in place for the caller; N sizes the local
   variable-length arrays and V is the length used in some of the array
   sections.  The function aborts if any total is wrong.  The expected
   totals below assume the state main leaves after its foo call:
   x reset to 42, y[0] == 60, y[1] == 64, n == 3 and v == 2.  */
void
bar (int &x, int *&y, int n, int v)
{
  int zu[3] = { 45, 46, 47 };
  int uu[n], wu[n], i;
  /* Access the local arrays only through references, so the clauses
     below operate on reference-typed list items.  */
  int (&z)[3] = zu;
  int (&u)[n] = uu;
  int (&w)[n] = wu;
  for (i = 0; i < n; i++)
    w[i] = u[i] = n + i;
  #pragma omp parallel master
  #pragma omp taskgroup task_reduction (+: x, y[:2], z[1:2], u, w[1:v])
  {
    /* First contribution: an ordinary explicit task.  */
    #pragma omp task in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
    {
      x++;
      y[0] += 2;
      y[1] += 3;
      z[1] += 4;
      u[0] += 5;
      w[1] += 6;
    }
    /* Second contribution: a target region using the same section
       spelling as the taskgroup.  */
    #pragma omp target in_reduction (+: x, y[:2], z[1:2], u, w[1:v])
    {
      x += 4;
      y[0] += 5;
      y[1] += 6;
      z[2] += 7;
      u[1] += 8;
      w[2] += 7;
    }
    /* Third contribution: a target region where the y and z sections are
       spelled with V and the w section with a constant length.  */
    #pragma omp target in_reduction (+: x, y[:v], z[1:v], u, w[1:2])
    {
      x += 9;
      y[0] += 10;
      y[1] += 11;
      z[1] += 12;
      u[2] += 13;
      w[1] += 14;
    }
  }
  /* x: 42 + 1 + 4 + 9; y[0]: 60 + 2 + 5 + 10; y[1]: 64 + 3 + 6 + 11.  */
  if (x != 56 || y[0] != 77 || y[1] != 84)
    __builtin_abort ();
  /* z[0] is untouched; z[1]: 46 + 4 + 12; z[2]: 47 + 7.  */
  if (z[0] != 45 || z[1] != 62 || z[2] != 54)
    __builtin_abort ();
  /* u[i] starts at n + i: 3 + 5, 4 + 8, 5 + 13.  */
  if (u[0] != 8 || u[1] != 12 || u[2] != 18)
    __builtin_abort ();
  /* w[i] starts at n + i: w[0] is untouched, 4 + 6 + 14, 5 + 7.  */
  if (w[0] != 3 || w[1] != 24 || w[2] != 12)
    __builtin_abort ();
}
int
|
|
main ()
|
|
{
|
|
int x = 42;
|
|
int yu[2] = { 43, 44 };
|
|
int *y = yu;
|
|
#pragma omp parallel master
|
|
foo (x, y, 3, 2);
|
|
x = 42;
|
|
bar (x, y, 3, 2);
|
|
return 0;
|
|
}
|