From 2f6bb511d1003d31ec1213081b6c2514cc10f0f9 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 3 Jul 2019 07:03:58 +0200 Subject: [PATCH] tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ clause. * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ clause. * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of OMP_CLAUSE__CONDTEMP_ as range's upper bound. (OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define. * tree.c (omp_clause_num_ops, omp_clause_code_name): Add OMP_CLAUSE__SCANTEMP_ entry. (walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_. * tree-pretty-print.c (dump_omp_clause): Likewise. * tree-nested.c (convert_nonlocal_omp_clauses, convert_local_omp_clauses): Likewise. * omp-general.h (struct omp_for_data): Add have_scantemp and have_nonctrl_scantemp members. * omp-general.c (omp_extract_for_data): Initialize them. * omp-low.c (struct omp_context): Add scan_exclusive member. (scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind result again with GF_OMP_FOR_KIND_MASK. Initialize also ctx->scan_exclusive. (lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead of !ctx->scan_inclusive. (lower_rec_input_clauses): Simplify gimplification of dtors using gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing loops. Don't add barrier for reduction_omp_orig_ref if ctx->scan_??xclusive. (lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive. (lower_omp_scan): Use ctx->scan_exclusive instead of !ctx->scan_inclusive. Handle worksharing loops with inscan reductions. Use new_vard != new_var instead of repeated omp_is_reference calls. (omp_find_scan, lower_omp_for_scan): New functions. (lower_omp_for): Call lower_omp_for_scan for worksharing loops with inscan reductions. * omp-expand.c (expand_omp_scantemp_alloc): New function. (expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp and fd->have_scantemp. * c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message. * c-c++-common/gomp/scan-5.c (foo): Likewise. * testsuite/libgomp.c++/scan-1.C: New test. * testsuite/libgomp.c++/scan-2.C: New test. * testsuite/libgomp.c++/scan-3.C: New test. * testsuite/libgomp.c++/scan-4.C: New test. * testsuite/libgomp.c++/scan-5.C: New test. * testsuite/libgomp.c++/scan-6.C: New test. * testsuite/libgomp.c++/scan-7.C: New test. * testsuite/libgomp.c++/scan-8.C: New test. * testsuite/libgomp.c/scan-1.c: New test. * testsuite/libgomp.c/scan-2.c: New test. * testsuite/libgomp.c/scan-3.c: New test. * testsuite/libgomp.c/scan-4.c: New test. * testsuite/libgomp.c/scan-5.c: New test. * testsuite/libgomp.c/scan-6.c: New test. * testsuite/libgomp.c/scan-7.c: New test. * testsuite/libgomp.c/scan-8.c: New test. From-SVN: r272958 --- gcc/ChangeLog | 37 + gcc/omp-expand.c | 292 ++++++- gcc/omp-general.c | 8 + gcc/omp-general.h | 2 +- gcc/omp-low.c | 939 ++++++++++++++++++++--- gcc/testsuite/ChangeLog | 3 + gcc/testsuite/c-c++-common/gomp/scan-3.c | 2 +- gcc/testsuite/c-c++-common/gomp/scan-5.c | 2 +- gcc/tree-core.h | 3 + gcc/tree-nested.c | 2 + gcc/tree-pretty-print.c | 3 + gcc/tree.c | 3 + gcc/tree.h | 13 +- libgomp/ChangeLog | 19 + libgomp/testsuite/libgomp.c++/scan-1.C | 151 ++++ libgomp/testsuite/libgomp.c++/scan-2.C | 116 +++ libgomp/testsuite/libgomp.c++/scan-3.C | 119 +++ libgomp/testsuite/libgomp.c++/scan-4.C | 150 ++++ libgomp/testsuite/libgomp.c++/scan-5.C | 158 ++++ libgomp/testsuite/libgomp.c++/scan-6.C | 120 +++ libgomp/testsuite/libgomp.c++/scan-7.C | 118 +++ libgomp/testsuite/libgomp.c++/scan-8.C | 150 ++++ libgomp/testsuite/libgomp.c/scan-1.c | 115 +++ libgomp/testsuite/libgomp.c/scan-2.c | 117 +++ libgomp/testsuite/libgomp.c/scan-3.c | 88 +++ libgomp/testsuite/libgomp.c/scan-4.c | 179 +++++ libgomp/testsuite/libgomp.c/scan-5.c | 115 +++ libgomp/testsuite/libgomp.c/scan-6.c | 117 +++ libgomp/testsuite/libgomp.c/scan-7.c | 86 +++ libgomp/testsuite/libgomp.c/scan-8.c | 179 +++++ 30 files changed, 3264 insertions(+), 142 deletions(-) create mode 100644 libgomp/testsuite/libgomp.c++/scan-1.C create mode 100644 libgomp/testsuite/libgomp.c++/scan-2.C create mode 100644 libgomp/testsuite/libgomp.c++/scan-3.C create mode 100644 libgomp/testsuite/libgomp.c++/scan-4.C create mode 100644 libgomp/testsuite/libgomp.c++/scan-5.C create mode 100644 libgomp/testsuite/libgomp.c++/scan-6.C create mode 100644 libgomp/testsuite/libgomp.c++/scan-7.C create mode 100644 libgomp/testsuite/libgomp.c++/scan-8.C create mode 100644 libgomp/testsuite/libgomp.c/scan-1.c create mode 100644 libgomp/testsuite/libgomp.c/scan-2.c create mode 100644 libgomp/testsuite/libgomp.c/scan-3.c create mode 100644 libgomp/testsuite/libgomp.c/scan-4.c create mode 100644 libgomp/testsuite/libgomp.c/scan-5.c create mode 100644 libgomp/testsuite/libgomp.c/scan-6.c create mode 100644 libgomp/testsuite/libgomp.c/scan-7.c create mode 100644 libgomp/testsuite/libgomp.c/scan-8.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0f00fce6e38..2e7c74f5270 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,42 @@ 2019-07-03 Jakub Jelinek + * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ + clause. + * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of + OMP_CLAUSE__CONDTEMP_ as range's upper bound. + (OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define. + * tree.c (omp_clause_num_ops, omp_clause_code_name): Add + OMP_CLAUSE__SCANTEMP_ entry. + (walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_. + * tree-pretty-print.c (dump_omp_clause): Likewise. + * tree-nested.c (convert_nonlocal_omp_clauses, + convert_local_omp_clauses): Likewise. + * omp-general.h (struct omp_for_data): Add have_scantemp and + have_nonctrl_scantemp members. + * omp-general.c (omp_extract_for_data): Initialize them. + * omp-low.c (struct omp_context): Add scan_exclusive member. + (scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind + result again with GF_OMP_FOR_KIND_MASK. Initialize also + ctx->scan_exclusive. + (lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead + of !ctx->scan_inclusive. + (lower_rec_input_clauses): Simplify gimplification of dtors using + gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN + rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing + loops. Don't add barrier for reduction_omp_orig_ref if + ctx->scan_??xclusive. + (lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive. + (lower_omp_scan): Use ctx->scan_exclusive instead + of !ctx->scan_inclusive. Handle worksharing loops with inscan + reductions. Use new_vard != new_var instead of repeated + omp_is_reference calls. + (omp_find_scan, lower_omp_for_scan): New functions. + (lower_omp_for): Call lower_omp_for_scan for worksharing loops with + inscan reductions. + * omp-expand.c (expand_omp_scantemp_alloc): New function. + (expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp + and fd->have_scantemp. + * gimplify.c (gimplify_scan_omp_clauses): For inscan reductions on worksharing loop propagate it as shared clause to containing combined parallel. diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 6902425a3d6..9236b3665e4 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_region *region, } } +/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL, + compute needed allocation size. If !ALLOC of team allocations, + if ALLOC of thread allocation. SZ is the initial needed size for + other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes, + CNT number of elements of each array, for !ALLOC this is + omp_get_num_threads (), for ALLOC number of iterations handled by the + current thread. If PTR is non-NULL, it is the start of the allocation + and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_ + clauses pointers to the corresponding arrays. */ + +static tree +expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz, + unsigned HOST_WIDE_INT alloc_align, tree cnt, + gimple_stmt_iterator *gsi, bool alloc) +{ + tree eltsz = NULL_TREE; + unsigned HOST_WIDE_INT preval = 0; + if (ptr && sz) + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), + ptr, size_int (sz)); + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && !OMP_CLAUSE__SCANTEMP__CONTROL (c) + && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc) + { + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); + unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type); + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) + { + unsigned HOST_WIDE_INT szl + = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type)); + szl = least_bit_hwi (szl); + if (szl) + al = MIN (al, szl); + } + if (ptr == NULL_TREE) + { + if (eltsz == NULL_TREE) + eltsz = TYPE_SIZE_UNIT (pointee_type); + else + eltsz = size_binop (PLUS_EXPR, eltsz, + TYPE_SIZE_UNIT (pointee_type)); + } + if (preval == 0 && al <= alloc_align) + { + unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz; + sz += diff; + if (diff && ptr) + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), + ptr, size_int (diff)); + } + else if (al > preval) + { + if (ptr) + { + ptr = fold_convert (pointer_sized_int_node, ptr); + ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr, + build_int_cst (pointer_sized_int_node, + al - 1)); + ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr, + build_int_cst (pointer_sized_int_node, + -(HOST_WIDE_INT) al)); + ptr = fold_convert (ptr_type_node, ptr); + } + else + sz += al - 1; + } + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) + preval = al; + else + preval = 1; + if (ptr) + { + expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false); + ptr = OMP_CLAUSE_DECL (c); + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, + size_binop (MULT_EXPR, cnt, + TYPE_SIZE_UNIT (pointee_type))); + } + } + + if (ptr == NULL_TREE) + { + eltsz = size_binop (MULT_EXPR, eltsz, cnt); + if (sz) + eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz)); + return eltsz; + } + else + return ptr; +} + /* A subroutine of expand_omp_for. Generate code for a parallel loop with static schedule and no specified chunk size. Given parameters: @@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct omp_region *region, struct omp_for_data *fd, gimple *inner_stmt) { - tree n, q, s0, e0, e, t, tt, nthreads, threadid; + tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid; tree type, itype, vmain, vback; basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; basic_block body_bb, cont_bb, collapse_bb = NULL; - basic_block fin_bb; + basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL; + basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL; gimple_stmt_iterator gsi, gsip; edge ep; bool broken_loop = region->cont == NULL; @@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct omp_region *region, c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); cond_var = OMP_CLAUSE_DECL (c); } - if (fd->have_reductemp || fd->have_pointer_condtemp) + if (fd->have_reductemp + || fd->have_pointer_condtemp + || fd->have_nonctrl_scantemp) { tree t1 = build_int_cst (long_integer_type_node, 0); tree t2 = build_int_cst (long_integer_type_node, 1); @@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct omp_region *region, gimple_stmt_iterator gsi2 = gsi_none (); gimple *g = NULL; tree mem = null_pointer_node, memv = NULL_TREE; + unsigned HOST_WIDE_INT condtemp_sz = 0; + unsigned HOST_WIDE_INT alloc_align = 0; if (fd->have_reductemp) { + gcc_assert (!fd->have_nonctrl_scantemp); tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); reductions = OMP_CLAUSE_DECL (c); gcc_assert (TREE_CODE (reductions) == SSA_NAME); @@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct omp_region *region, gsi2 = gsip; reductions = null_pointer_node; } - if (fd->have_pointer_condtemp) + if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp) { - tree type = TREE_TYPE (condtemp); + tree type; + if (fd->have_pointer_condtemp) + type = TREE_TYPE (condtemp); + else + type = ptr_type_node; memv = create_tmp_var (type); TREE_ADDRESSABLE (memv) = 1; - unsigned HOST_WIDE_INT sz - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); - sz *= fd->lastprivate_conditional; - expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), - false); + unsigned HOST_WIDE_INT sz = 0; + tree size = NULL_TREE; + if (fd->have_pointer_condtemp) + { + sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); + sz *= fd->lastprivate_conditional; + condtemp_sz = sz; + } + if (fd->have_nonctrl_scantemp) + { + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + gimple *g = gimple_build_call (nthreads, 0); + nthreads = create_tmp_var (integer_type_node); + gimple_call_set_lhs (g, nthreads); + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); + nthreads = fold_convert (sizetype, nthreads); + alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node); + size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, + alloc_align, nthreads, NULL, + false); + size = fold_convert (type, size); + } + else + size = build_int_cst (type, sz); + expand_omp_build_assign (&gsi2, memv, size, false); mem = build_fold_addr_expr (memv); } tree t @@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct omp_region *region, true, GSI_SAME_STMT); if (fd->have_pointer_condtemp) expand_omp_build_assign (&gsi2, condtemp, memv, false); + if (fd->have_nonctrl_scantemp) + { + tree ptr = fd->have_pointer_condtemp ? condtemp : memv; + expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz, + alloc_align, nthreads, &gsi2, false); + } if (fd->have_reductemp) { gsi_remove (&gsi2, true); @@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct omp_region *region, gsi = gsi_last_nondebug_bb (third_bb); gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + if (fd->have_nonctrl_scantemp) + { + tree clauses = gimple_omp_for_clauses (fd->for_stmt); + tree controlp = NULL_TREE, controlb = NULL_TREE; + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && OMP_CLAUSE__SCANTEMP__CONTROL (c)) + { + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) + controlb = OMP_CLAUSE_DECL (c); + else + controlp = OMP_CLAUSE_DECL (c); + if (controlb && controlp) + break; + } + gcc_assert (controlp && controlb); + tree cnt = create_tmp_var (sizetype); + gimple *g = gimple_build_assign (cnt, NOP_EXPR, q); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node); + tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0, + alloc_align, cnt, NULL, true); + tree size = create_tmp_var (sizetype); + expand_omp_build_assign (&gsi, size, sz, false); + tree cmp = fold_build2 (GT_EXPR, boolean_type_node, + size, size_int (16384)); + expand_omp_build_assign (&gsi, controlb, cmp); + g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + fourth_bb = split_block (third_bb, g)->dest; + gsi = gsi_last_nondebug_bb (fourth_bb); + /* FIXME: Once we have allocators, this should use allocator. */ + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size); + gimple_call_set_lhs (g, controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt, + &gsi, true); + gsi_prev (&gsi); + g = gsi_stmt (gsi); + fifth_bb = split_block (fourth_bb, g)->dest; + gsi = gsi_last_nondebug_bb (fifth_bb); + + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0); + gimple_call_set_lhs (g, controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && OMP_CLAUSE__SCANTEMP__ALLOC (c)) + { + tree tmp = create_tmp_var (sizetype); + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); + g = gimple_build_assign (tmp, MULT_EXPR, cnt, + TYPE_SIZE_UNIT (pointee_type)); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + g = gimple_build_call (alloca_decl, 2, tmp, + size_int (TYPE_ALIGN (pointee_type))); + gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c)); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + } + + sixth_bb = split_block (fifth_bb, g)->dest; + gsi = gsi_last_nondebug_bb (sixth_bb); + } + t = build2 (MULT_EXPR, itype, q, threadid); t = build2 (PLUS_EXPR, itype, t, tt); s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); @@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct omp_region *region, if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) { t = gimple_omp_return_lhs (gsi_stmt (gsi)); - if (fd->have_reductemp || fd->have_pointer_condtemp) + if (fd->have_reductemp + || ((fd->have_pointer_condtemp || fd->have_scantemp) + && !fd->have_nonctrl_scantemp)) { tree fn; if (t) @@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct omp_region *region, gcall *g = gimple_build_call (fn, 0); gsi_insert_after (&gsi, g, GSI_SAME_STMT); } + if (fd->have_scantemp && !fd->have_nonctrl_scantemp) + { + tree clauses = gimple_omp_for_clauses (fd->for_stmt); + tree controlp = NULL_TREE, controlb = NULL_TREE; + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && OMP_CLAUSE__SCANTEMP__CONTROL (c)) + { + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) + controlb = OMP_CLAUSE_DECL (c); + else + controlp = OMP_CLAUSE_DECL (c); + if (controlb && controlp) + break; + } + gcc_assert (controlp && controlb); + gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + exit1_bb = split_block (exit_bb, g)->dest; + gsi = gsi_after_labels (exit1_bb); + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1, + controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + exit2_bb = split_block (exit1_bb, g)->dest; + gsi = gsi_after_labels (exit2_bb); + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1, + controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + exit3_bb = split_block (exit2_bb, g)->dest; + gsi = gsi_after_labels (exit3_bb); + } gsi_remove (&gsi, true); /* Connect all the blocks. */ @@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct omp_region *region, ep = find_edge (entry_bb, second_bb); ep->flags = EDGE_TRUE_VALUE; ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); - find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; - find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; + if (fourth_bb) + { + ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE); + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (third_bb, fourth_bb); + ep->flags = EDGE_TRUE_VALUE; + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (fourth_bb, fifth_bb); + redirect_edge_and_branch (ep, sixth_bb); + } + else + sixth_bb = third_bb; + find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; + find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE; + if (exit1_bb) + { + ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE); + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (exit_bb, exit1_bb); + ep->flags = EDGE_TRUE_VALUE; + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (exit1_bb, exit2_bb); + redirect_edge_and_branch (ep, exit3_bb); + } if (!broken_loop) { @@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct omp_region *region, set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); + if (fourth_bb) + { + set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb); + set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb); + } + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb); set_immediate_dominator (CDI_DOMINATORS, body_bb, recompute_dominator (CDI_DOMINATORS, body_bb)); set_immediate_dominator (CDI_DOMINATORS, fin_bb, recompute_dominator (CDI_DOMINATORS, fin_bb)); + if (exit1_bb) + { + set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb); + set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb); + } struct loop *loop = body_bb->loop_father; if (loop != entry_bb->loop_father) diff --git a/gcc/omp-general.c b/gcc/omp-general.c index c7376666ca6..8086f9ac392 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -169,6 +169,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, fd->have_ordered = false; fd->have_reductemp = false; fd->have_pointer_condtemp = false; + fd->have_scantemp = false; + fd->have_nonctrl_scantemp = false; fd->lastprivate_conditional = 0; fd->tiling = NULL_TREE; fd->collapse = 1; @@ -231,6 +233,12 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t)))) fd->have_pointer_condtemp = true; break; + case OMP_CLAUSE__SCANTEMP_: + fd->have_scantemp = true; + if (!OMP_CLAUSE__SCANTEMP__ALLOC (t) + && !OMP_CLAUSE__SCANTEMP__CONTROL (t)) + fd->have_nonctrl_scantemp = true; + break; default: break; } diff --git a/gcc/omp-general.h b/gcc/omp-general.h index 6ce891f2e94..80d42aff3c8 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -63,7 +63,7 @@ struct omp_for_data int collapse; /* Collapsed loops, 1 for a non-collapsed loop. */ int ordered; bool have_nowait, have_ordered, simd_schedule, have_reductemp; - bool have_pointer_condtemp; + bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp; int lastprivate_conditional; unsigned char sched_modifiers; enum omp_clause_schedule_kind sched_kind; diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 6b1e6a8a624..98a9df583bc 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -144,6 +144,9 @@ struct omp_context /* True if there is nested scan context with inclusive clause. */ bool scan_inclusive; + + /* True if there is nested scan context with exclusive clause. */ + bool scan_exclusive; }; static splay_tree all_contexts; @@ -3316,8 +3319,8 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, break; case GIMPLE_OMP_FOR: - if (((gimple_omp_for_kind (as_a (stmt)) - & GF_OMP_FOR_KIND_MASK) == GF_OMP_FOR_KIND_SIMD) + if ((gimple_omp_for_kind (as_a (stmt)) + == GF_OMP_FOR_KIND_SIMD) && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf ()) scan_omp_simd (gsi, as_a (stmt), ctx); @@ -3335,8 +3338,12 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, case GIMPLE_OMP_SCAN: if (tree clauses = gimple_omp_scan_clauses (as_a (stmt))) - if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE) - ctx->scan_inclusive = true; + { + if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE) + ctx->scan_inclusive = true; + else if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_EXCLUSIVE) + ctx->scan_exclusive = true; + } /* FALLTHRU */ case GIMPLE_OMP_SECTION: case GIMPLE_OMP_MASTER: @@ -3769,7 +3776,7 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, sctx->lastlane, NULL_TREE, NULL_TREE); TREE_THIS_NOTRAP (*rvar) = 1; - if (!ctx->scan_inclusive) + if (ctx->scan_exclusive) { /* And for exclusive scan yet another one, which will hold the value during the scan phase. */ @@ -3854,7 +3861,7 @@ static void lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, omp_context *ctx, struct omp_for_data *fd) { - tree c, dtor, copyin_seq, x, ptr; + tree c, copyin_seq, x, ptr; bool copyin_by_ref = false; bool lastprivate_firstprivate = false; bool reduction_omp_orig_ref = false; @@ -4541,12 +4548,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = lang_hooks.decls.omp_clause_dtor (c, build_simple_mem_ref (y2)); if (x) - { - gimple_seq tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (dlist, tseq); - } + gimplify_and_add (x, dlist); } } else @@ -4913,13 +4915,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, { y = lang_hooks.decls.omp_clause_dtor (c, ivar); if (y) - { - gimple_seq tseq = NULL; - - dtor = y; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (&llist[1], tseq); - } + gimplify_and_add (y, &llist[1]); } break; } @@ -4949,13 +4945,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, do_dtor: x = lang_hooks.decls.omp_clause_dtor (c, new_var); if (x) - { - gimple_seq tseq = NULL; - - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (dlist, tseq); - } + gimplify_and_add (x, dlist); break; case OMP_CLAUSE_LINEAR: @@ -5103,13 +5093,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimplify_and_add (x, &llist[0]); x = lang_hooks.decls.omp_clause_dtor (c, ivar); if (x) - { - gimple_seq tseq = NULL; - - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (&llist[1], tseq); - } + gimplify_and_add (x, &llist[1]); break; } if (omp_is_reference (var)) @@ -5282,12 +5266,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = lang_hooks.decls.omp_clause_dtor (c, nv); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (dlist, tseq); - } + gimplify_and_add (x, dlist); } tree ref = build_outer_var_ref (var, ctx); @@ -5310,34 +5289,19 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = lang_hooks.decls.omp_clause_dtor (c, ivar); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (&llist[1], tseq); - } + gimplify_and_add (x, &llist[1]); tree ivar2 = unshare_expr (lvar); TREE_OPERAND (ivar2, 1) = sctx.idx; x = lang_hooks.decls.omp_clause_dtor (c, ivar2); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (&llist[1], tseq); - } + gimplify_and_add (x, &llist[1]); if (rvar2) { x = lang_hooks.decls.omp_clause_dtor (c, rvar2); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (&llist[1], tseq); - } + gimplify_and_add (x, &llist[1]); } break; } @@ -5362,12 +5326,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, build_fold_addr_expr (lvar)); x = lang_hooks.decls.omp_clause_dtor (c, ivar); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (&llist[1], tseq); - } + gimplify_and_add (x, &llist[1]); break; } /* If this is a reference to constant size reduction var @@ -5409,16 +5368,19 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, if (x) gimplify_and_add (x, ilist); - if (rvarp) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_INSCAN (c)) { - if (x) + if (x || (!is_simd + && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))) { tree nv = create_tmp_var_raw (TREE_TYPE (new_var)); gimple_add_tmp_var (nv); ctx->cb.decl_map->put (new_vard, nv); x = lang_hooks.decls.omp_clause_default_ctor (c, nv, build_outer_var_ref (var, ctx)); - gimplify_and_add (x, ilist); + if (x) + gimplify_and_add (x, ilist); if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) { tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); @@ -5433,7 +5395,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimple_seq_add_seq (ilist, tseq); } OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; - if (!ctx->scan_inclusive) + if (is_simd && ctx->scan_exclusive) { tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var)); @@ -5444,23 +5406,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimplify_and_add (x, ilist); x = lang_hooks.decls.omp_clause_dtor (c, nv2); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (dlist, tseq); - } + gimplify_and_add (x, dlist); } x = lang_hooks.decls.omp_clause_dtor (c, nv); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (dlist, tseq); - } + gimplify_and_add (x, dlist); } - else if (!ctx->scan_inclusive + else if (is_simd + && ctx->scan_exclusive && TREE_ADDRESSABLE (TREE_TYPE (new_var))) { tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var)); @@ -5468,12 +5421,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ctx->cb.decl_map->put (new_vard, nv2); x = lang_hooks.decls.omp_clause_dtor (c, nv2); if (x) - { - tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (dlist, tseq); - } + gimplify_and_add (x, dlist); } DECL_HAS_VALUE_EXPR_P (placeholder) = 0; goto do_dtor; @@ -5611,7 +5559,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, { if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); - if (rvarp) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_INSCAN (c)) break; gimplify_assign (new_var, x, ilist); if (is_simd) @@ -5815,7 +5764,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, lastprivate clauses we need to ensure the lastprivate copying happens after firstprivate copying in all threads. And similarly for UDRs if initializer expression refers to omp_orig. */ - if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref) + if (copyin_by_ref || lastprivate_firstprivate + || (reduction_omp_orig_ref + && !ctx->scan_inclusive + && !ctx->scan_exclusive)) { /* Don't add any barrier for #pragma omp simd or #pragma omp distribute. */ @@ -6464,6 +6416,10 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD) return; + /* inscan reductions are handled elsewhere. */ + if (ctx->scan_inclusive || ctx->scan_exclusive) + return; + /* First see if there is exactly one reduction clause. Use OMP_ATOMIC update in that case, otherwise use a lock. */ for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c)) @@ -8650,7 +8606,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq before = NULL; omp_context *octx = ctx->outer; gcc_assert (octx); - if (!octx->scan_inclusive && !has_clauses) + if (octx->scan_exclusive && !has_clauses) { gimple_stmt_iterator gsi2 = *gsi_p; gsi_next (&gsi2); @@ -8672,23 +8628,29 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) } bool input_phase = has_clauses ^ octx->scan_inclusive; - if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR - && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) - && !gimple_omp_for_combined_into_p (octx->stmt)) + bool is_simd = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) + && !gimple_omp_for_combined_into_p (octx->stmt)); + bool is_for = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (octx->stmt) == GF_OMP_FOR_KIND_FOR + && !gimple_omp_for_combined_p (octx->stmt)); + if (is_simd) + if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), + OMP_CLAUSE__SIMDUID_)) + { + tree uid = OMP_CLAUSE__SIMDUID__DECL (c); + lane = create_tmp_var (unsigned_type_node); + tree t = build_int_cst (integer_type_node, + input_phase ? 1 + : octx->scan_inclusive ? 2 : 3); + gimple *g + = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); + gimple_call_set_lhs (g, lane); + gimple_seq_add_stmt (&before, g); + } + + if (is_simd || is_for) { - if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), - OMP_CLAUSE__SIMDUID_)) - { - tree uid = OMP_CLAUSE__SIMDUID__DECL (c); - lane = create_tmp_var (unsigned_type_node); - tree t = build_int_cst (integer_type_node, - input_phase ? 1 - : octx->scan_inclusive ? 2 : 3); - gimple *g - = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); - gimple_call_set_lhs (g, lane); - gimple_seq_add_stmt (&before, g); - } for (tree c = gimple_omp_for_clauses (octx->stmt); c; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION @@ -8711,7 +8673,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (DECL_HAS_VALUE_EXPR_P (new_vard)) { val = DECL_VALUE_EXPR (new_vard); - if (omp_is_reference (var)) + if (new_vard != new_var) { gcc_assert (TREE_CODE (val) == ADDR_EXPR); val = TREE_OPERAND (val, 0); @@ -8727,7 +8689,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) lane0 = TREE_OPERAND (val, 1); TREE_OPERAND (val, 1) = lane; var2 = lookup_decl (v, octx); - if (!octx->scan_inclusive) + if (octx->scan_exclusive) var4 = lookup_decl (var2, octx); if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) @@ -8737,7 +8699,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) var2 = build4 (ARRAY_REF, TREE_TYPE (val), var2, lane, NULL_TREE, NULL_TREE); TREE_THIS_NOTRAP (var2) = 1; - if (!octx->scan_inclusive) + if (octx->scan_exclusive) { var4 = build4 (ARRAY_REF, TREE_TYPE (val), var4, lane, NULL_TREE, @@ -8759,7 +8721,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) var3 = maybe_lookup_decl (new_vard, octx); if (var3 == new_vard || var3 == NULL_TREE) var3 = NULL_TREE; - else if (!octx->scan_inclusive && !input_phase) + else if (is_simd && octx->scan_exclusive && !input_phase) { var4 = maybe_lookup_decl (var3, octx); if (var4 == var3 || var4 == NULL_TREE) @@ -8774,7 +8736,10 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) } } } - if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE) + if (is_simd + && octx->scan_exclusive + && !input_phase + && var4 == NULL_TREE) var4 = create_tmp_var (TREE_TYPE (val)); } if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) @@ -8794,12 +8759,14 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) { /* Otherwise, assign to it the identity element. */ gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); + if (is_for) + tseq = copy_gimple_seq_and_replace_locals (tseq); tree ref = build_outer_var_ref (var, octx); tree x = (DECL_HAS_VALUE_EXPR_P (new_vard) ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); if (x) { - if (omp_is_reference (var)) + if (new_vard != new_var) val = build_fold_addr_expr_loc (clause_loc, val); SET_DECL_VALUE_EXPR (new_vard, val); } @@ -8811,13 +8778,14 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); DECL_HAS_VALUE_EXPR_P (placeholder) = 0; gimple_seq_add_seq (&before, tseq); - OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; + if (is_simd) + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; } } - else + else if (is_simd) { tree x; - if (!octx->scan_inclusive) + if (octx->scan_exclusive) { tree v4 = unshare_expr (var4); tree v2 = unshare_expr (var2); @@ -8828,7 +8796,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) x = (DECL_HAS_VALUE_EXPR_P (new_vard) ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); tree vexpr = val; - if (x && omp_is_reference (var)) + if (x && new_vard != new_var) vexpr = build_fold_addr_expr_loc (clause_loc, val); if (x) SET_DECL_VALUE_EXPR (new_vard, vexpr); @@ -8864,7 +8832,7 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) tree x = omp_reduction_init (c, TREE_TYPE (new_var)); gimplify_assign (val, x, &before); } - else + else if (is_simd) { /* scan phase. */ enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c); @@ -8888,11 +8856,11 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) } } } - if (!octx->scan_inclusive && !input_phase && lane0) + if (octx->scan_exclusive && !input_phase && lane0) { tree vexpr = unshare_expr (var4); TREE_OPERAND (vexpr, 1) = lane0; - if (omp_is_reference (var)) + if (new_vard != new_var) vexpr = build_fold_addr_expr_loc (clause_loc, vexpr); SET_DECL_VALUE_EXPR (new_vard, vexpr); } @@ -8901,9 +8869,17 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) else if (has_clauses) sorry_at (gimple_location (stmt), "%<#pragma omp scan%> not supported yet"); - gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT); - gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT); - gsi_replace (gsi_p, gimple_build_nop (), true); + if (!is_for) + { + gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT); + gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT); + gsi_replace (gsi_p, gimple_build_nop (), true); + } + else if (before) + { + gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (stmt)); + gsi_insert_seq_before (&gsi, before, GSI_SAME_STMT); + } } @@ -9124,6 +9100,712 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, } } +/* Callback for walk_gimple_seq. Find #pragma omp scan statement. */ + +tree +omp_find_scan (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, + struct walk_stmt_info *wi) +{ + gimple *stmt = gsi_stmt (*gsi_p); + + *handled_ops_p = true; + switch (gimple_code (stmt)) + { + WALK_SUBSTMTS; + + case GIMPLE_OMP_SCAN: + *(gimple_stmt_iterator *) (wi->info) = *gsi_p; + return integer_zero_node; + default: + break; + } + return NULL; +} + +/* Helper function for lower_omp_for, add transformations for a worksharing + loop with scan directives inside of it. + For worksharing loop not combined with simd, transform: + #pragma omp for reduction(inscan,+:r) private(i) + for (i = 0; i < n; i = i + 1) + { + { + update (r); + } + #pragma omp scan inclusive(r) + { + use (r); + } + } + + into two worksharing loops + code to merge results: + + num_threads = omp_get_num_threads (); + thread_num = omp_get_thread_num (); + if (thread_num == 0) goto ; else goto ; + : + var2 = r; + goto ; + : + // For UDRs this is UDR init, or if ctors are needed, copy from + // var3 that has been constructed to contain the neutral element. + var2 = 0; + : + ivar = 0; + // The _scantemp_ clauses will arrange for rpriva to be initialized to + // a shared array with num_threads elements and rprivb to a local array + // number of elements equal to the number of (contiguous) iterations the + // current thread will perform. controlb and controlp variables are + // temporaries to handle deallocation of rprivb at the end of second + // GOMP_FOR. + #pragma omp for _scantemp_(rpriva) _scantemp_(rprivb) _scantemp_(controlb) \ + _scantemp_(controlp) reduction(inscan,+:r) private(i) nowait + for (i = 0; i < n; i = i + 1) + { + { + // For UDRs this is UDR init or copy from var3. + r = 0; + // This is the input phase from user code. + update (r); + } + { + // For UDRs this is UDR merge. + var2 = var2 + r; + // Rather than handing it over to the user, save to local thread's + // array. + rprivb[ivar] = var2; + // For exclusive scan, the above two statements are swapped. + ivar = ivar + 1; + } + } + // And remember the final value from this thread's into the shared + // rpriva array. + rpriva[(sizetype) thread_num] = var2; + // If more than one thread, compute using Work-Efficient prefix sum + // the inclusive parallel scan of the rpriva array. + if (num_threads > 1) goto ; else goto ; + : + GOMP_barrier (); + down = 0; + k = 1; + num_threadsu = (unsigned int) num_threads; + thread_numup1 = (unsigned int) thread_num + 1; + : + twok = k << 1; + if (twok > num_threadsu) goto ; else goto ; + : + down = 4294967295; + k = k >> 1; + if (k == num_threadsu) goto ; else goto ; + : + k = k >> 1; + : + twok = k << 1; + cplx = .MUL_OVERFLOW (thread_nump1, twok); + mul = REALPART_EXPR ; + ovf = IMAGPART_EXPR ; + if (ovf == 0) goto ; else goto ; + : + andv = k & down; + andvm1 = andv + 4294967295; + l = mul + andvm1; + if (l < num_threadsu) goto ; else goto ; + : + // For UDRs this is UDR merge, performed using var2 variable as temporary, + // i.e. var2 = rpriva[l - k]; UDR merge (var2, rpriva[l]); rpriva[l] = var2; + rpriva[l] = rpriva[l - k] + rpriva[l]; + : + if (down == 0) goto ; else goto ; + : + k = k << 1; + goto ; + : + k = k >> 1; + : + GOMP_barrier (); + if (k != 0) goto ; else goto ; + : + if (thread_num == 0) goto ; else goto ; + : + // For UDRs this is UDR init or copy from var3. + var2 = 0; + goto ; + : + var2 = rpriva[thread_num - 1]; + : + ivar = 0; + #pragma omp for _scantemp_(controlb) _scantemp_(controlp) \ + reduction(inscan,+:r) private(i) + for (i = 0; i < n; i = i + 1) + { + { + // For UDRs, this is UDR merge (rprivb[ivar], var2); r = rprivb[ivar]; + r = rprivb[ivar] + var2; + } + { + // This is the scan phase from user code. + use (r); + // Plus a bump of the iterator. + ivar = ivar + 1; + } + } */ + +static void +lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt, + struct omp_for_data *fd, omp_context *ctx) +{ + gcc_assert (ctx->scan_inclusive || ctx->scan_exclusive); + + gimple_seq body = gimple_omp_body (stmt); + gimple_stmt_iterator input1_gsi = gsi_none (); + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + wi.val_only = true; + wi.info = (void *) &input1_gsi; + walk_gimple_seq_mod (&body, omp_find_scan, NULL, &wi); + gcc_assert (!gsi_end_p (input1_gsi)); + + gimple *input_stmt1 = gsi_stmt (input1_gsi); + gimple_stmt_iterator gsi = input1_gsi; + gsi_next (&gsi); + gimple_stmt_iterator scan1_gsi = gsi; + gimple *scan_stmt1 = gsi_stmt (gsi); + gcc_assert (scan_stmt1 && gimple_code (scan_stmt1) == GIMPLE_OMP_SCAN); + + gimple_seq input_body = gimple_omp_body (input_stmt1); + gimple_seq scan_body = gimple_omp_body (scan_stmt1); + gimple_omp_set_body (input_stmt1, NULL); + gimple_omp_set_body (scan_stmt1, NULL); + gimple_omp_set_body (stmt, NULL); + + gomp_for *new_stmt = as_a (gimple_copy (stmt)); + gimple_seq new_body = copy_gimple_seq_and_replace_locals (body); + gimple_omp_set_body (stmt, body); + gimple_omp_set_body (input_stmt1, input_body); + + gimple_stmt_iterator input2_gsi = gsi_none (); + memset (&wi, 0, sizeof (wi)); + wi.val_only = true; + wi.info = (void *) &input2_gsi; + walk_gimple_seq_mod (&new_body, omp_find_scan, NULL, &wi); + gcc_assert (!gsi_end_p (input2_gsi)); + + gimple *input_stmt2 = gsi_stmt (input2_gsi); + gsi = input2_gsi; + gsi_next (&gsi); + gimple_stmt_iterator scan2_gsi = gsi; + gimple *scan_stmt2 = gsi_stmt (gsi); + gcc_assert (scan_stmt2 && gimple_code (scan_stmt2) == GIMPLE_OMP_SCAN); + gimple_omp_set_body (scan_stmt2, scan_body); + + tree num_threads = create_tmp_var (integer_type_node); + tree thread_num = create_tmp_var (integer_type_node); + tree nthreads_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + tree threadnum_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + gimple *g = gimple_build_call (nthreads_decl, 0); + gimple_call_set_lhs (g, num_threads); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_call (threadnum_decl, 0); + gimple_call_set_lhs (g, thread_num); + gimple_seq_add_stmt (body_p, g); + + tree ivar = create_tmp_var (sizetype); + tree new_clauses1 = NULL_TREE, new_clauses2 = NULL_TREE; + tree *cp1 = &new_clauses1, *cp2 = &new_clauses2; + tree k = create_tmp_var (unsigned_type_node); + tree l = create_tmp_var (unsigned_type_node); + + gimple_seq clist = NULL, mdlist = NULL; + gimple_seq thr01_list = NULL, thrn1_list = NULL; + gimple_seq thr02_list = NULL, thrn2_list = NULL; + gimple_seq scan1_list = NULL, input2_list = NULL; + gimple_seq last_list = NULL, reduc_list = NULL; + for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_INSCAN (c)) + { + location_t clause_loc = OMP_CLAUSE_LOCATION (c); + tree var = OMP_CLAUSE_DECL (c); + tree new_var = lookup_decl (var, ctx); + tree var3 = NULL_TREE; + tree new_vard = new_var; + if (omp_is_reference (var)) + new_var = build_simple_mem_ref_loc (clause_loc, new_var); + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + { + var3 = maybe_lookup_decl (new_vard, ctx); + if (var3 == new_vard) + var3 = NULL_TREE; + } + + tree ptype = build_pointer_type (TREE_TYPE (new_var)); + tree rpriva = create_tmp_var (ptype); + tree nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_); + OMP_CLAUSE_DECL (nc) = rpriva; + *cp1 = nc; + cp1 = &OMP_CLAUSE_CHAIN (nc); + + tree rprivb = create_tmp_var (ptype); + nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_); + OMP_CLAUSE_DECL (nc) = rprivb; + OMP_CLAUSE__SCANTEMP__ALLOC (nc) = 1; + *cp1 = nc; + cp1 = &OMP_CLAUSE_CHAIN (nc); + + tree var2 = create_tmp_var_raw (TREE_TYPE (new_var)); + if (new_vard != new_var) + TREE_ADDRESSABLE (var2) = 1; + gimple_add_tmp_var (var2); + + tree x = fold_convert_loc (clause_loc, sizetype, thread_num); + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); + tree rpriva_ref = build_simple_mem_ref_loc (clause_loc, x); + + x = fold_build2_loc (clause_loc, PLUS_EXPR, integer_type_node, + thread_num, integer_minus_one_node); + x = fold_convert_loc (clause_loc, sizetype, x); + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); + tree rprivam1_ref = build_simple_mem_ref_loc (clause_loc, x); + + x = fold_convert_loc (clause_loc, sizetype, l); + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); + tree rprival_ref = build_simple_mem_ref_loc (clause_loc, x); + + x = fold_build2_loc (clause_loc, MINUS_EXPR, unsigned_type_node, l, k); + x = fold_convert_loc (clause_loc, sizetype, x); + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x, + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x); + tree rprivalmk_ref = build_simple_mem_ref_loc (clause_loc, x); + + x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, ivar, + TYPE_SIZE_UNIT (TREE_TYPE (ptype))); + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rprivb), rprivb, x); + tree rprivb_ref = build_simple_mem_ref_loc (clause_loc, x); + + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + { + tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); + tree val = var2; + if (new_vard != new_var) + val = build_fold_addr_expr_loc (clause_loc, val); + + x = lang_hooks.decls.omp_clause_default_ctor + (c, var2, build_outer_var_ref (var, ctx)); + if (x) + gimplify_and_add (x, &clist); + + x = build_outer_var_ref (var, ctx); + x = lang_hooks.decls.omp_clause_assign_op (c, var2, x); + gimplify_and_add (x, &thr01_list); + + tree y = (DECL_HAS_VALUE_EXPR_P (new_vard) + ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); + if (var3) + { + x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3); + gimplify_and_add (x, &thrn1_list); + x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3); + gimplify_and_add (x, &thr02_list); + } + else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) + { + /* Otherwise, assign to it the identity element. */ + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); + tseq = copy_gimple_seq_and_replace_locals (tseq); + SET_DECL_VALUE_EXPR (new_vard, val); + DECL_HAS_VALUE_EXPR_P (new_vard) = 1; + SET_DECL_VALUE_EXPR (placeholder, error_mark_node); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; + lower_omp (&tseq, ctx); + gimple_seq_add_seq (&thrn1_list, tseq); + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); + lower_omp (&tseq, ctx); + gimple_seq_add_seq (&thr02_list, tseq); + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; + if (y) + SET_DECL_VALUE_EXPR (new_vard, y); + else + { + DECL_HAS_VALUE_EXPR_P (new_vard) = 0; + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE); + } + } + + x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivam1_ref); + gimplify_and_add (x, &thrn2_list); + + if (ctx->scan_exclusive) + { + x = unshare_expr (rprivb_ref); + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); + gimplify_and_add (x, &scan1_list); + } + + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); + tseq = copy_gimple_seq_and_replace_locals (tseq); + SET_DECL_VALUE_EXPR (placeholder, var2); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; + lower_omp (&tseq, ctx); + gimple_seq_add_seq (&scan1_list, tseq); + + if (ctx->scan_inclusive) + { + x = unshare_expr (rprivb_ref); + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); + gimplify_and_add (x, &scan1_list); + } + + x = unshare_expr (rpriva_ref); + x = lang_hooks.decls.omp_clause_assign_op (c, x, var2); + gimplify_and_add (x, &mdlist); + + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); + tseq = copy_gimple_seq_and_replace_locals (tseq); + SET_DECL_VALUE_EXPR (new_vard, val); + DECL_HAS_VALUE_EXPR_P (new_vard) = 1; + SET_DECL_VALUE_EXPR (placeholder, rprivb_ref); + lower_omp (&tseq, ctx); + if (y) + SET_DECL_VALUE_EXPR (new_vard, y); + else + { + DECL_HAS_VALUE_EXPR_P (new_vard) = 0; + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE); + } + gimple_seq_add_seq (&input2_list, tseq); + + x = unshare_expr (new_var); + x = lang_hooks.decls.omp_clause_assign_op (c, x, rprivb_ref); + gimplify_and_add (x, &input2_list); + + x = build_outer_var_ref (var, ctx); + x = lang_hooks.decls.omp_clause_assign_op (c, x, rpriva_ref); + gimplify_and_add (x, &last_list); + + x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivalmk_ref); + gimplify_and_add (x, &reduc_list); + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); + tseq = copy_gimple_seq_and_replace_locals (tseq); + val = rprival_ref; + if (new_vard != new_var) + val = build_fold_addr_expr_loc (clause_loc, val); + SET_DECL_VALUE_EXPR (new_vard, val); + DECL_HAS_VALUE_EXPR_P (new_vard) = 1; + SET_DECL_VALUE_EXPR (placeholder, var2); + lower_omp (&tseq, ctx); + OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; + if (y) + SET_DECL_VALUE_EXPR (new_vard, y); + else + { + DECL_HAS_VALUE_EXPR_P (new_vard) = 0; + SET_DECL_VALUE_EXPR (new_vard, NULL_TREE); + } + gimple_seq_add_seq (&reduc_list, tseq); + x = lang_hooks.decls.omp_clause_assign_op (c, rprival_ref, var2); + gimplify_and_add (x, &reduc_list); + + x = lang_hooks.decls.omp_clause_dtor (c, var2); + if (x) + gimplify_and_add (x, dlist); + } + else + { + x = build_outer_var_ref (var, ctx); + gimplify_assign (var2, x, &thr01_list); + + x = omp_reduction_init (c, TREE_TYPE (new_var)); + gimplify_assign (var2, unshare_expr (x), &thrn1_list); + gimplify_assign (var2, x, &thr02_list); + + gimplify_assign (var2, rprivam1_ref, &thrn2_list); + + enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c); + if (code == MINUS_EXPR) + code = PLUS_EXPR; + + if (ctx->scan_exclusive) + gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list); + x = build2 (code, TREE_TYPE (new_var), var2, new_var); + gimplify_assign (var2, x, &scan1_list); + if (ctx->scan_inclusive) + gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list); + + gimplify_assign (unshare_expr (rpriva_ref), var2, &mdlist); + + x = build2 (code, TREE_TYPE (new_var), rprivb_ref, var2); + gimplify_assign (new_var, x, &input2_list); + + gimplify_assign (build_outer_var_ref (var, ctx), rpriva_ref, + &last_list); + + x = build2 (code, TREE_TYPE (new_var), rprivalmk_ref, + unshare_expr (rprival_ref)); + gimplify_assign (rprival_ref, x, &reduc_list); + } + } + + g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node); + gimple_seq_add_stmt (&scan1_list, g); + g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node); + gimple_seq_add_stmt (gimple_omp_body_ptr (scan_stmt2), g); + + tree controlb = create_tmp_var (boolean_type_node); + tree controlp = create_tmp_var (ptr_type_node); + tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); + OMP_CLAUSE_DECL (nc) = controlb; + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; + *cp1 = nc; + cp1 = &OMP_CLAUSE_CHAIN (nc); + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); + OMP_CLAUSE_DECL (nc) = controlp; + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; + *cp1 = nc; + cp1 = &OMP_CLAUSE_CHAIN (nc); + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); + OMP_CLAUSE_DECL (nc) = controlb; + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; + *cp2 = nc; + cp2 = &OMP_CLAUSE_CHAIN (nc); + nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_); + OMP_CLAUSE_DECL (nc) = controlp; + OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1; + *cp2 = nc; + cp2 = &OMP_CLAUSE_CHAIN (nc); + + *cp1 = gimple_omp_for_clauses (stmt); + gimple_omp_for_set_clauses (stmt, new_clauses1); + *cp2 = gimple_omp_for_clauses (new_stmt); + gimple_omp_for_set_clauses (new_stmt, new_clauses2); + + gimple_omp_set_body (scan_stmt1, scan1_list); + gimple_omp_set_body (input_stmt2, input2_list); + + gsi_insert_seq_after (&input1_gsi, gimple_omp_body (input_stmt1), + GSI_SAME_STMT); + gsi_remove (&input1_gsi, true); + gsi_insert_seq_after (&scan1_gsi, gimple_omp_body (scan_stmt1), + GSI_SAME_STMT); + gsi_remove (&scan1_gsi, true); + gsi_insert_seq_after (&input2_gsi, gimple_omp_body (input_stmt2), + GSI_SAME_STMT); + gsi_remove (&input2_gsi, true); + gsi_insert_seq_after (&scan2_gsi, gimple_omp_body (scan_stmt2), + GSI_SAME_STMT); + gsi_remove (&scan2_gsi, true); + + gimple_seq_add_seq (body_p, clist); + + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); + tree lab2 = create_artificial_label (UNKNOWN_LOCATION); + tree lab3 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab1); + gimple_seq_add_stmt (body_p, g); + gimple_seq_add_seq (body_p, thr01_list); + g = gimple_build_goto (lab3); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab2); + gimple_seq_add_stmt (body_p, g); + gimple_seq_add_seq (body_p, thrn1_list); + g = gimple_build_label (lab3); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_assign (ivar, size_zero_node); + gimple_seq_add_stmt (body_p, g); + + gimple_seq_add_stmt (body_p, stmt); + gimple_seq_add_seq (body_p, body); + gimple_seq_add_stmt (body_p, gimple_build_omp_continue (fd->loop.v, + fd->loop.v)); + + g = gimple_build_omp_return (true); + gimple_seq_add_stmt (body_p, g); + gimple_seq_add_seq (body_p, mdlist); + + lab1 = create_artificial_label (UNKNOWN_LOCATION); + lab2 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (GT_EXPR, num_threads, integer_one_node, lab1, lab2); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab1); + gimple_seq_add_stmt (body_p, g); + + g = omp_build_barrier (NULL); + gimple_seq_add_stmt (body_p, g); + + tree down = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (down, build_zero_cst (unsigned_type_node)); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_assign (k, build_one_cst (unsigned_type_node)); + gimple_seq_add_stmt (body_p, g); + + tree num_threadsu = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (num_threadsu, NOP_EXPR, num_threads); + gimple_seq_add_stmt (body_p, g); + + tree thread_numu = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (thread_numu, NOP_EXPR, thread_num); + gimple_seq_add_stmt (body_p, g); + + tree thread_nump1 = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (thread_nump1, PLUS_EXPR, thread_numu, + build_int_cst (unsigned_type_node, 1)); + gimple_seq_add_stmt (body_p, g); + + lab3 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_label (lab3); + gimple_seq_add_stmt (body_p, g); + + tree twok = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node); + gimple_seq_add_stmt (body_p, g); + + tree lab4 = create_artificial_label (UNKNOWN_LOCATION); + tree lab5 = create_artificial_label (UNKNOWN_LOCATION); + tree lab6 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (GT_EXPR, twok, num_threadsu, lab4, lab5); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab4); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_assign (down, build_all_ones_cst (unsigned_type_node)); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_cond (EQ_EXPR, k, num_threadsu, lab6, lab5); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab6); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_label (lab5); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node); + gimple_seq_add_stmt (body_p, g); + + tree cplx = create_tmp_var (build_complex_type (unsigned_type_node, false)); + g = gimple_build_call_internal (IFN_MUL_OVERFLOW, 2, thread_nump1, twok); + gimple_call_set_lhs (g, cplx); + gimple_seq_add_stmt (body_p, g); + tree mul = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (mul, REALPART_EXPR, + build1 (REALPART_EXPR, unsigned_type_node, cplx)); + gimple_seq_add_stmt (body_p, g); + tree ovf = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (ovf, IMAGPART_EXPR, + build1 (IMAGPART_EXPR, unsigned_type_node, cplx)); + gimple_seq_add_stmt (body_p, g); + + tree lab7 = create_artificial_label (UNKNOWN_LOCATION); + tree lab8 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (EQ_EXPR, ovf, build_zero_cst (unsigned_type_node), + lab7, lab8); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab7); + gimple_seq_add_stmt (body_p, g); + + tree andv = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (andv, BIT_AND_EXPR, k, down); + gimple_seq_add_stmt (body_p, g); + tree andvm1 = create_tmp_var (unsigned_type_node); + g = gimple_build_assign (andvm1, PLUS_EXPR, andv, + build_minus_one_cst (unsigned_type_node)); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_assign (l, PLUS_EXPR, mul, andvm1); + gimple_seq_add_stmt (body_p, g); + + tree lab9 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (LT_EXPR, l, num_threadsu, lab9, lab8); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab9); + gimple_seq_add_stmt (body_p, g); + gimple_seq_add_seq (body_p, reduc_list); + g = gimple_build_label (lab8); + gimple_seq_add_stmt (body_p, g); + + tree lab10 = create_artificial_label (UNKNOWN_LOCATION); + tree lab11 = create_artificial_label (UNKNOWN_LOCATION); + tree lab12 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (EQ_EXPR, down, build_zero_cst (unsigned_type_node), + lab10, lab11); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab10); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_assign (k, LSHIFT_EXPR, k, integer_one_node); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_goto (lab12); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab11); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab12); + gimple_seq_add_stmt (body_p, g); + + g = omp_build_barrier (NULL); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_cond (NE_EXPR, k, build_zero_cst (unsigned_type_node), + lab3, lab2); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_label (lab2); + gimple_seq_add_stmt (body_p, g); + + lab1 = create_artificial_label (UNKNOWN_LOCATION); + lab2 = create_artificial_label (UNKNOWN_LOCATION); + lab3 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab1); + gimple_seq_add_stmt (body_p, g); + gimple_seq_add_seq (body_p, thr02_list); + g = gimple_build_goto (lab3); + gimple_seq_add_stmt (body_p, g); + g = gimple_build_label (lab2); + gimple_seq_add_stmt (body_p, g); + gimple_seq_add_seq (body_p, thrn2_list); + g = gimple_build_label (lab3); + gimple_seq_add_stmt (body_p, g); + + g = gimple_build_assign (ivar, size_zero_node); + gimple_seq_add_stmt (body_p, g); + gimple_seq_add_stmt (body_p, new_stmt); + gimple_seq_add_seq (body_p, new_body); + + gimple_seq new_dlist = NULL; + lab1 = create_artificial_label (UNKNOWN_LOCATION); + lab2 = create_artificial_label (UNKNOWN_LOCATION); + tree num_threadsm1 = create_tmp_var (integer_type_node); + g = gimple_build_assign (num_threadsm1, PLUS_EXPR, num_threads, + integer_minus_one_node); + gimple_seq_add_stmt (&new_dlist, g); + g = gimple_build_cond (EQ_EXPR, thread_num, num_threadsm1, lab1, lab2); + gimple_seq_add_stmt (&new_dlist, g); + g = gimple_build_label (lab1); + gimple_seq_add_stmt (&new_dlist, g); + gimple_seq_add_seq (&new_dlist, last_list); + g = gimple_build_label (lab2); + gimple_seq_add_stmt (&new_dlist, g); + gimple_seq_add_seq (&new_dlist, *dlist); + *dlist = new_dlist; +} /* Lower code for an OMP loop directive. */ @@ -9317,9 +9999,18 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP && gimple_omp_for_grid_phony (stmt)); - if (!phony_loop) - gimple_seq_add_stmt (&body, stmt); - gimple_seq_add_seq (&body, gimple_omp_body (stmt)); + if ((ctx->scan_inclusive || ctx->scan_exclusive) + && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR) + { + gcc_assert (!phony_loop); + lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx); + } + else + { + if (!phony_loop) + gimple_seq_add_stmt (&body, stmt); + gimple_seq_add_seq (&body, gimple_omp_body (stmt)); + } if (!phony_loop) gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2e3999ed9ef..b2c4cc31b10 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2019-07-03 Jakub Jelinek + * c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message. + * c-c++-common/gomp/scan-5.c (foo): Likewise. + * c-c++-common/gomp/scan-5.c: New test. * c-c++-common/gomp/lastprivate-conditional-5.c: New test. diff --git a/gcc/testsuite/c-c++-common/gomp/scan-3.c b/gcc/testsuite/c-c++-common/gomp/scan-3.c index 4865e08ea2c..4e290eb02df 100644 --- a/gcc/testsuite/c-c++-common/gomp/scan-3.c +++ b/gcc/testsuite/c-c++-common/gomp/scan-3.c @@ -8,7 +8,7 @@ f1 (int *c, int *d) for (i = 0; i < 64; i++) { d[i] = a; - #pragma omp scan inclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ + #pragma omp scan inclusive (a) a += c[i]; } } diff --git a/gcc/testsuite/c-c++-common/gomp/scan-5.c b/gcc/testsuite/c-c++-common/gomp/scan-5.c index 92945bab016..8241621d359 100644 --- a/gcc/testsuite/c-c++-common/gomp/scan-5.c +++ b/gcc/testsuite/c-c++-common/gomp/scan-5.c @@ -6,7 +6,7 @@ foo (int *a, int *b) for (int i = 0; i < 64; i++) { r += a[i]; - #pragma omp scan inclusive (r) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ + #pragma omp scan inclusive (r) b[i] = r; } return r; diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 8ac1978b93a..9ea8638c3c0 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -352,6 +352,9 @@ enum omp_clause_code { /* Internal clause: temporary for lastprivate(conditional:). */ OMP_CLAUSE__CONDTEMP_, + /* Internal clause: temporary for inscan reductions. */ + OMP_CLAUSE__SCANTEMP_, + /* OpenACC/OpenMP clause: if (scalar-expression). */ OMP_CLAUSE_IF, diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 22aa6423756..e703cd982a6 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1349,6 +1349,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE__CONDTEMP_: + case OMP_CLAUSE__SCANTEMP_: break; /* The following clause belongs to the OpenACC cache directive, which @@ -2078,6 +2079,7 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE__CONDTEMP_: + case OMP_CLAUSE__SCANTEMP_: break; /* The following clause belongs to the OpenACC cache directive, which diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 329cc6fceeb..dacda7bd053 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -483,6 +483,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case OMP_CLAUSE__CONDTEMP_: name = "_condtemp_"; goto print_remap; + case OMP_CLAUSE__SCANTEMP_: + name = "_scantemp_"; + goto print_remap; case OMP_CLAUSE_TO_DECLARE: name = "to"; goto print_remap; diff --git a/gcc/tree.c b/gcc/tree.c index f2c779f1fe5..76d94c6578a 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -311,6 +311,7 @@ unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE__LOOPTEMP_ */ 1, /* OMP_CLAUSE__REDUCTEMP_ */ 1, /* OMP_CLAUSE__CONDTEMP_ */ + 1, /* OMP_CLAUSE__SCANTEMP_ */ 1, /* OMP_CLAUSE_IF */ 1, /* OMP_CLAUSE_NUM_THREADS */ 1, /* OMP_CLAUSE_SCHEDULE */ @@ -391,6 +392,7 @@ const char * const omp_clause_code_name[] = "_looptemp_", "_reductemp_", "_condtemp_", + "_scantemp_", "if", "num_threads", "schedule", @@ -12316,6 +12318,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__CONDTEMP_: + case OMP_CLAUSE__SCANTEMP_: case OMP_CLAUSE__SIMDUID_: WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); /* FALLTHRU */ diff --git a/gcc/tree.h b/gcc/tree.h index ed59a5d06bf..3dce602dfba 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1449,7 +1449,7 @@ class auto_suppress_location_wrappers #define OMP_CLAUSE_DECL(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ OMP_CLAUSE_PRIVATE, \ - OMP_CLAUSE__CONDTEMP_), 0) + OMP_CLAUSE__SCANTEMP_), 0) #define OMP_CLAUSE_HAS_LOCATION(NODE) \ (LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus) \ != UNKNOWN_LOCATION) @@ -1761,6 +1761,17 @@ class auto_suppress_location_wrappers #define OMP_CLAUSE__CONDTEMP__ITER(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag) +/* _SCANTEMP_ holding temporary with pointer to thread's local array; + allocation. */ +#define OMP_CLAUSE__SCANTEMP__ALLOC(NODE) \ + (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)->base.public_flag) + +/* _SCANTEMP_ holding temporary with a control variable for deallocation; + one boolean_type_node for test whether alloca was used, another one + to pass to __builtin_stack_restore or free. */ +#define OMP_CLAUSE__SCANTEMP__CONTROL(NODE) \ + TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)) + /* SSA_NAME accessors. */ /* Whether SSA_NAME NODE is a virtual operand. This simply caches the diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 61358ee5dcf..e54f260a6a7 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,22 @@ +2019-07-03 Jakub Jelinek + + * testsuite/libgomp.c++/scan-1.C: New test. + * testsuite/libgomp.c++/scan-2.C: New test. + * testsuite/libgomp.c++/scan-3.C: New test. + * testsuite/libgomp.c++/scan-4.C: New test. + * testsuite/libgomp.c++/scan-5.C: New test. + * testsuite/libgomp.c++/scan-6.C: New test. + * testsuite/libgomp.c++/scan-7.C: New test. + * testsuite/libgomp.c++/scan-8.C: New test. + * testsuite/libgomp.c/scan-1.c: New test. + * testsuite/libgomp.c/scan-2.c: New test. + * testsuite/libgomp.c/scan-3.c: New test. + * testsuite/libgomp.c/scan-4.c: New test. + * testsuite/libgomp.c/scan-5.c: New test. + * testsuite/libgomp.c/scan-6.c: New test. + * testsuite/libgomp.c/scan-7.c: New test. + * testsuite/libgomp.c/scan-8.c: New test. + 2019-06-18 Thomas Schwinge * testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C: New file. diff --git a/libgomp/testsuite/libgomp.c++/scan-1.C b/libgomp/testsuite/libgomp.c++/scan-1.C new file mode 100644 index 00000000000..d148dac924b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-1.C @@ -0,0 +1,151 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); + +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + int s; +}; + +S::S () : s (0) +{ +} + +S::~S () +{ +} + +S::S (const S &x) +{ + s = x.s; +} + +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +__attribute__((noipa)) void +foo (S *a, S *b) +{ + #pragma omp for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r.s += a[i].s; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) S +bar (void) +{ + S s; + #pragma omp parallel + #pragma omp for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + s.s += 2 * a[i].s; + #pragma omp scan inclusive(s) + b[i] = s; + } + return S (s); +} + +__attribute__((noipa)) void +baz (S *a, S *b) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r.s += a[i].s; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) S +qux (void) +{ + S s; + #pragma omp parallel for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + s.s += 2 * a[i].s; + #pragma omp scan inclusive(s) + b[i] = s; + } + return S (s); +} + +int +main () +{ + S s; + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s.s += i; + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += 2 * i; + if (b[i].s != s.s) + abort (); + } + r.s = 0; + baz (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += i; + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += 2 * i; + if (b[i].s != s.s) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/scan-2.C b/libgomp/testsuite/libgomp.c++/scan-2.C new file mode 100644 index 00000000000..94555ccde30 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-2.C @@ -0,0 +1,116 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); +int r, a[1024], b[1024], q; + +__attribute__((noipa)) void +foo (int *a, int *b, int &r) +{ + #pragma omp for reduction (inscan, +:r) nowait + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int &s = q; + q = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s) nowait + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, int &r) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int &s = q; + q = 0; + #pragma omp parallel for reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } +} diff --git a/libgomp/testsuite/libgomp.c++/scan-3.C b/libgomp/testsuite/libgomp.c++/scan-3.C new file mode 100644 index 00000000000..5e83958bf77 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-3.C @@ -0,0 +1,119 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); +int r, a[1024], b[1024], q; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b, int &r) +{ + #pragma omp for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int &s = q; + q = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, int &r) +{ + #pragma omp parallel for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int &s = q; + q = 0; + #pragma omp parallel for reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/scan-4.C b/libgomp/testsuite/libgomp.c++/scan-4.C new file mode 100644 index 00000000000..fc2c682849f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-4.C @@ -0,0 +1,150 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); + +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + int s; +}; + +S::S () : s (0) +{ +} + +S::~S () +{ +} + +S::S (const S &x) +{ + s = x.s; +} + +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +__attribute__((noipa)) void +foo (S *a, S *b, S &r) +{ + #pragma omp for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r.s += a[i].s; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) S +bar () +{ + S s; + #pragma omp parallel + #pragma omp for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + s.s += 2 * a[i].s; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +__attribute__((noipa)) void +baz (S *a, S *b, S &r) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r.s += a[i].s; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) S +qux () +{ + S s; + #pragma omp parallel for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + s.s += 2 * a[i].s; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + S s; + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s.s += i; + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += 2 * i; + if (b[i].s != s.s) + abort (); + } + r.s = 0; + baz (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += i; + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += 2 * i; + if (b[i].s != s.s) + abort (); + } +} diff --git a/libgomp/testsuite/libgomp.c++/scan-5.C b/libgomp/testsuite/libgomp.c++/scan-5.C new file mode 100644 index 00000000000..5931a7fe14e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-5.C @@ -0,0 +1,158 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); + +template +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + T s; +}; + +template +S::S () : s (0) +{ +} + +template +S::~S () +{ +} + +template +S::S (const S &x) +{ + s = x.s; +} + +template +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +template +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +template +__attribute__((noipa)) void +foo (S *a, S *b) +{ + #pragma omp for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +template +__attribute__((noipa)) S +bar (void) +{ + S s; + #pragma omp parallel + #pragma omp for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return S (s); +} + +__attribute__((noipa)) void +baz (S *a, S *b) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +qux (void) +{ + S s; + #pragma omp parallel for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return S (s); +} + +int +main () +{ + S s; + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + r.s = 0; + baz (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } +} diff --git a/libgomp/testsuite/libgomp.c++/scan-6.C b/libgomp/testsuite/libgomp.c++/scan-6.C new file mode 100644 index 00000000000..46674e7a243 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-6.C @@ -0,0 +1,120 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); +int r, a[1024], b[1024], q; + +template +__attribute__((noipa)) void +foo (T a, T b, U r) +{ + #pragma omp for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +template +__attribute__((noipa)) T +bar () +{ + T &s = q; + q = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +template +__attribute__((noipa)) void +baz (T *a, T *b, T &r) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (T i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +template +__attribute__((noipa)) int +qux () +{ + T s = q; + q = 0; + #pragma omp parallel for reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } +} diff --git a/libgomp/testsuite/libgomp.c++/scan-7.C b/libgomp/testsuite/libgomp.c++/scan-7.C new file mode 100644 index 00000000000..ebeb20302bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-7.C @@ -0,0 +1,118 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); +int r, a[1024], b[1024], q; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b, int &r) +{ + #pragma omp for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int &s = q; + q = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, foo:s) nowait + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, int &r) +{ + #pragma omp parallel for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int &s = q; + q = 0; + #pragma omp parallel for reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } +} diff --git a/libgomp/testsuite/libgomp.c++/scan-8.C b/libgomp/testsuite/libgomp.c++/scan-8.C new file mode 100644 index 00000000000..cfdfb6b7624 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/scan-8.C @@ -0,0 +1,150 @@ +// { dg-require-effective-target size32plus } + +extern "C" void abort (); + +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + int s; +}; + +S::S () : s (0) +{ +} + +S::~S () +{ +} + +S::S (const S &x) +{ + s = x.s; +} + +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +__attribute__((noipa)) void +foo (S *a, S *b, S &r) +{ + #pragma omp for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +bar (void) +{ + S s; + #pragma omp parallel + #pragma omp for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return s; +} + +__attribute__((noipa)) void +baz (S *a, S *b, S &r) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +qux (void) +{ + S s; + #pragma omp parallel for reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return s; +} + +int +main () +{ + S s; + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + r.s = 0; + baz (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } +} diff --git a/libgomp/testsuite/libgomp.c/scan-1.c b/libgomp/testsuite/libgomp.c/scan-1.c new file mode 100644 index 00000000000..dd34f33a694 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-1.c @@ -0,0 +1,115 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/scan-2.c b/libgomp/testsuite/libgomp.c/scan-2.c new file mode 100644 index 00000000000..476bbedfd60 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-2.c @@ -0,0 +1,117 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024]; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/scan-3.c b/libgomp/testsuite/libgomp.c/scan-3.c new file mode 100644 index 00000000000..2e1fcadba04 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-3.c @@ -0,0 +1,88 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +float r = 1.0f, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (float *a, float *b) +{ + #pragma omp for reduction (inscan, *:r) + for (int i = 0; i < 1024; i++) + { + r *= a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) float +bar (void) +{ + float s = -__builtin_inff (); + #pragma omp parallel for reduction (inscan, max:s) + for (int i = 0; i < 1024; i++) + { + s = s > a[i] ? s : a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + float s = 1.0f; + for (int i = 0; i < 1024; ++i) + { + if (i < 80) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 200) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else if (i < 280) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 380) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else + switch (i % 6) + { + case 0: a[i] = 0.25f; break; + case 1: a[i] = 2.0f; break; + case 2: a[i] = -1.0f; break; + case 3: a[i] = -4.0f; break; + case 4: a[i] = 0.5f; break; + case 5: a[i] = 1.0f; break; + default: a[i] = 0.0f; break; + } + b[i] = -19.0f; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r * 16384.0f != 0.125f) + abort (); + float m = -175.25f; + for (int i = 0; i < 1024; ++i) + { + s *= a[i]; + if (b[i] != s) + abort (); + else + { + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f); + b[i] = -231.75f; + m += 0.75f; + } + } + if (bar () != 592.0f) + abort (); + s = -__builtin_inff (); + for (int i = 0; i < 1024; ++i) + { + if (s < a[i]) + s = a[i]; + if (b[i] != s) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/scan-4.c b/libgomp/testsuite/libgomp.c/scan-4.c new file mode 100644 index 00000000000..b1e999352fa --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-4.c @@ -0,0 +1,179 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024]; +unsigned short r2, b2[1024]; +unsigned char r3, b3[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp for reduction (inscan, +:r, r2, r3) + for (int i = 0; i < 1024; i++) + { + { r += a[i]; r2 += a[i]; r3 += a[i]; } + #pragma omp scan inclusive(r, r2, r3) + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + } +} + +__attribute__((noipa)) int +bar (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s, s2, s3) + for (int i = 0; i < 1024; i++) + { + { + s += 2 * a[i]; + s2 += 2 * a[i]; + s3 += 2 * a[i]; + } + #pragma omp scan inclusive(s, s2, s3) + { b[i] = s; b2[i] = s2; b3[i] = s3; } + } + *s2p = s2; + *s3p = s3; + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp parallel for reduction (inscan, +:r, r2, r3) + for (int i = 0; i < 1024; i++) + { + { + r += a[i]; + r2 += a[i]; + r3 += a[i]; + } + #pragma omp scan inclusive(r, r2, r3) + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + } +} + +__attribute__((noipa)) int +qux (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp parallel for reduction (inscan, +:s, s2, s3) + for (int i = 0; i < 1024; i++) + { + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } + #pragma omp scan inclusive(s, s2, s3) + { b[i] = s; b2[i] = s2; b3[i] = s3; } + } + *s2p = s2; + *s3p = s3; + return s; +} + +int +main () +{ + int s = 0; + unsigned short s2; + unsigned char s3; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + } + if (bar (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + } + } + r = 0; + r2 = 0; + r3 = 0; + baz (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + } + s2 = 0; + s3 = 0; + if (qux (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/scan-5.c b/libgomp/testsuite/libgomp.c/scan-5.c new file mode 100644 index 00000000000..c50418264ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-5.c @@ -0,0 +1,115 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/scan-6.c b/libgomp/testsuite/libgomp.c/scan-6.c new file mode 100644 index 00000000000..9434ebb2b5a --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-6.c @@ -0,0 +1,117 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024]; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/scan-7.c b/libgomp/testsuite/libgomp.c/scan-7.c new file mode 100644 index 00000000000..20b7071e8f6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-7.c @@ -0,0 +1,86 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +float r = 1.0f, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (float *a, float *b) +{ + #pragma omp for reduction (inscan, *:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r *= a[i]; + } +} + +__attribute__((noipa)) float +bar (void) +{ + float s = -__builtin_inff (); + #pragma omp parallel for reduction (inscan, max:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s = s > a[i] ? s : a[i]; + } + return s; +} + +int +main () +{ + float s = 1.0f; + for (int i = 0; i < 1024; ++i) + { + if (i < 80) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 200) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else if (i < 280) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 380) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else + switch (i % 6) + { + case 0: a[i] = 0.25f; break; + case 1: a[i] = 2.0f; break; + case 2: a[i] = -1.0f; break; + case 3: a[i] = -4.0f; break; + case 4: a[i] = 0.5f; break; + case 5: a[i] = 1.0f; break; + default: a[i] = 0.0f; break; + } + b[i] = -19.0f; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r * 16384.0f != 0.125f) + abort (); + float m = -175.25f; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -231.75f; + s *= a[i]; + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f); + m += 0.75f; + } + if (bar () != 592.0f) + abort (); + s = -__builtin_inff (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + if (s < a[i]) + s = a[i]; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/scan-8.c b/libgomp/testsuite/libgomp.c/scan-8.c new file mode 100644 index 00000000000..f09c85c36e7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/scan-8.c @@ -0,0 +1,179 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024]; +unsigned short r2, b2[1024]; +unsigned char r3, b3[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp for reduction (inscan, +:r, r2, r3) + for (int i = 0; i < 1024; i++) + { + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + #pragma omp scan exclusive(r, r2, r3) + { r += a[i]; r2 += a[i]; r3 += a[i]; } + } +} + +__attribute__((noipa)) int +bar (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s, s2, s3) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; b2[i] = s2; b3[i] = s3; } + #pragma omp scan exclusive(s, s2, s3) + { + s += 2 * a[i]; + s2 += 2 * a[i]; + s3 += 2 * a[i]; + } + } + *s2p = s2; + *s3p = s3; + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp parallel for reduction (inscan, +:r, r2, r3) + for (int i = 0; i < 1024; i++) + { + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + #pragma omp scan exclusive(r, r2, r3) + { + r += a[i]; + r2 += a[i]; + r3 += a[i]; + } + } +} + +__attribute__((noipa)) int +qux (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp parallel for reduction (inscan, +:s, s2, s3) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; b2[i] = s2; b3[i] = s3; } + #pragma omp scan exclusive(s, s2, s3) + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } + } + *s2p = s2; + *s3p = s3; + return s; +} + +int +main () +{ + int s = 0; + unsigned short s2; + unsigned char s3; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + s += i; + } + if (bar (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + } + s += 2 * i; + } + r = 0; + r2 = 0; + r3 = 0; + baz (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + s += i; + } + s2 = 0; + s3 = 0; + if (qux (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + s += 2 * i; + } + return 0; +}