omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument...

* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
	create another "omp scan inscan exclusive" array if
	!ctx->scan_inclusive.
	(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
	(lower_omp_scan): Likewise.
	* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
	2-bit bitfield for simd_lane_access_p member.
	* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
	aux == (void *)-4 as simd lane access.
	* tree-vect-stmts.c (check_scan_store): Handle exclusive scan.  Update
	comment with permutations to show the canonical permutation order.
	(vectorizable_scan_store): Handle exclusive scan.
	(vectorizable_store): Call vectorizable_scan_store even for
	STMT_VINFO_SIMD_LANE_ACCESS_P > 3.

	* gcc.dg/vect/vect-simd-12.c: New test.
	* gcc.dg/vect/vect-simd-13.c: New test.
	* gcc.dg/vect/vect-simd-14.c: New test.
	* gcc.dg/vect/vect-simd-15.c: New test.
	* gcc.target/i386/sse2-vect-simd-12.c: New test.
	* gcc.target/i386/sse2-vect-simd-13.c: New test.
	* gcc.target/i386/sse2-vect-simd-14.c: New test.
	* gcc.target/i386/sse2-vect-simd-15.c: New test.
	* gcc.target/i386/avx2-vect-simd-12.c: New test.
	* gcc.target/i386/avx2-vect-simd-13.c: New test.
	* gcc.target/i386/avx2-vect-simd-14.c: New test.
	* gcc.target/i386/avx2-vect-simd-15.c: New test.
	* gcc.target/i386/avx512f-vect-simd-12.c: New test.
	* gcc.target/i386/avx512f-vect-simd-13.c: New test.
	* gcc.target/i386/avx512f-vect-simd-14.c: New test.
	* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
	* g++.dg/vect/simd-6.cc: New test.
	* g++.dg/vect/simd-7.cc: New test.
	* g++.dg/vect/simd-8.cc: New test.
	* g++.dg/vect/simd-9.cc: New test.
	* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.

From-SVN: r272544
This commit is contained in:
Jakub Jelinek 2019-06-21 08:48:57 +02:00 committed by Jakub Jelinek
parent e73fb06d5a
commit 1612b1febd
27 changed files with 1757 additions and 63 deletions

View File

@ -1,5 +1,20 @@
2019-06-21 Jakub Jelinek <jakub@redhat.com>
* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
create another "omp scan inscan exclusive" array if
!ctx->scan_inclusive.
(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
(lower_omp_scan): Likewise.
* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
2-bit bitfield for simd_lane_access_p member.
* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
aux == (void *)-4 as simd lane access.
* tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update
comment with permutations to show the canonical permutation order.
(vectorizable_scan_store): Handle exclusive scan.
(vectorizable_store): Call vectorizable_scan_store even for
STMT_VINFO_SIMD_LANE_ACCESS_P > 3.
* tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle
"omp simd array" arrays with one byte elements.

View File

@ -3692,7 +3692,8 @@ struct omplow_simd_context {
static bool
lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
omplow_simd_context *sctx, tree &ivar,
tree &lvar, tree *rvar = NULL)
tree &lvar, tree *rvar = NULL,
tree *rvar2 = NULL)
{
if (known_eq (sctx->max_vf, 0U))
{
@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
*rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
sctx->lastlane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar) = 1;
if (!ctx->scan_inclusive)
{
/* And for exclusive scan yet another one, which will
hold the value during the scan phase. */
tree savar = create_tmp_var_raw (atype);
if (TREE_ADDRESSABLE (new_var))
TREE_ADDRESSABLE (savar) = 1;
DECL_ATTRIBUTES (savar)
= tree_cons (get_identifier ("omp simd array"), NULL,
tree_cons (get_identifier ("omp simd inscan "
"exclusive"), NULL,
DECL_ATTRIBUTES (savar)));
gimple_add_tmp_var (savar);
ctx->cb.decl_map->put (iavar, savar);
*rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar,
sctx->idx, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar2) = 1;
}
}
ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
NULL_TREE, NULL_TREE);
@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
tree rvar = NULL_TREE, *rvarp = NULL;
tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
ivar, lvar, rvarp))
ivar, lvar, rvarp,
&rvar2))
{
if (new_vard == new_var)
{
@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
(c, ivar2, build_outer_var_ref (var, ctx));
gimplify_and_add (x, &llist[0]);
if (rvar2)
{
x = lang_hooks.decls.omp_clause_default_ctor
(c, unshare_expr (rvar2),
build_outer_var_ref (var, ctx));
gimplify_and_add (x, &llist[0]);
}
/* For types that need construction, add another
private var which will be default constructed
and optionally initialized with
@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
iteration. */
tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
gimple_add_tmp_var (nv);
ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
ctx->cb.decl_map->put (TREE_OPERAND (rvar2
? rvar2
: ivar, 0),
nv);
x = lang_hooks.decls.omp_clause_default_ctor
(c, nv, build_outer_var_ref (var, ctx));
@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
if (rvar2)
{
x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (&llist[1], tseq);
}
}
break;
}
if (x)
@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_seq (ilist, tseq);
}
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
if (!ctx->scan_inclusive)
{
tree nv2
= create_tmp_var_raw (TREE_TYPE (new_var));
gimple_add_tmp_var (nv2);
ctx->cb.decl_map->put (nv, nv2);
x = lang_hooks.decls.omp_clause_default_ctor
(c, nv2, build_outer_var_ref (var, ctx));
gimplify_and_add (x, ilist);
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
}
x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x)
{
@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
gimple_seq_add_seq (dlist, tseq);
}
}
else if (!ctx->scan_inclusive
&& TREE_ADDRESSABLE (TREE_TYPE (new_var)))
{
tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
gimple_add_tmp_var (nv2);
ctx->cb.decl_map->put (new_vard, nv2);
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x)
{
tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gimple_seq_add_seq (dlist, tseq);
}
}
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
goto do_dtor;
}
@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
tree rvar = NULL_TREE, *rvarp = NULL;
tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
if (is_simd
&& OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
ivar, lvar, rvarp))
ivar, lvar, rvarp,
&rvar2))
{
if (new_vard != new_var)
{
@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gimple_seq before = NULL;
omp_context *octx = ctx->outer;
gcc_assert (octx);
if (!octx->scan_inclusive && !has_clauses)
{
gimple_stmt_iterator gsi2 = *gsi_p;
gsi_next (&gsi2);
gimple *stmt2 = gsi_stmt (gsi2);
/* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses
with following GIMPLE_OMP_SCAN with clauses, so that input_phase,
the one with exclusive clause(s), comes first. */
if (stmt2
&& gimple_code (stmt2) == GIMPLE_OMP_SCAN
&& gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt2)) != NULL)
{
gsi_remove (gsi_p, false);
gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT);
ctx = maybe_lookup_ctx (stmt2);
gcc_assert (ctx);
lower_omp_scan (gsi_p, ctx);
return;
}
}
bool input_phase = has_clauses ^ octx->scan_inclusive;
if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
&& (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
&& !gimple_omp_for_combined_into_p (octx->stmt)
&& octx->scan_inclusive)
&& !gimple_omp_for_combined_into_p (octx->stmt))
{
if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
OMP_CLAUSE__SIMDUID_))
{
tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
lane = create_tmp_var (unsigned_type_node);
tree t = build_int_cst (integer_type_node, 1 + !input_phase);
tree t = build_int_cst (integer_type_node,
input_phase ? 1
: octx->scan_inclusive ? 2 : 3);
gimple *g
= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
gimple_call_set_lhs (g, lane);
@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
tree val = new_var;
tree var2 = NULL_TREE;
tree var3 = NULL_TREE;
tree var4 = NULL_TREE;
tree lane0 = NULL_TREE;
tree new_vard = new_var;
if (omp_is_reference (var))
{
@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
DECL_ATTRIBUTES (v)))
{
val = unshare_expr (val);
lane0 = TREE_OPERAND (val, 1);
TREE_OPERAND (val, 1) = lane;
var2 = lookup_decl (v, octx);
if (!octx->scan_inclusive)
var4 = lookup_decl (var2, octx);
if (input_phase
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
var3 = maybe_lookup_decl (var2, octx);
var3 = maybe_lookup_decl (var4 ? var4 : var2, octx);
if (!input_phase)
{
var2 = build4 (ARRAY_REF, TREE_TYPE (val),
var2, lane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (var2) = 1;
if (!octx->scan_inclusive)
{
var4 = build4 (ARRAY_REF, TREE_TYPE (val),
var4, lane, NULL_TREE,
NULL_TREE);
TREE_THIS_NOTRAP (var4) = 1;
}
}
else
var2 = val;
@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
else
{
var2 = build_outer_var_ref (var, octx);
if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
var3 = maybe_lookup_decl (new_vard, octx);
if (var3 == new_vard)
if (var3 == new_vard || var3 == NULL_TREE)
var3 = NULL_TREE;
else if (!octx->scan_inclusive && !input_phase)
{
var4 = maybe_lookup_decl (var3, octx);
if (var4 == var3 || var4 == NULL_TREE)
{
if (TREE_ADDRESSABLE (TREE_TYPE (new_var)))
{
var4 = var3;
var3 = NULL_TREE;
}
else
var4 = NULL_TREE;
}
}
}
if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE)
var4 = create_tmp_var (TREE_TYPE (val));
}
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
@ -8689,9 +8816,17 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
}
else
{
tree x;
if (!octx->scan_inclusive)
{
tree v4 = unshare_expr (var4);
tree v2 = unshare_expr (var2);
x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2);
gimplify_and_add (x, &before);
}
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
x = (DECL_HAS_VALUE_EXPR_P (new_vard)
? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
tree vexpr = val;
if (x && omp_is_reference (var))
vexpr = build_fold_addr_expr_loc (clause_loc, val);
@ -8706,8 +8841,18 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
SET_DECL_VALUE_EXPR (new_vard, x);
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
gimplify_and_add (x, &before);
if (octx->scan_inclusive)
{
x = lang_hooks.decls.omp_clause_assign_op (c, val,
var2);
gimplify_and_add (x, &before);
}
else if (lane0 == NULL_TREE)
{
x = lang_hooks.decls.omp_clause_assign_op (c, val,
var4);
gimplify_and_add (x, &before);
}
}
}
else
@ -8728,10 +8873,29 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
tree x = build2 (code, TREE_TYPE (var2),
unshare_expr (var2), unshare_expr (val));
gimplify_assign (unshare_expr (var2), x, &before);
gimplify_assign (val, var2, &before);
if (octx->scan_inclusive)
{
gimplify_assign (unshare_expr (var2), x, &before);
gimplify_assign (val, var2, &before);
}
else
{
gimplify_assign (unshare_expr (var4),
unshare_expr (var2), &before);
gimplify_assign (var2, x, &before);
if (lane0 == NULL_TREE)
gimplify_assign (val, var4, &before);
}
}
}
if (!octx->scan_inclusive && !input_phase && lane0)
{
tree vexpr = unshare_expr (var4);
TREE_OPERAND (vexpr, 1) = lane0;
if (omp_is_reference (var))
vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
SET_DECL_VALUE_EXPR (new_vard, vexpr);
}
}
}
else if (has_clauses)

View File

@ -1,5 +1,27 @@
2019-06-21 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-12.c: New test.
* gcc.dg/vect/vect-simd-13.c: New test.
* gcc.dg/vect/vect-simd-14.c: New test.
* gcc.dg/vect/vect-simd-15.c: New test.
* gcc.target/i386/sse2-vect-simd-12.c: New test.
* gcc.target/i386/sse2-vect-simd-13.c: New test.
* gcc.target/i386/sse2-vect-simd-14.c: New test.
* gcc.target/i386/sse2-vect-simd-15.c: New test.
* gcc.target/i386/avx2-vect-simd-12.c: New test.
* gcc.target/i386/avx2-vect-simd-13.c: New test.
* gcc.target/i386/avx2-vect-simd-14.c: New test.
* gcc.target/i386/avx2-vect-simd-15.c: New test.
* gcc.target/i386/avx512f-vect-simd-12.c: New test.
* gcc.target/i386/avx512f-vect-simd-13.c: New test.
* gcc.target/i386/avx512f-vect-simd-14.c: New test.
* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
* g++.dg/vect/simd-6.cc: New test.
* g++.dg/vect/simd-7.cc: New test.
* g++.dg/vect/simd-8.cc: New test.
* g++.dg/vect/simd-9.cc: New test.
* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.
PR c++/90950
* g++.dg/gomp/lastprivate-1.C: New test.

View File

@ -8,7 +8,7 @@ f1 (int *c, int *d)
for (i = 0; i < 64; i++)
{
d[i] = a;
#pragma omp scan exclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
#pragma omp scan exclusive (a)
a += c[i];
}
}

View File

@ -0,0 +1,161 @@
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
template <typename T>
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
T s;
};
template <typename T>
S<T>::S () : s (0)
{
}
template <typename T>
S<T>::~S ()
{
}
template <typename T>
S<T>::S (const S &x)
{
s = x.s;
}
template <typename T>
S<T> &
S<T>::operator= (const S &x)
{
s = x.s;
return *this;
}
template <typename T>
static inline void
ini (S<T> &x)
{
x.s = 0;
}
S<int> r, a[1024], b[1024];
#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
template <typename T>
__attribute__((noipa)) void
foo (S<T> *a, S<T> *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
template <typename T>
__attribute__((noipa)) S<T>
bar (void)
{
S<T> s;
#pragma omp simd reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<T> (s);
}
__attribute__((noipa)) void
baz (S<int> *a, S<int> *b)
{
#pragma omp simd reduction (inscan, +:r) simdlen(1)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S<int>
qux (void)
{
S<int> s;
#pragma omp simd if (0) reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<int> (s);
}
int
main ()
{
S<int> s;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
foo (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (bar<int> ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
r.s = 0;
baz (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
return 0;
}

View File

@ -0,0 +1,124 @@
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#include "../../gcc.dg/vect/tree-vect.h"
int r, a[1024], b[1024], q;
template <typename T, typename U>
__attribute__((noipa)) void
foo (T a, T b, U r)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
template <typename T>
__attribute__((noipa)) T
bar (void)
{
T &s = q;
q = 0;
#pragma omp simd reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
template <typename T>
__attribute__((noipa)) void
baz (T *a, T *b, T &r)
{
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
for (T i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
template <typename T>
__attribute__((noipa)) int
qux (void)
{
T s = q;
q = 0;
#pragma omp simd reduction (inscan, +:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo<int *, int &> (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar<int> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz<int> (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux<int &> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}

View File

@ -0,0 +1,122 @@
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
int r, a[1024], b[1024], q;
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b, int &r)
{
#pragma omp simd reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
bar (void)
{
int &s = q;
q = 0;
#pragma omp simd reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, int &r)
{
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
qux (void)
{
int &s = q;
q = 0;
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}

View File

@ -0,0 +1,153 @@
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
int s;
};
S::S () : s (0)
{
}
S::~S ()
{
}
S::S (const S &x)
{
s = x.s;
}
S &
S::operator= (const S &x)
{
s = x.s;
return *this;
}
static inline void
ini (S &x)
{
x.s = 0;
}
S r, a[1024], b[1024];
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
__attribute__((noipa)) void
foo (S *a, S *b, S &r)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S
bar (void)
{
S s;
#pragma omp simd reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
__attribute__((noipa)) void
baz (S *a, S *b, S &r)
{
#pragma omp simd reduction (inscan, +:r) simdlen(1)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
__attribute__((noipa)) S
qux (void)
{
S s;
#pragma omp simd if (0) reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
int
main ()
{
S s;
check_vect ();
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
asm ("" : "+g" (i));
}
foo (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (bar ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
r.s = 0;
baz (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
return 0;
}

View File

@ -0,0 +1,122 @@
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
int r, a[1024], b[1024];
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}

View File

@ -0,0 +1,124 @@
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
int r, a[1024], b[1024];
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
int
main ()
{
int s = 0;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b);
if (r != 1024 * 1023 / 2)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}

View File

@ -0,0 +1,94 @@
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
float r = 1.0f, a[1024], b[1024];
__attribute__((noipa)) void
foo (float *a, float *b)
{
#pragma omp simd reduction (inscan, *:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r *= a[i];
}
}
__attribute__((noipa)) float
bar (void)
{
float s = -__builtin_inff ();
#pragma omp simd reduction (inscan, max:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s = s > a[i] ? s : a[i];
}
return s;
}
int
main ()
{
float s = 1.0f;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
if (i < 80)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 200)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else if (i < 280)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 380)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else
switch (i % 6)
{
case 0: a[i] = 0.25f; break;
case 1: a[i] = 2.0f; break;
case 2: a[i] = -1.0f; break;
case 3: a[i] = -4.0f; break;
case 4: a[i] = 0.5f; break;
case 5: a[i] = 1.0f; break;
default: a[i] = 0.0f; break;
}
b[i] = -19.0f;
asm ("" : "+g" (i));
}
foo (a, b);
if (r * 16384.0f != 0.125f)
abort ();
float m = -175.25f;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -231.75f;
s *= a[i];
a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
m += 0.75f;
}
if (bar () != 592.0f)
abort ();
s = -__builtin_inff ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
if (s < a[i])
s = a[i];
}
return 0;
}

View File

@ -0,0 +1,186 @@
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
int r, a[1024], b[1024];
unsigned short r2, b2[1024];
unsigned char r3, b3[1024];
__attribute__((noipa)) void
foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp simd reduction (inscan, +:r, r2, r3)
for (int i = 0; i < 1024; i++)
{
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
#pragma omp scan exclusive(r, r2, r3)
{ r += a[i]; r2 += a[i]; r3 += a[i]; }
}
}
__attribute__((noipa)) int
bar (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp simd reduction (inscan, +:s, s2, s3)
for (int i = 0; i < 1024; i++)
{
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
#pragma omp scan exclusive(s, s2, s3)
{
s += 2 * a[i];
s2 += 2 * a[i];
s3 += 2 * a[i];
}
}
*s2p = s2;
*s3p = s3;
return s;
}
__attribute__((noipa)) void
baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
#pragma omp scan exclusive(r, r2, r3)
{
r += a[i];
r2 += a[i];
r3 += a[i];
}
}
}
__attribute__((noipa)) int
qux (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
for (int i = 0; i < 1024; i++)
{
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
#pragma omp scan exclusive(s, s2, s3)
{ s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
}
*s2p = s2;
*s3p = s3;
return s;
}
int
main ()
{
int s = 0;
unsigned short s2;
unsigned char s3;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
asm ("" : "+g" (i));
}
foo (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
if (bar (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
}
s += 2 * i;
}
r = 0;
r2 = 0;
r3 = 0;
baz (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
s2 = 0;
s3 = 0;
if (qux (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
s += 2 * i;
}
return 0;
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
static void
avx2_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
static void
avx2_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
static void
avx2_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
static void
avx2_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512bw } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512bw-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
static void
avx512bw_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
static void
avx512f_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
static void
avx512f_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
static void
avx512f_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
static void
sse2_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
static void
sse2_test (void)
{
do_main ();
}

View File

@ -0,0 +1,15 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
static void
sse2_test (void)
{
do_main ();
}

View File

@ -0,0 +1,16 @@
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
static void
sse2_test (void)
{
do_main ();
}

View File

@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
/* See if this was detected as SIMD lane access. */
if (dr->aux == (void *)-1
|| dr->aux == (void *)-2
|| dr->aux == (void *)-3)
|| dr->aux == (void *)-3
|| dr->aux == (void *)-4)
{
if (nested_in_vect_loop_p (loop, stmt_info))
return opt_result::failure_at (stmt_info->stmt,

View File

@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
kinds are there in order to allow optimizing the initializer store
and combiner sequence, e.g. if it is originally some C++ish user
defined reduction, but allow the vectorizer to pattern recognize it
and turn into the appropriate vectorized scan. */
and turn into the appropriate vectorized scan.
For exclusive scan, this is slightly different:
#pragma omp simd reduction(inscan,+:r)
for (...)
{
use (r);
#pragma omp scan exclusive (r)
r += something ();
}
shall have body with:
// Initialization for input phase, store the reduction initializer:
_20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
_21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
D.2042[_21] = 0;
// Actual input phase:
...
r.0_5 = D.2042[_20];
_6 = _4 + r.0_5;
D.2042[_20] = _6;
// Initialization for scan phase:
_25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
_26 = D.2043[_25];
D.2044[_25] = _26;
_27 = D.2042[_25];
_28 = _26 + _27;
D.2043[_25] = _28;
// Actual scan phase:
...
r.1_8 = D.2044[_20];
... */
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
{
@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
if (TREE_CODE (rhs) != SSA_NAME)
goto fail;
use_operand_p use_p;
imm_use_iterator iter;
gimple *other_store_stmt = NULL;
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
bool inscan_var_store
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == stmt || is_gimple_debug (use_stmt))
continue;
if (gimple_bb (use_stmt) != gimple_bb (stmt)
|| !gimple_store_p (use_stmt)
|| other_store_stmt)
goto fail;
other_store_stmt = use_stmt;
if (!inscan_var_store)
{
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == stmt || is_gimple_debug (use_stmt))
continue;
if (gimple_bb (use_stmt) != gimple_bb (stmt)
|| !is_gimple_assign (use_stmt)
|| gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
|| other_store_stmt
|| TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
goto fail;
other_store_stmt = use_stmt;
}
if (other_store_stmt == NULL)
goto fail;
rhs = gimple_assign_lhs (other_store_stmt);
if (!single_imm_use (rhs, &use_p, &other_store_stmt))
goto fail;
}
}
if (other_store_stmt == NULL)
goto fail;
stmt_vec_info other_store_stmt_info
= loop_vinfo->lookup_stmt (other_store_stmt);
if (other_store_stmt_info == NULL
|| STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
{
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == stmt || is_gimple_debug (use_stmt))
continue;
if (other_store_stmt)
goto fail;
other_store_stmt = use_stmt;
}
}
else
goto fail;
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
tree rhs1 = gimple_assign_rhs1 (def_stmt);
tree rhs2 = gimple_assign_rhs2 (def_stmt);
if (TREE_CODE (rhs1) != SSA_NAME
|| TREE_CODE (rhs2) != SSA_NAME)
if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
goto fail;
gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
if (load1_stmt_info == NULL
|| load2_stmt_info == NULL
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
|| STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
goto fail;
if (scan_operand_equal_p (gimple_assign_lhs (stmt),
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
{
dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
goto fail;
tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
tree lrhs;
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
lrhs = rhs1;
else
lrhs = rhs2;
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
continue;
if (other_store_stmt)
goto fail;
other_store_stmt = use_stmt;
}
}
if (other_store_stmt == NULL)
goto fail;
if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
|| !gimple_store_p (other_store_stmt))
goto fail;
stmt_vec_info other_store_stmt_info
= loop_vinfo->lookup_stmt (other_store_stmt);
if (other_store_stmt_info == NULL
|| (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
!= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
goto fail;
gimple *stmt1 = stmt;
gimple *stmt2 = other_store_stmt;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
std::swap (stmt1, stmt2);
if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
gimple_assign_rhs1 (load2_stmt)))
{
std::swap (rhs1, rhs2);
std::swap (load1_stmt, load2_stmt);
std::swap (load1_stmt_info, load2_stmt_info);
}
if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
gimple_assign_rhs1 (load1_stmt))
|| !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
gimple_assign_rhs1 (load1_stmt)))
goto fail;
tree var3 = NULL_TREE;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
&& !scan_operand_equal_p (gimple_assign_lhs (stmt2),
gimple_assign_rhs1 (load2_stmt)))
goto fail;
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
goto fail;
var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
|| lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
|| lookup_attribute ("omp simd inscan exclusive",
DECL_ATTRIBUTES (var3)))
goto fail;
}
dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
std::swap (var1, var2);
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
if (!lookup_attribute ("omp simd inscan exclusive",
DECL_ATTRIBUTES (var1)))
goto fail;
var1 = var3;
}
if (loop_vinfo->scan_map == NULL)
goto fail;
tree *init = loop_vinfo->scan_map->get (var1);
@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
goto fail;
/* The IL is as expected, now check if we can actually vectorize it.
Inclusive scan:
_26 = D.2043[_25];
_27 = D.2042[_25];
_28 = _26 + _27;
@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
from the D.2042[_21] = 0; store):
_30 = MEM <vector(8) int> [(int *)&D.2043];
_31 = MEM <vector(8) int> [(int *)&D.2042];
_32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
_32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
_33 = _31 + _32;
// _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
_34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
_34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
_35 = _33 + _34;
// _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[1]+.._31[4], ... _31[4]+.._31[7] };
_36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
_36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
_37 = _35 + _36;
// _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[0]+.._31[4], ... _31[0]+.._31[7] };
_38 = _30 + _37;
_39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
MEM <vector(8) int> [(int *)&D.2043] = _39;
MEM <vector(8) int> [(int *)&D.2042] = _38; */
MEM <vector(8) int> [(int *)&D.2042] = _38;
Exclusive scan:
_26 = D.2043[_25];
D.2044[_25] = _26;
_27 = D.2042[_25];
_28 = _26 + _27;
D.2043[_25] = _28;
should be vectorized as (where _40 is the vectorized rhs
from the D.2042[_21] = 0; store):
_30 = MEM <vector(8) int> [(int *)&D.2043];
_31 = MEM <vector(8) int> [(int *)&D.2042];
_32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
_33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
_34 = _32 + _33;
// _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
// _31[3]+_31[4], ... _31[5]+.._31[6] };
_35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
_36 = _34 + _35;
// _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[1]+.._31[4], ... _31[3]+.._31[6] };
_37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
_38 = _36 + _37;
// _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
// _31[0]+.._31[4], ... _31[0]+.._31[6] };
_39 = _30 + _38;
_50 = _31 + _39;
_51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
MEM <vector(8) int> [(int *)&D.2044] = _39;
MEM <vector(8) int> [(int *)&D.2042] = _51; */
enum machine_mode vec_mode = TYPE_MODE (vectype);
optab optab = optab_for_tree_code (code, vectype, optab_default);
if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
tree rhs = gimple_assign_rhs1 (stmt);
gcc_assert (TREE_CODE (rhs) == SSA_NAME);
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
bool inscan_var_store
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
{
use_operand_p use_p;
imm_use_iterator iter;
FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
{
gimple *use_stmt = USE_STMT (use_p);
if (use_stmt == stmt || is_gimple_debug (use_stmt))
continue;
rhs = gimple_assign_lhs (use_stmt);
break;
}
}
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
enum tree_code code = gimple_assign_rhs_code (def_stmt);
if (code == POINTER_PLUS_EXPR)
@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
{
std::swap (rhs1, rhs2);
std::swap (var1, var2);
std::swap (load1_dr_info, load2_dr_info);
}
tree *init = loop_vinfo->scan_map->get (var1);
gcc_assert (init);
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
bool inscan_var_store
= lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
gcc_unreachable ();
@ -6789,29 +6957,50 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE;
tree vec_oprnd3 = NULL_TREE;
tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
tree dataref_offset = build_int_cst (ref_type, 0);
tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
tree ldataref_ptr = NULL_TREE;
tree orig = NULL_TREE;
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
for (int j = 0; j < ncopies; j++)
{
stmt_vec_info new_stmt_info;
if (j == 0)
{
vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
if (ldataref_ptr == NULL)
vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
orig = vec_oprnd3;
}
else
{
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
if (ldataref_ptr == NULL)
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
if (!inscan_var_store)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
}
if (ldataref_ptr)
{
vec_oprnd2 = make_ssa_name (vectype);
tree data_ref = fold_build2 (MEM_REF, vectype,
unshare_expr (ldataref_ptr),
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
if (prev_stmt_info == NULL)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
}
tree v = vec_oprnd2;
for (int i = 0; i < units_log2; ++i)
{
@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
new_temp = new_temp2;
}
/* For exclusive scan, perform the perms[i] permutation once
more. */
if (i == 0
&& STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
&& v == vec_oprnd2)
{
v = new_temp;
--i;
continue;
}
tree new_temp2 = make_ssa_name (vectype);
g = gimple_build_assign (new_temp2, code, v, new_temp);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
tree last_perm_arg = new_temp;
/* For exclusive scan, new_temp computed above is the exclusive scan
prefix sum. Turn it into inclusive prefix sum for the broadcast
of the last element into orig. */
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
{
last_perm_arg = make_ssa_name (vectype);
g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
}
orig = make_ssa_name (vectype);
g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
perms[units_log2]);
g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
last_perm_arg, perms[units_log2]);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
if (!inscan_var_store)
{
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
tree data_ref = fold_build2 (MEM_REF, vectype,
unshare_expr (dataref_ptr),
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
g = gimple_build_assign (data_ref, new_temp);
@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (j != 0)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
tree data_ref = fold_build2 (MEM_REF, vectype,
unshare_expr (dataref_ptr),
dataref_offset);
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
gimple *g = gimple_build_assign (data_ref, orig);
@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
}
return true;
}
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))

View File

@ -917,7 +917,7 @@ struct _stmt_vec_info {
bool strided_p;
/* For both loads and stores. */
unsigned simd_lane_access_p : 2;
unsigned simd_lane_access_p : 3;
/* Classifies how the load or store is going to be implemented
for loop vectorization. */