slp: Don't traverse tree on (nil) nodes.
The given testcase shows that one of the children of the complex MUL contains a PHI node. This results in the vectorizer having a child that's (nil). The pattern matcher handles this correctly, but optimize_load_redistribution_1 needs to not traverse/inspect the NULL nodes. This however does high-light a missed opportunity. This testcase seems to result in a different canonicalization than normally. Normally the expressions are right leaning. But sometimes, especially when type casts are introduced the trees suddenly become left leaning. For instance this testcase (even without type casts) won't detect the FMA form because the addition gets the MUL node in the left and not right node as it expects. Checking all forms would be quite expensive so for GCC 12 it probably makes sense to make forms with type casts in them have the same form as those without? gcc/ChangeLog: * tree-vect-slp.c (optimize_load_redistribution_1): Abort on NULL nodes. gcc/testsuite/ChangeLog: * g++.dg/vect/simd-complex-num-null-node.cc: New test.
This commit is contained in:
parent
557a0d3b1b
commit
28af4b9a7c
78
gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
Normal file
78
gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
Normal file
@ -0,0 +1,78 @@
|
||||
/* { dg-do compile { target { aarch64-*-* } } } */
|
||||
/* { dg-skip-if "incorrect syntax for c++98" { *-*-* } { "-std=c++98" } { "" } } */
|
||||
/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-vect-all" } */
|
||||
/* { dg-require-effective-target le } */
|
||||
|
||||
typedef struct {
|
||||
float b;
|
||||
float c;
|
||||
} d;
|
||||
namespace {
|
||||
typedef int e;
|
||||
template <typename, typename> struct f;
|
||||
template <template <typename> class g, typename h, typename k, typename... l>
|
||||
struct f<g<k, l...>, h> {
|
||||
using m = g<h>;
|
||||
};
|
||||
} // namespace
|
||||
namespace aa {
|
||||
template <typename k> class o {
|
||||
public:
|
||||
typedef k p;
|
||||
};
|
||||
} // namespace aa
|
||||
namespace ab {
|
||||
template <typename k> using r = aa::o<k>;
|
||||
template <typename k> class ac : public r<k> {};
|
||||
struct s {
|
||||
template <typename k, typename h> struct ad : f<k, h> {};
|
||||
};
|
||||
template <typename t, typename h> using ae = typename s::ad<t, h>::m;
|
||||
template <typename t> struct af {
|
||||
typedef typename t::p p;
|
||||
template <typename k> using u = ae<t, k>;
|
||||
};
|
||||
} // namespace ab
|
||||
namespace aa {
|
||||
template <typename t> struct ag {
|
||||
typedef ab::af<t> v;
|
||||
typedef typename v::p &ah;
|
||||
template <typename k> struct ai { typedef typename v::u<k> aj; };
|
||||
};
|
||||
} // namespace aa
|
||||
namespace ab {
|
||||
template <typename k, typename t> struct w {
|
||||
typedef typename aa::ag<t>::ai<k>::aj x;
|
||||
struct y {};
|
||||
typedef t ak;
|
||||
w(e, ak);
|
||||
y a;
|
||||
};
|
||||
template <typename k, typename t = ac<k>> class al : w<k, t> {
|
||||
typedef w<k, t> am;
|
||||
typedef typename am::x x;
|
||||
typedef aa::ag<x> an;
|
||||
|
||||
public:
|
||||
typedef typename an::ah ah;
|
||||
typedef e ao;
|
||||
typedef t ak;
|
||||
al(ao ap, ak aq = ak()) : am(ar(ap, aq), aq) {}
|
||||
ah operator[](ao);
|
||||
ao ar(ao ap, ak) { return ap; }
|
||||
};
|
||||
} // namespace ab
|
||||
void as(int n, d *a, d *q) {
|
||||
ab::al<d> z(n);
|
||||
d acc;
|
||||
for (int j = 0; j < n; ++j) {
|
||||
auto at = a[j];
|
||||
auto au = q[j];
|
||||
acc.b += at.b * au.b - at.c * au.c;
|
||||
acc.c += at.b * au.c + at.c * au.b;
|
||||
}
|
||||
z[0] = acc;
|
||||
}
|
||||
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_MUL" 1 "vect" } } */
|
@ -2298,7 +2298,7 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
|
||||
unsigned i;
|
||||
|
||||
/* For now, we don't know anything about externals so do not do anything. */
|
||||
if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
|
||||
if (!root || SLP_TREE_DEF_TYPE (root) != vect_internal_def)
|
||||
return NULL;
|
||||
else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user