Add FMADDSUB and FMSUBADD SLP vectorization patterns and optabs
This adds named expanders for vec_fmaddsub<mode>4 and vec_fmsubadd<mode>4 which map to x86 vfmaddsubXXXp{ds} and vfmsubaddXXXp{ds} instructions. This complements the previous addition of ADDSUB support. x86 lacks SUBADD and the negate variants of FMA with mixed plus minus so I did not add optabs or patterns for those but it would not be difficult if there's a target that has them. 2021-07-05 Richard Biener <rguenther@suse.de> * doc/md.texi (vec_fmaddsub<mode>4): Document. (vec_fmsubadd<mode>4): Likewise. * optabs.def (vec_fmaddsub$a4): Add. (vec_fmsubadd$a4): Likewise. * internal-fn.def (IFN_VEC_FMADDSUB): Add. (IFN_VEC_FMSUBADD): Likewise. * tree-vect-slp-patterns.c (addsub_pattern::recognize): Refactor to handle IFN_VEC_FMADDSUB and IFN_VEC_FMSUBADD. (addsub_pattern::build): Likewise. * tree-vect-slp.c (vect_optimize_slp): CFN_VEC_FMADDSUB and CFN_VEC_FMSUBADD are not transparent for permutes. * config/i386/sse.md (vec_fmaddsub<mode>4): New expander. (vec_fmsubadd<mode>4): Likewise. * gcc.target/i386/vect-fmaddsubXXXpd.c: New testcase. * gcc.target/i386/vect-fmaddsubXXXps.c: Likewise. * gcc.target/i386/vect-fmsubaddXXXpd.c: Likewise. * gcc.target/i386/vect-fmsubaddXXXps.c: Likewise.
This commit is contained in:
parent
9f489a5731
commit
7d810646d4
@ -4644,6 +4644,25 @@
|
||||
;;
|
||||
;; But this doesn't seem useful in practice.
|
||||
|
||||
;; Named expander for the vec_fmaddsub<mode>4 optab on the VF vector float
;; modes.  Operands 1, 2 and 3 are the three inputs and operand 0 is the
;; result; the operation itself is represented as UNSPEC_FMADDSUB over the
;; three inputs, unmodified.  There are no preparation statements, so the
;; expander only emits this template.
;; Enabled when FMA or FMA4 is available, or when the mode is 64 bytes wide
;; or AVX512VL is enabled.
;; NOTE(review): presumably matched by the fmaddsub insn patterns elsewhere
;; in this file -- they are not visible in this hunk.
(define_expand "vec_fmaddsub<mode>4"
|
||||
[(set (match_operand:VF 0 "register_operand")
|
||||
(unspec:VF
|
||||
[(match_operand:VF 1 "nonimmediate_operand")
|
||||
(match_operand:VF 2 "nonimmediate_operand")
|
||||
(match_operand:VF 3 "nonimmediate_operand")]
|
||||
UNSPEC_FMADDSUB))]
|
||||
"TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")
|
||||
|
||||
;; Named expander for the vec_fmsubadd<mode>4 optab on the VF vector float
;; modes.  There is no separate unspec for this operation: it is expressed
;; as UNSPEC_FMADDSUB with operand 3 wrapped in (neg ...), which swaps which
;; lanes add and which lanes subtract the third operand.
;; Enabled under the same condition as vec_fmaddsub<mode>4: FMA or FMA4, or
;; a 64-byte mode, or AVX512VL.
(define_expand "vec_fmsubadd<mode>4"
|
||||
[(set (match_operand:VF 0 "register_operand")
|
||||
(unspec:VF
|
||||
[(match_operand:VF 1 "nonimmediate_operand")
|
||||
(match_operand:VF 2 "nonimmediate_operand")
|
||||
(neg:VF
|
||||
(match_operand:VF 3 "nonimmediate_operand"))]
|
||||
UNSPEC_FMADDSUB))]
|
||||
"TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")
|
||||
|
||||
(define_expand "fmaddsub_<mode>"
|
||||
[(set (match_operand:VF 0 "register_operand")
|
||||
(unspec:VF
|
||||
|
@ -5688,6 +5688,20 @@ Alternating subtract, add with even lanes doing subtract and odd
|
||||
lanes doing addition. Operands 1 and 2 and the output operand are vectors
|
||||
with mode @var{m}.
|
||||
|
||||
@cindex @code{vec_fmaddsub@var{m}4} instruction pattern
|
||||
@item @samp{vec_fmaddsub@var{m}4}
|
||||
Alternating multiply subtract, add with even lanes doing subtract and odd
|
||||
lanes doing addition of the third operand to the multiplication result
|
||||
of the first two operands. Operands 1, 2 and 3 and the output operand are vectors
|
||||
with mode @var{m}.
|
||||
|
||||
@cindex @code{vec_fmsubadd@var{m}4} instruction pattern
|
||||
@item @samp{vec_fmsubadd@var{m}4}
|
||||
Alternating multiply add, subtract with even lanes doing addition and odd
|
||||
lanes doing subtraction of the third operand to/from the multiplication result
|
||||
of the first two operands. Operands 1, 2 and 3 and the output operand are vectors
|
||||
with mode @var{m}.
|
||||
|
||||
These instructions are not allowed to @code{FAIL}.
|
||||
|
||||
@cindex @code{mulhisi3} instruction pattern
|
||||
|
@ -282,7 +282,8 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
|
||||
DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary)
|
||||
DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary)
|
||||
DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
|
||||
DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
|
||||
|
||||
/* FP scales. */
|
||||
DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
|
||||
|
@ -408,6 +408,8 @@ OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a")
|
||||
OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a")
|
||||
OPTAB_D (vec_widen_uaddl_lo_optab, "vec_widen_uaddl_lo_$a")
|
||||
OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
|
||||
OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
|
||||
OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
|
||||
|
||||
OPTAB_D (sync_add_optab, "sync_add$I$a")
|
||||
OPTAB_D (sync_and_optab, "sync_and$I$a")
|
||||
|
34
gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXpd.c
Normal file
34
gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXpd.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target fma } */
|
||||
/* { dg-options "-O3 -mfma -save-temps" } */
|
||||
|
||||
#include "fma-check.h"
|
||||
|
||||
/* Kernel under test (double precision).  For each of the N lane pairs,
   the even element of A becomes b*c - a and the odd element becomes
   b*c + a, updating A in place.  A is __restrict-qualified; B and C are
   only read.  The dg-final directive of this file expects the compiler
   to emit an fmaddsub...pd instruction for this loop.
   noipa keeps interprocedural optimization away from this function, so
   it is compiled as a standalone loop.  */
void __attribute__((noipa))
|
||||
check_fmaddsub (double * __restrict a, double *b, double *c, int n)
|
||||
{
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
a[2*i + 0] = b[2*i + 0] * c[2*i + 0] - a[2*i + 0];
|
||||
a[2*i + 1] = b[2*i + 1] * c[2*i + 1] + a[2*i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Runtime check for check_fmaddsub (double).  Fills a[i] = i, b[i] = 3*i,
   c[i] = 7*i, runs the kernel over two lane pairs (four elements) and
   aborts if any element of A differs from the expected vector D.
   NOTE(review): presumably invoked by the driver in fma-check.h, which is
   not shown here.  */
static void
|
||||
fma_test (void)
|
||||
{
|
||||
double a[4], b[4], c[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = 3*i;
|
||||
c[i] = 7*i;
|
||||
}
|
||||
check_fmaddsub (a, b, c, 2);
|
||||
/* b*c is 21*i*i; even lanes subtract i, odd lanes add i:
   0-0, 21+1, 84-2, 189+3.  */
const double d[4] = { 0., 22., 82., 192. };
|
||||
for (int i = 0; i < 4; ++i)
|
||||
if (a[i] != d[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "fmaddsub...pd" } } */
|
34
gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXps.c
Normal file
34
gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXps.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target fma } */
|
||||
/* { dg-options "-O3 -mfma -save-temps" } */
|
||||
|
||||
#include "fma-check.h"
|
||||
|
||||
/* Kernel under test (single precision).  For each of the N lane pairs,
   the even element of A becomes b*c - a and the odd element becomes
   b*c + a, updating A in place.  A is __restrict-qualified; B and C are
   only read.  The dg-final directive of this file expects the compiler
   to emit an fmaddsub...ps instruction for this loop.
   noipa keeps interprocedural optimization away from this function, so
   it is compiled as a standalone loop.  */
void __attribute__((noipa))
|
||||
check_fmaddsub (float * __restrict a, float *b, float *c, int n)
|
||||
{
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
a[2*i + 0] = b[2*i + 0] * c[2*i + 0] - a[2*i + 0];
|
||||
a[2*i + 1] = b[2*i + 1] * c[2*i + 1] + a[2*i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Runtime check for check_fmaddsub (float).  Fills a[i] = i, b[i] = 3*i,
   c[i] = 7*i, runs the kernel over two lane pairs (four elements) and
   aborts if any element of A differs from the expected vector D.
   NOTE(review): presumably invoked by the driver in fma-check.h, which is
   not shown here.  */
static void
|
||||
fma_test (void)
|
||||
{
|
||||
float a[4], b[4], c[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = 3*i;
|
||||
c[i] = 7*i;
|
||||
}
|
||||
check_fmaddsub (a, b, c, 2);
|
||||
/* b*c is 21*i*i; even lanes subtract i, odd lanes add i:
   0-0, 21+1, 84-2, 189+3.  */
const float d[4] = { 0., 22., 82., 192. };
|
||||
for (int i = 0; i < 4; ++i)
|
||||
if (a[i] != d[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "fmaddsub...ps" } } */
|
34
gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXpd.c
Normal file
34
gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXpd.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target fma } */
|
||||
/* { dg-options "-O3 -mfma -save-temps" } */
|
||||
|
||||
#include "fma-check.h"
|
||||
|
||||
/* Kernel under test (double precision).  For each of the N lane pairs,
   the even element of A becomes b*c + a and the odd element becomes
   b*c - a, updating A in place.  A is __restrict-qualified; B and C are
   only read.  The dg-final directive of this file expects the compiler
   to emit an fmsubadd...pd instruction for this loop.
   noipa keeps interprocedural optimization away from this function, so
   it is compiled as a standalone loop.  */
void __attribute__((noipa))
|
||||
check_fmsubadd (double * __restrict a, double *b, double *c, int n)
|
||||
{
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
a[2*i + 0] = b[2*i + 0] * c[2*i + 0] + a[2*i + 0];
|
||||
a[2*i + 1] = b[2*i + 1] * c[2*i + 1] - a[2*i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Runtime check for check_fmsubadd (double).  Fills a[i] = i, b[i] = 3*i,
   c[i] = 7*i, runs the kernel over two lane pairs (four elements) and
   aborts if any element of A differs from the expected vector D.
   NOTE(review): presumably invoked by the driver in fma-check.h, which is
   not shown here.  */
static void
|
||||
fma_test (void)
|
||||
{
|
||||
double a[4], b[4], c[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = 3*i;
|
||||
c[i] = 7*i;
|
||||
}
|
||||
check_fmsubadd (a, b, c, 2);
|
||||
/* b*c is 21*i*i; even lanes add i, odd lanes subtract i:
   0+0, 21-1, 84+2, 189-3.  */
const double d[4] = { 0., 20., 86., 186. };
|
||||
for (int i = 0; i < 4; ++i)
|
||||
if (a[i] != d[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "fmsubadd...pd" } } */
|
34
gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXps.c
Normal file
34
gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXps.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target fma } */
|
||||
/* { dg-options "-O3 -mfma -save-temps" } */
|
||||
|
||||
#include "fma-check.h"
|
||||
|
||||
/* Kernel under test (single precision).  For each of the N lane pairs,
   the even element of A becomes b*c + a and the odd element becomes
   b*c - a, updating A in place.  A is __restrict-qualified; B and C are
   only read.  The dg-final directive of this file expects the compiler
   to emit an fmsubadd...ps instruction for this loop.
   noipa keeps interprocedural optimization away from this function, so
   it is compiled as a standalone loop.  */
void __attribute__((noipa))
|
||||
check_fmsubadd (float * __restrict a, float *b, float *c, int n)
|
||||
{
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
a[2*i + 0] = b[2*i + 0] * c[2*i + 0] + a[2*i + 0];
|
||||
a[2*i + 1] = b[2*i + 1] * c[2*i + 1] - a[2*i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Runtime check for check_fmsubadd (float).  Fills a[i] = i, b[i] = 3*i,
   c[i] = 7*i, runs the kernel over two lane pairs (four elements) and
   aborts if any element of A differs from the expected vector D.
   NOTE(review): presumably invoked by the driver in fma-check.h, which is
   not shown here.  */
static void
|
||||
fma_test (void)
|
||||
{
|
||||
float a[4], b[4], c[4];
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = 3*i;
|
||||
c[i] = 7*i;
|
||||
}
|
||||
check_fmsubadd (a, b, c, 2);
|
||||
/* b*c is 21*i*i; even lanes add i, odd lanes subtract i:
   0+0, 21-1, 84+2, 189-3.  */
const float d[4] = { 0., 20., 86., 186. };
|
||||
for (int i = 0; i < 4; ++i)
|
||||
if (a[i] != d[i])
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "fmsubadd...ps" } } */
|
@ -1496,8 +1496,8 @@ complex_operations_pattern::build (vec_info * /* vinfo */)
|
||||
class addsub_pattern : public vect_pattern
|
||||
{
|
||||
public:
|
||||
addsub_pattern (slp_tree *node)
|
||||
: vect_pattern (node, NULL, IFN_VEC_ADDSUB) {};
|
||||
addsub_pattern (slp_tree *node, internal_fn ifn)
|
||||
: vect_pattern (node, NULL, ifn) {};
|
||||
|
||||
void build (vec_info *);
|
||||
|
||||
@ -1510,46 +1510,68 @@ addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_)
|
||||
{
|
||||
slp_tree node = *node_;
|
||||
if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
|
||||
|| SLP_TREE_CHILDREN (node).length () != 2)
|
||||
|| SLP_TREE_CHILDREN (node).length () != 2
|
||||
|| SLP_TREE_LANE_PERMUTATION (node).length () % 2)
|
||||
return NULL;
|
||||
|
||||
/* Match a blend of a plus and a minus op with the same number of plus and
|
||||
minus lanes on the same operands. */
|
||||
slp_tree sub = SLP_TREE_CHILDREN (node)[0];
|
||||
slp_tree add = SLP_TREE_CHILDREN (node)[1];
|
||||
bool swapped_p = false;
|
||||
if (vect_match_expression_p (sub, PLUS_EXPR))
|
||||
{
|
||||
std::swap (add, sub);
|
||||
swapped_p = true;
|
||||
}
|
||||
if (!(vect_match_expression_p (add, PLUS_EXPR)
|
||||
&& vect_match_expression_p (sub, MINUS_EXPR)))
|
||||
unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
|
||||
unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
|
||||
if (l0 == l1)
|
||||
return NULL;
|
||||
if (!((SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[0]
|
||||
&& SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[1])
|
||||
|| (SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[1]
|
||||
&& SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[0])))
|
||||
bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
|
||||
PLUS_EXPR);
|
||||
if (!l0add_p
|
||||
&& !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
|
||||
return NULL;
|
||||
bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
|
||||
PLUS_EXPR);
|
||||
if (!l1add_p
|
||||
&& !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
|
||||
return NULL;
|
||||
|
||||
slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
|
||||
slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
|
||||
if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
|
||||
&& SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
|
||||
|| (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
|
||||
&& SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
|
||||
return NULL;
|
||||
|
||||
for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
|
||||
{
|
||||
std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
|
||||
if (swapped_p)
|
||||
perm.first = perm.first == 0 ? 1 : 0;
|
||||
/* It has to be alternating -, +, -, ...
|
||||
/* It has to be alternating -, +, -,
|
||||
While we could permute the .ADDSUB inputs and the .ADDSUB output
|
||||
that's only profitable over the add + sub + blend if at least
|
||||
one of the permute is optimized which we can't determine here. */
|
||||
if (perm.first != (i & 1)
|
||||
if (perm.first != ((i & 1) ? l1 : l0)
|
||||
|| perm.second != i)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
|
||||
return NULL;
|
||||
/* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
|
||||
(l0add_p), see whether we have FMA variants. */
|
||||
if (!l0add_p
|
||||
&& vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
|
||||
{
|
||||
/* (c * d) -+ a */
|
||||
if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
|
||||
return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
|
||||
}
|
||||
else if (l0add_p
|
||||
&& vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
|
||||
{
|
||||
/* (c * d) +- a */
|
||||
if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
|
||||
return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
|
||||
}
|
||||
|
||||
return new addsub_pattern (node_);
|
||||
if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
|
||||
return new addsub_pattern (node_, IFN_VEC_ADDSUB);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
@ -1557,38 +1579,96 @@ addsub_pattern::build (vec_info *vinfo)
|
||||
{
|
||||
slp_tree node = *m_node;
|
||||
|
||||
slp_tree sub = SLP_TREE_CHILDREN (node)[0];
|
||||
slp_tree add = SLP_TREE_CHILDREN (node)[1];
|
||||
if (vect_match_expression_p (sub, PLUS_EXPR))
|
||||
std::swap (add, sub);
|
||||
unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
|
||||
unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
|
||||
|
||||
/* Modify the blend node in-place. */
|
||||
SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
|
||||
SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
|
||||
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
|
||||
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
|
||||
switch (m_ifn)
|
||||
{
|
||||
case IFN_VEC_ADDSUB:
|
||||
{
|
||||
slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
|
||||
slp_tree add = SLP_TREE_CHILDREN (node)[l1];
|
||||
|
||||
/* Build IFN_VEC_ADDSUB from the sub representative operands. */
|
||||
stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
|
||||
gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
|
||||
gimple_assign_rhs1 (rep->stmt),
|
||||
gimple_assign_rhs2 (rep->stmt));
|
||||
gimple_call_set_lhs (call, make_ssa_name
|
||||
(TREE_TYPE (gimple_assign_lhs (rep->stmt))));
|
||||
gimple_call_set_nothrow (call, true);
|
||||
gimple_set_bb (call, gimple_bb (rep->stmt));
|
||||
stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
|
||||
SLP_TREE_REPRESENTATIVE (node) = new_rep;
|
||||
STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
|
||||
STMT_SLP_TYPE (new_rep) = pure_slp;
|
||||
STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
|
||||
STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
|
||||
STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
|
||||
SLP_TREE_CODE (node) = ERROR_MARK;
|
||||
SLP_TREE_LANE_PERMUTATION (node).release ();
|
||||
/* Modify the blend node in-place. */
|
||||
SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
|
||||
SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
|
||||
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
|
||||
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
|
||||
|
||||
vect_free_slp_tree (sub);
|
||||
vect_free_slp_tree (add);
|
||||
/* Build IFN_VEC_ADDSUB from the sub representative operands. */
|
||||
stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
|
||||
gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
|
||||
gimple_assign_rhs1 (rep->stmt),
|
||||
gimple_assign_rhs2 (rep->stmt));
|
||||
gimple_call_set_lhs (call, make_ssa_name
|
||||
(TREE_TYPE (gimple_assign_lhs (rep->stmt))));
|
||||
gimple_call_set_nothrow (call, true);
|
||||
gimple_set_bb (call, gimple_bb (rep->stmt));
|
||||
stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
|
||||
SLP_TREE_REPRESENTATIVE (node) = new_rep;
|
||||
STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
|
||||
STMT_SLP_TYPE (new_rep) = pure_slp;
|
||||
STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
|
||||
STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
|
||||
STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
|
||||
SLP_TREE_CODE (node) = ERROR_MARK;
|
||||
SLP_TREE_LANE_PERMUTATION (node).release ();
|
||||
|
||||
vect_free_slp_tree (sub);
|
||||
vect_free_slp_tree (add);
|
||||
break;
|
||||
}
|
||||
case IFN_VEC_FMADDSUB:
|
||||
case IFN_VEC_FMSUBADD:
|
||||
{
|
||||
slp_tree sub, add;
|
||||
if (m_ifn == IFN_VEC_FMADDSUB)
|
||||
{
|
||||
sub = SLP_TREE_CHILDREN (node)[l0];
|
||||
add = SLP_TREE_CHILDREN (node)[l1];
|
||||
}
|
||||
else /* m_ifn == IFN_VEC_FMSUBADD */
|
||||
{
|
||||
sub = SLP_TREE_CHILDREN (node)[l1];
|
||||
add = SLP_TREE_CHILDREN (node)[l0];
|
||||
}
|
||||
slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
|
||||
/* Modify the blend node in-place. */
|
||||
SLP_TREE_CHILDREN (node).safe_grow (3, true);
|
||||
SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
|
||||
SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
|
||||
SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
|
||||
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
|
||||
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
|
||||
SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
|
||||
|
||||
/* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. */
|
||||
stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
|
||||
stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
|
||||
gcall *call = gimple_build_call_internal (m_ifn, 3,
|
||||
gimple_assign_rhs1 (mrep->stmt),
|
||||
gimple_assign_rhs2 (mrep->stmt),
|
||||
gimple_assign_rhs2 (srep->stmt));
|
||||
gimple_call_set_lhs (call, make_ssa_name
|
||||
(TREE_TYPE (gimple_assign_lhs (srep->stmt))));
|
||||
gimple_call_set_nothrow (call, true);
|
||||
gimple_set_bb (call, gimple_bb (srep->stmt));
|
||||
stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
|
||||
SLP_TREE_REPRESENTATIVE (node) = new_rep;
|
||||
STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
|
||||
STMT_SLP_TYPE (new_rep) = pure_slp;
|
||||
STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
|
||||
STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
|
||||
STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
|
||||
SLP_TREE_CODE (node) = ERROR_MARK;
|
||||
SLP_TREE_LANE_PERMUTATION (node).release ();
|
||||
|
||||
vect_free_slp_tree (sub);
|
||||
vect_free_slp_tree (add);
|
||||
break;
|
||||
}
|
||||
default:;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
|
@ -3728,6 +3728,8 @@ vect_optimize_slp (vec_info *vinfo)
|
||||
case CFN_COMPLEX_MUL:
|
||||
case CFN_COMPLEX_MUL_CONJ:
|
||||
case CFN_VEC_ADDSUB:
|
||||
case CFN_VEC_FMADDSUB:
|
||||
case CFN_VEC_FMSUBADD:
|
||||
vertices[idx].perm_in = 0;
|
||||
vertices[idx].perm_out = 0;
|
||||
default:;
|
||||
|
Loading…
Reference in New Issue
Block a user