re PR rtl-optimization/84101 (-O3 and -ftree-vectorize trying too hard for function returning trivial pair-of-uint64_t-structure)

2019-04-03  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/84101
	* tree-vect-stmts.c: Include explow.h for hard_function_value,
	regs.h for hard_regno_nregs.
	(cfun_returns): New helper.
	(vect_model_store_cost): When vectorizing a store to a decl
	that we return, and the function ABI returns in a multi-reg
	location, account for the possible spilling that will happen.

	* gcc.target/i386/pr84101.c: New testcase.

From-SVN: r270123
This commit is contained in:
Richard Biener 2019-04-03 12:30:16 +00:00 committed by Richard Biener
parent 615792b72e
commit c51b04ec33
4 changed files with 100 additions and 0 deletions

View File

@ -1,3 +1,13 @@
2019-04-03 Richard Biener <rguenther@suse.de>
PR tree-optimization/84101
* tree-vect-stmts.c: Include explow.h for hard_function_value,
regs.h for hard_regno_nregs.
(cfun_returns): New helper.
(vect_model_store_cost): When vectorizing a store to a decl
that we return, and the function ABI returns in a multi-reg
location, account for the possible spilling that will happen.
2019-04-03 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390.c (s390_legitimate_address_p): Reject long

View File

@ -1,3 +1,8 @@
2019-04-03 Richard Biener <rguenther@suse.de>
PR tree-optimization/84101
* gcc.target/i386/pr84101.c: New testcase.
2019-04-02 Jeff Law <law@redhat.com>
* gcc.target/visium/bit_shift.c: xfail.

View File

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-slp2-details" } */
/* Aggregate of two unsigned long members; pair () below returns it by
   value.  */
typedef struct uint64_pair uint64_pair_t ;
struct uint64_pair
{
unsigned long w0 ;
unsigned long w1 ;
} ;
/* Fill both members of a uint64_pair_t from NUM (NUM shifted left by one
   goes into w0, NUM shifted right by one goes into w1) and return the
   struct by value.  The dg-final directive following this function checks
   that the slp2 dump does not contain "basic block vectorized".  */
uint64_pair_t pair(int num)
{
uint64_pair_t p ;
p.w0 = num << 1 ;
p.w1 = num >> 1 ;
return p ;
}
/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" } } */

View File

@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
@ -52,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@ -948,6 +950,37 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
"prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Return true if the current function returns DECL.  That is the case
   when some return statement ending a predecessor of the exit block
   either has DECL itself as its return value, or is reached (through
   its virtual use chain, ignoring clobbers) by an assignment that
   copies DECL into the returned value.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator pred_iter;
  edge pred;

  FOR_EACH_EDGE (pred, pred_iter, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      /* Only predecessors that end in a return statement are of
	 interest.  */
      greturn *ret_stmt = safe_dyn_cast <greturn *> (last_stmt (pred->src));
      if (!ret_stmt)
	continue;

      /* Direct case: the return statement returns DECL itself.  */
      tree retval = gimple_return_retval (ret_stmt);
      if (retval == decl)
	return true;

      /* Indirect case: an aggregate copy of DECL into the returned
	 value.  Start at the definition of the return's virtual use and
	 step over any clobbers found along the virtual use chain.  */
      gimple *vdef_stmt = SSA_NAME_DEF_STMT (gimple_vuse (ret_stmt));
      while (gimple_clobber_p (vdef_stmt))
	vdef_stmt = SSA_NAME_DEF_STMT (gimple_vuse (vdef_stmt));

      if (!is_a <gassign *> (vdef_stmt))
	continue;

      if (gimple_assign_lhs (vdef_stmt) == retval
	  && gimple_assign_rhs1 (vdef_stmt) == decl)
	return true;
    }

  return false;
}
/* Function vect_model_store_cost
Models cost for stores. In the case of grouped accesses, one access
@ -1032,6 +1065,37 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
vec_to_scalar, stmt_info, 0, vect_body);
}
/* When vectorizing a store into the function result assign
a penalty if the function returns in a multi-register location.
In this case we assume we'll end up with having to spill the
vector result and do piecewise loads as a conservative estimate. */
tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
if (base
&& (TREE_CODE (base) == RESULT_DECL
|| (DECL_P (base) && cfun_returns (base)))
&& !aggregate_value_p (base, cfun->decl))
{
rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
/* ??? Handle PARALLEL in some way. */
if (REG_P (reg))
{
int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
/* Assume that a single reg-reg move is possible and cheap,
do not account for vector to gp register move cost. */
if (nregs > 1)
{
/* Spill. */
prologue_cost += record_stmt_cost (cost_vec, ncopies,
vector_store,
stmt_info, 0, vect_epilogue);
/* Loads. */
prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
scalar_load,
stmt_info, 0, vect_epilogue);
}
}
}
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_store_cost: inside_cost = %d, "