re PR rtl-optimization/84101 (-O3 and -ftree-vectorize trying too hard for function returning trivial pair-of-uint64_t-structure)

2019-04-03  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/84101
	* tree-vect-stmts.c: Include explow.h for hard_function_value,
	regs.h for hard_regno_nregs.
	(cfun_returns): New helper.
	(vect_model_store_cost): When vectorizing a store to a decl
	we return and the function ABI returns in a multi-reg location
	account for the possible spilling that will happen.

	* gcc.target/i386/pr84101.c: New testcase.

From-SVN: r270123
2019-04-03 12:30:16 +00:00 · 2019-04-03 12:30:16 +00:00 · c51b04ec33
commit c51b04ec33
parent 615792b72e
4 changed files with 100 additions and 0 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,13 @@
+2019-04-03  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/84101
+	* tree-vect-stmts.c: Include explow.h for hard_function_value,
+	regs.h for hard_regno_nregs.
+	(cfun_returns): New helper.
+	(vect_model_store_cost): When vectorizing a store to a decl
+	we return and the function ABI returns in a multi-reg location
+	account for the possible spilling that will happen.
+
 2019-04-03  Andreas Krebbel  <krebbel@linux.ibm.com>

 	* config/s390/s390.c (s390_legitimate_address_p): Reject long
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,8 @@
+2019-04-03  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/84101
+	* gcc.target/i386/pr84101.c: New testcase.
+
 2019-04-02  Jeff Law  <law@redhat.com>

 	* gcc.target/visium/bit_shift.c: xfail.
--- a/gcc/testsuite/gcc.target/i386/pr84101.c
+++ b/gcc/testsuite/gcc.target/i386/pr84101.c
@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slp2-details" } */
+/* PR 84101: the stores to the returned pair must not get vectorized.  */
+typedef struct uint64_pair uint64_pair_t ;
+struct uint64_pair
+{
+  unsigned long w0 ;
+  unsigned long w1 ;
+} ;
+
+uint64_pair_t pair(int num)
+{
+  uint64_pair_t p ;
+
+  p.w0 = num << 1 ;
+  p.w1 = num >> 1 ;
+
+  return p ;
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" } } */
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "tree-ssa-loop-manip.h"
 #include "cfgloop.h"
+#include "explow.h"
 #include "tree-ssa-loop.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
@ -52,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "vec-perm-indices.h"
 #include "tree-ssa-loop-niter.h"
 #include "gimple-fold.h"
+#include "regs.h"

 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@ -948,6 +950,37 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
 }

+/* Return true if the current function returns DECL (directly or by copy).  */
+
+static bool
+cfun_returns (tree decl)
+{
+  edge_iterator ei;
+  edge e;
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+    {
+      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
+      if (!ret)
+	continue;
+      if (gimple_return_retval (ret) == decl)
+	return true;
+      /* Otherwise look for an aggregate copy of DECL into the returned
+	 object: follow the return's virtual use to its defining stmt,
+	 skipping any clobbers in between.  */
+      gimple *def = ret;
+      do
+	{
+	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
+	}
+      while (gimple_clobber_p (def));
+      if (is_a <gassign *> (def)
+	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
+	  && gimple_assign_rhs1 (def) == decl)
+	return true;
+    }
+  return false;
+}
+
 /* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
@ -1032,6 +1065,37 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
 				       vec_to_scalar, stmt_info, 0, vect_body);
    }

+  /* When vectorizing a store into the function result assign
+     a penalty if the function returns in a multi-register location.
+     In this case we assume we'll end up with having to spill the
+     vector result and do piecewise loads as a conservative estimate.  */
+  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
+  if (base
+      && (TREE_CODE (base) == RESULT_DECL
+	  || (DECL_P (base) && cfun_returns (base)))
+      && !aggregate_value_p (base, cfun->decl))
+    {
+      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
+      /* ???  Handle PARALLEL in some way.  */
+      if (REG_P (reg))
+	{
+	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
+	  /* Assume that a single reg-reg move is possible and cheap,
+	     do not account for vector to gp register move cost.  */
+	  if (nregs > 1)
+	    {
+	      /* Spill.  */
+	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
+						 vector_store,
+						 stmt_info, 0, vect_epilogue);
+	      /* Loads.  */
+	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
+						 scalar_load,
+						 stmt_info, 0, vect_epilogue);
+	    }
+	}
+    }
+
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "