aarch64: Move more code into aarch64_vector_costs

This patch moves more code into aarch64_vector_costs and reuses
some of the information that is now available in the base class.

I'm planning to significantly rework this code, with more hooks
into the vectoriser, but this seemed worth doing as a first step.
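
The general shape of the change is sketched below with made-up, minimal
types (costs_before/costs_after are illustrative only, not the real GCC
classes): a free helper that took an explicit costs pointer becomes a
member function, and the formerly public fields are renamed with an
"m_" prefix and made private.

    // Hypothetical sketch of the pattern used by the patch; the names are
    // invented and the real code in gcc/config/aarch64/aarch64.c is far larger.

    // Before: public state, updated via a free function that takes an
    // explicit pointer to the costs object.
    struct costs_before
    {
      unsigned int vec_flags = 0;
    };

    static void
    record_flags (costs_before *costs, unsigned int flags)
    {
      costs->vec_flags = flags;
    }

    // After: the state is private and carries an "m_" prefix, and the
    // helper becomes a member function, so the pointer argument disappears.
    class costs_after
    {
    public:
      void record_flags (unsigned int flags) { m_vec_flags = flags; }
      unsigned int flags () const { return m_vec_flags; }

    private:
      unsigned int m_vec_flags = 0;
    };

    int
    main ()
    {
      costs_before old_style;
      record_flags (&old_style, 2);

      costs_after new_style;
      new_style.record_flags (2);
      return old_style.vec_flags == new_style.flags () ? 0 : 1;
    }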

gcc/
	* config/aarch64/aarch64.c (aarch64_vector_costs): Make member
	variables private and add "m_" to their names.  Remove is_loop.
	(aarch64_record_potential_advsimd_unrolling): Replace with...
	(aarch64_vector_costs::record_potential_advsimd_unrolling): ...this.
	(aarch64_analyze_loop_vinfo): Replace with...
	(aarch64_vector_costs::analyze_loop_vinfo): ...this.
	Move initialization of (m_)vec_flags to add_stmt_cost.
	(aarch64_analyze_bb_vinfo): Delete.
	(aarch64_count_ops): Replace with...
	(aarch64_vector_costs::count_ops): ...this.
	(aarch64_vector_costs::add_stmt_cost): Set m_vec_flags,
	using m_costing_for_scalar to test whether we're costing
	scalar or vector code.
	(aarch64_adjust_body_cost_sve): Replace with...
	(aarch64_vector_costs::adjust_body_cost_sve): ...this.
	(aarch64_adjust_body_cost): Replace with...
	(aarch64_vector_costs::adjust_body_cost): ...this.
	(aarch64_vector_costs::finish_cost): Use m_vinfo instead of is_loop.
commit d43fc1df73
parent 6239dd0512
Author: Richard Sandiford
Date:   2021-11-04 12:31:17 +00:00

1 file changed, 154 insertions(+), 183 deletions(-)

--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14589,8 +14589,9 @@ struct aarch64_sve_op_count : aarch64_vec_op_count
 };
 
 /* Information about vector code that we're in the process of costing.  */
-struct aarch64_vector_costs : public vector_costs
+class aarch64_vector_costs : public vector_costs
 {
+public:
   using vector_costs::vector_costs;
 
   unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
@@ -14599,26 +14600,31 @@ struct aarch64_vector_costs : public vector_costs
 			      vect_cost_model_location where) override;
   void finish_cost () override;
 
-  /* True if we have performed one-time initialization based on the vec_info.
+private:
+  void record_potential_advsimd_unrolling (loop_vec_info);
+  void analyze_loop_vinfo (loop_vec_info);
+  void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, tree,
+		  unsigned int, aarch64_vec_op_count *,
+		  const aarch64_base_vec_issue_info *, unsigned int);
+  fractional_cost adjust_body_cost_sve (const aarch64_vec_issue_info *,
+					fractional_cost, fractional_cost,
+					bool, unsigned int, unsigned int *,
+					bool *);
+  unsigned int adjust_body_cost (unsigned int);
 
-     This variable exists because the vec_info is not passed to the
-     init_cost hook.  We therefore have to defer initialization based on
-     it till later.  */
-  bool analyzed_vinfo = false;
-
-  /* True if we're costing a vector loop, false if we're costing block-level
-     vectorization.  */
-  bool is_loop = false;
+  /* True if we have performed one-time initialization based on the
+     vec_info.  */
+  bool m_analyzed_vinfo = false;
 
   /* True if we've seen an SVE operation that we cannot currently vectorize
      using Advanced SIMD.  */
-  bool saw_sve_only_op = false;
+  bool m_saw_sve_only_op = false;
 
-  /* - If VEC_FLAGS is zero then we're costing the original scalar code.
-     - If VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced
+  /* - If M_VEC_FLAGS is zero then we're costing the original scalar code.
+     - If M_VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced
        SIMD code.
-     - If VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code.  */
-  unsigned int vec_flags = 0;
+     - If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code.  */
+  unsigned int m_vec_flags = 0;
 
   /* On some CPUs, SVE and Advanced SIMD provide the same theoretical vector
      throughput, such as 4x128 Advanced SIMD vs. 2x256 SVE.  In those
@@ -14628,39 +14634,39 @@ struct aarch64_vector_costs : public vector_costs
      than length-agnostic SVE, since the SVE loop would execute an unknown
      number of times and so could not be completely unrolled in the same way.
 
-     If we're applying this heuristic, UNROLLED_ADVSIMD_NITERS is the
+     If we're applying this heuristic, M_UNROLLED_ADVSIMD_NITERS is the
      number of Advanced SIMD loop iterations that would be unrolled and
-     UNROLLED_ADVSIMD_STMTS estimates the total number of statements
+     M_UNROLLED_ADVSIMD_STMTS estimates the total number of statements
      in the unrolled loop.  Both values are zero if we're not applying
      the heuristic.  */
-  unsigned HOST_WIDE_INT unrolled_advsimd_niters = 0;
-  unsigned HOST_WIDE_INT unrolled_advsimd_stmts = 0;
+  unsigned HOST_WIDE_INT m_unrolled_advsimd_niters = 0;
+  unsigned HOST_WIDE_INT m_unrolled_advsimd_stmts = 0;
 
   /* If we're vectorizing a loop that executes a constant number of times,
      this variable gives the number of times that the vector loop would
      iterate, otherwise it is zero.  */
-  uint64_t num_vector_iterations = 0;
+  uint64_t m_num_vector_iterations = 0;
 
   /* Used only when vectorizing loops.  Estimates the number and kind of scalar
      operations that would be needed to perform the same work as one iteration
      of the vector loop.  */
-  aarch64_vec_op_count scalar_ops;
+  aarch64_vec_op_count m_scalar_ops;
 
-  /* Used only when vectorizing loops.  If VEC_FLAGS & VEC_ADVSIMD,
+  /* Used only when vectorizing loops.  If M_VEC_FLAGS & VEC_ADVSIMD,
      this structure estimates the number and kind of operations that the
-     vector loop would contain.  If VEC_FLAGS & VEC_SVE, the structure
+     vector loop would contain.  If M_VEC_FLAGS & VEC_SVE, the structure
      estimates what the equivalent Advanced SIMD-only code would need in
      order to perform the same work as one iteration of the SVE loop.  */
-  aarch64_vec_op_count advsimd_ops;
+  aarch64_vec_op_count m_advsimd_ops;
 
   /* Used only when vectorizing loops with SVE.  It estimates the number and
      kind of operations that the SVE loop would contain.  */
-  aarch64_sve_op_count sve_ops;
+  aarch64_sve_op_count m_sve_ops;
 
   /* Used to detect cases in which we end up costing the same load twice,
      once to account for results that are actually used and once to account
      for unused results.  */
-  hash_map<nofree_ptr_hash<_stmt_vec_info>, unsigned int> seen_loads;
+  hash_map<nofree_ptr_hash<_stmt_vec_info>, unsigned int> m_seen_loads;
 };
 
 /* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
@@ -14703,12 +14709,11 @@ aarch64_simd_vec_costs_for_flags (unsigned int flags)
 }
 
 /* Decide whether to use the unrolling heuristic described above
-   aarch64_vector_costs::unrolled_advsimd_niters, updating that
-   field if so.  LOOP_VINFO describes the loop that we're vectorizing
-   and COSTS are the costs that we're calculating for it.  */
-static void
-aarch64_record_potential_advsimd_unrolling (loop_vec_info loop_vinfo,
-					    aarch64_vector_costs *costs)
+   m_unrolled_advsimd_niters, updating that field if so.  LOOP_VINFO
+   describes the loop that we're vectorizing.  */
+void
+aarch64_vector_costs::
+record_potential_advsimd_unrolling (loop_vec_info loop_vinfo)
 {
   /* The heuristic only makes sense on targets that have the same
      vector throughput for SVE and Advanced SIMD.  */
@@ -14718,7 +14723,7 @@ aarch64_record_potential_advsimd_unrolling (loop_vec_info loop_vinfo,
 
   /* We only want to apply the heuristic if LOOP_VINFO is being
      vectorized for SVE.  */
-  if (!(costs->vec_flags & VEC_ANY_SVE))
+  if (!(m_vec_flags & VEC_ANY_SVE))
     return;
 
   /* Check whether it is possible in principle to use Advanced SIMD
@@ -14751,17 +14756,14 @@ aarch64_record_potential_advsimd_unrolling (loop_vec_info loop_vinfo,
 
   /* Record that we're applying the heuristic and should try to estimate
      the number of statements in the Advanced SIMD loop.  */
-  costs->unrolled_advsimd_niters = unrolled_advsimd_niters;
+  m_unrolled_advsimd_niters = unrolled_advsimd_niters;
 }
 
-/* Do one-time initialization of COSTS given that we're costing the loop
-   vectorization described by LOOP_VINFO.  */
-static void
-aarch64_analyze_loop_vinfo (loop_vec_info loop_vinfo,
-			    aarch64_vector_costs *costs)
+/* Do one-time initialization of the aarch64_vector_costs given that we're
+   costing the loop vectorization described by LOOP_VINFO.  */
+void
+aarch64_vector_costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
 {
-  costs->is_loop = true;
-
   /* Record the number of times that the vector loop would execute,
      if known.  */
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -14770,26 +14772,14 @@ aarch64_analyze_loop_vinfo (loop_vec_info loop_vinfo,
     {
       unsigned int vf = vect_vf_for_cost (loop_vinfo);
       if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
-	costs->num_vector_iterations = scalar_niters / vf;
+	m_num_vector_iterations = scalar_niters / vf;
       else
-	costs->num_vector_iterations = CEIL (scalar_niters, vf);
+	m_num_vector_iterations = CEIL (scalar_niters, vf);
     }
 
-  /* Detect whether we're costing the scalar code or the vector code.
-     This is a bit hacky: it would be better if the vectorizer told
-     us directly.
-
-     If we're costing the vector code, record whether we're vectorizing
-     for Advanced SIMD or SVE.  */
-  if (costs == LOOP_VINFO_TARGET_COST_DATA (loop_vinfo))
-    costs->vec_flags = aarch64_classify_vector_mode (loop_vinfo->vector_mode);
-  else
-    costs->vec_flags = 0;
-
-  /* Detect whether we're vectorizing for SVE and should
-     apply the unrolling heuristic described above
-     aarch64_vector_costs::unrolled_advsimd_niters.  */
-  aarch64_record_potential_advsimd_unrolling (loop_vinfo, costs);
+  /* Detect whether we're vectorizing for SVE and should apply the unrolling
+     heuristic described above m_unrolled_advsimd_niters.  */
+  record_potential_advsimd_unrolling (loop_vinfo);
 
   /* Record the issue information for any SVE WHILE instructions that the
      loop needs.  */
@@ -14804,21 +14794,10 @@ aarch64_analyze_loop_vinfo (loop_vec_info loop_vinfo,
       FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
 	if (rgm->type)
 	  num_masks += num_vectors_m1 + 1;
-      costs->sve_ops.pred_ops += num_masks * issue_info->sve->while_pred_ops;
+      m_sve_ops.pred_ops += num_masks * issue_info->sve->while_pred_ops;
     }
 }
 
-/* Do one-time initialization of COSTS given that we're costing the block
-   vectorization described by BB_VINFO.  */
-static void
-aarch64_analyze_bb_vinfo (bb_vec_info bb_vinfo, aarch64_vector_costs *costs)
-{
-  /* Unfortunately, there's no easy way of telling whether we're costing
-     the vector code or the scalar code, so just assume that we're costing
-     the vector code.  */
-  costs->vec_flags = aarch64_classify_vector_mode (bb_vinfo->vector_mode);
-}
-
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
 static int
 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
@@ -15352,30 +15331,30 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
   return stmt_cost;
 }
 
-/* VINFO, COSTS, COUNT, KIND, STMT_INFO and VECTYPE are the same as for
+/* COUNT, KIND, STMT_INFO and VECTYPE are the same as for
    vector_costs::add_stmt_cost and they describe an operation in the
    body of a vector loop.  Record issue information relating to the vector
-   operation in OPS, where OPS is one of COSTS->scalar_ops, COSTS->advsimd_ops
-   or COSTS->sve_ops; see the comments above those variables for details.
+   operation in OPS, where OPS is one of m_scalar_ops, m_advsimd_ops
+   or m_sve_ops; see the comments above those variables for details.
    In addition:
 
-   - VEC_FLAGS is zero if OPS is COSTS->scalar_ops.
+   - VEC_FLAGS is zero if OPS is m_scalar_ops.
 
-   - VEC_FLAGS & VEC_ADVSIMD is nonzero if OPS is COSTS->advsimd_ops.
+   - VEC_FLAGS & VEC_ADVSIMD is nonzero if OPS is m_advsimd_ops.
 
-   - VEC_FLAGS & VEC_ANY_SVE is nonzero if OPS is COSTS->sve_ops.
+   - VEC_FLAGS & VEC_ANY_SVE is nonzero if OPS is m_sve_ops.
 
    ISSUE_INFO provides the scalar, Advanced SIMD or SVE issue information
    associated with OPS and VEC_FLAGS.  FACTOR says how many iterations of
    the loop described by VEC_FLAGS would be needed to match one iteration
    of the vector loop in VINFO.  */
-static void
-aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
-		   unsigned int count, enum vect_cost_for_stmt kind,
-		   _stmt_vec_info *stmt_info, tree vectype,
-		   unsigned int vec_flags, aarch64_vec_op_count *ops,
-		   const aarch64_base_vec_issue_info *issue_info,
-		   unsigned int factor)
+void
+aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
+				 stmt_vec_info stmt_info, tree vectype,
+				 unsigned int vec_flags,
+				 aarch64_vec_op_count *ops,
+				 const aarch64_base_vec_issue_info *issue_info,
+				 unsigned int factor)
 {
   if (!issue_info)
     return;
@@ -15394,9 +15373,9 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
       && vect_is_reduction (stmt_info))
     {
       unsigned int base
-	= aarch64_in_loop_reduction_latency (vinfo, stmt_info, vectype,
+	= aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, vectype,
 					     vec_flags);
-      if (vect_reduc_type (vinfo, stmt_info) == FOLD_LEFT_REDUCTION)
+      if (vect_reduc_type (m_vinfo, stmt_info) == FOLD_LEFT_REDUCTION)
 	{
 	  if (aarch64_sve_mode_p (TYPE_MODE (vectype)))
 	    {
@@ -15423,7 +15402,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
     }
 
   /* Assume that multiply-adds will become a single operation.  */
-  if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info, vec_flags))
+  if (stmt_info && aarch64_multiply_add_p (m_vinfo, stmt_info, vec_flags))
     return;
 
   /* When costing scalar statements in vector code, the count already
@@ -15473,7 +15452,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
     {
       bool existed = false;
       unsigned int &prev_count
-	= costs->seen_loads.get_or_insert (stmt_info, &existed);
+	= m_seen_loads.get_or_insert (stmt_info, &existed);
       if (existed)
 	num_copies -= prev_count;
       else
@@ -15504,7 +15483,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
      have only accounted for one.  */
   if (vec_flags && (kind == vector_stmt || kind == vec_to_scalar))
     {
-      int reduc_type = vect_reduc_type (vinfo, stmt_info);
+      int reduc_type = vect_reduc_type (m_vinfo, stmt_info);
       if ((reduc_type == EXTRACT_LAST_REDUCTION && (vec_flags & VEC_ADVSIMD))
 	  || reduc_type == COND_REDUCTION)
 	ops->general_ops += num_copies;
@@ -15517,7 +15496,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
       unsigned int base = (FLOAT_TYPE_P (type)
 			   ? sve_issue->fp_cmp_pred_ops
 			   : sve_issue->int_cmp_pred_ops);
-      costs->sve_ops.pred_ops += base * num_copies;
+      m_sve_ops.pred_ops += base * num_copies;
     }
 
   /* Add any extra overhead associated with LD[234] and ST[234] operations.  */
@@ -15543,8 +15522,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
       && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
     {
       unsigned int pairs = CEIL (count, 2);
-      costs->sve_ops.pred_ops
-	+= sve_issue->gather_scatter_pair_pred_ops * pairs;
+      m_sve_ops.pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
       ops->general_ops += sve_issue->gather_scatter_pair_general_ops * pairs;
     }
 }
@@ -15564,14 +15542,17 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 
   /* Do one-time initialization based on the vinfo.  */
   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
-  bb_vec_info bb_vinfo = dyn_cast<bb_vec_info> (m_vinfo);
-  if (!analyzed_vinfo && aarch64_use_new_vector_costs_p ())
+  if (!m_analyzed_vinfo && aarch64_use_new_vector_costs_p ())
     {
+      /* If we're costing the vector code, record whether we're vectorizing
+	 for Advanced SIMD or SVE.  */
+      if (!m_costing_for_scalar)
+	m_vec_flags = aarch64_classify_vector_mode (m_vinfo->vector_mode);
+
       if (loop_vinfo)
-	aarch64_analyze_loop_vinfo (loop_vinfo, this);
-      else
-	aarch64_analyze_bb_vinfo (bb_vinfo, this);
-      this->analyzed_vinfo = true;
+	analyze_loop_vinfo (loop_vinfo);
+
+      m_analyzed_vinfo = true;
     }
 
   /* Try to get a more accurate cost by looking at STMT_INFO instead
@@ -15579,7 +15560,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   if (stmt_info && aarch64_use_new_vector_costs_p ())
     {
       if (vectype && aarch64_sve_only_stmt_p (stmt_info, vectype))
-	this->saw_sve_only_op = true;
+	m_saw_sve_only_op = true;
 
       /* If we scalarize a strided store, the vectorizer costs one
 	 vec_to_scalar for each element.  However, we can store the first
@@ -15587,10 +15568,10 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       if (vect_is_store_elt_extraction (kind, stmt_info))
 	count -= 1;
 
-      stmt_cost = aarch64_detect_scalar_stmt_subtype
-	(m_vinfo, kind, stmt_info, stmt_cost);
+      stmt_cost = aarch64_detect_scalar_stmt_subtype (m_vinfo, kind,
+						      stmt_info, stmt_cost);
 
-      if (vectype && this->vec_flags)
+      if (vectype && m_vec_flags)
 	stmt_cost = aarch64_detect_vector_stmt_subtype (m_vinfo, kind,
 							stmt_info, vectype,
 							where, stmt_cost);
@@ -15614,37 +15595,33 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       auto *issue_info = aarch64_tune_params.vec_costs->issue_info;
       if (loop_vinfo
 	  && issue_info
-	  && this->vec_flags
+	  && m_vec_flags
 	  && where == vect_body
 	  && (!LOOP_VINFO_LOOP (loop_vinfo)->inner || in_inner_loop_p)
 	  && vectype
 	  && stmt_cost != 0)
 	{
 	  /* Record estimates for the scalar code.  */
-	  aarch64_count_ops (m_vinfo, this, count, kind, stmt_info, vectype,
-			     0, &this->scalar_ops, issue_info->scalar,
-			     vect_nunits_for_cost (vectype));
+	  count_ops (count, kind, stmt_info, vectype, 0, &m_scalar_ops,
+		     issue_info->scalar, vect_nunits_for_cost (vectype));
 
 	  if (aarch64_sve_mode_p (m_vinfo->vector_mode) && issue_info->sve)
 	    {
 	      /* Record estimates for a possible Advanced SIMD version
 		 of the SVE code.  */
-	      aarch64_count_ops (m_vinfo, this, count, kind, stmt_info,
-				 vectype, VEC_ADVSIMD, &this->advsimd_ops,
-				 issue_info->advsimd,
-				 aarch64_estimated_sve_vq ());
+	      count_ops (count, kind, stmt_info, vectype, VEC_ADVSIMD,
+			 &m_advsimd_ops, issue_info->advsimd,
+			 aarch64_estimated_sve_vq ());
 
 	      /* Record estimates for the SVE code itself.  */
-	      aarch64_count_ops (m_vinfo, this, count, kind, stmt_info,
-				 vectype, VEC_ANY_SVE, &this->sve_ops,
-				 issue_info->sve, 1);
+	      count_ops (count, kind, stmt_info, vectype, VEC_ANY_SVE,
+			 &m_sve_ops, issue_info->sve, 1);
 	    }
 	  else
 	    /* Record estimates for the Advanced SIMD code.  Treat SVE like
 	       Advanced SIMD if the CPU has no specific SVE costs.  */
-	    aarch64_count_ops (m_vinfo, this, count, kind, stmt_info,
-			       vectype, VEC_ADVSIMD, &this->advsimd_ops,
-			       issue_info->advsimd, 1);
+	    count_ops (count, kind, stmt_info, vectype, VEC_ADVSIMD,
+		       &m_advsimd_ops, issue_info->advsimd, 1);
 	}
 
       /* If we're applying the SVE vs. Advanced SIMD unrolling heuristic,
@@ -15652,9 +15629,8 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	 loop.  For simplicitly, we assume that one iteration of the
 	 Advanced SIMD loop would need the same number of statements
 	 as one iteration of the SVE loop.  */
-      if (where == vect_body && this->unrolled_advsimd_niters)
-	this->unrolled_advsimd_stmts
-	  += count * this->unrolled_advsimd_niters;
+      if (where == vect_body && m_unrolled_advsimd_niters)
+	m_unrolled_advsimd_stmts += count * m_unrolled_advsimd_niters;
     }
   return record_stmt_cost (stmt_info, where, (count * stmt_cost).ceil ());
 }
@@ -15698,32 +15674,28 @@ aarch64_estimate_min_cycles_per_iter
   return cycles;
 }
 
-/* Subroutine of aarch64_adjust_body_cost for handling SVE.
-   Use ISSUE_INFO to work out how fast the SVE code can be issued and compare
-   it to the equivalent value for scalar code (SCALAR_CYCLES_PER_ITER).
-   If COULD_USE_ADVSIMD is true, also compare it to the issue rate of
-   Advanced SIMD code (ADVSIMD_CYCLES_PER_ITER).
+/* Subroutine of adjust_body_cost for handling SVE.  Use ISSUE_INFO to work out
+   how fast the SVE code can be issued and compare it to the equivalent value
+   for scalar code (SCALAR_CYCLES_PER_ITER).  If COULD_USE_ADVSIMD is true,
+   also compare it to the issue rate of Advanced SIMD code
+   (ADVSIMD_CYCLES_PER_ITER).
 
-   COSTS is as for aarch64_adjust_body_cost.  ORIG_BODY_COST is the cost
-   originally passed to aarch64_adjust_body_cost and *BODY_COST is the current
-   value of the adjusted cost.  *SHOULD_DISPARAGE is true if we think the loop
-   body is too expensive.  */
-static fractional_cost
-aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
-			      const aarch64_vec_issue_info *issue_info,
-			      fractional_cost scalar_cycles_per_iter,
-			      fractional_cost advsimd_cycles_per_iter,
-			      bool could_use_advsimd,
-			      unsigned int orig_body_cost,
-			      unsigned int *body_cost,
-			      bool *should_disparage)
+   ORIG_BODY_COST is the cost originally passed to adjust_body_cost and
+   *BODY_COST is the current value of the adjusted cost.  *SHOULD_DISPARAGE
+   is true if we think the loop body is too expensive.  */
+fractional_cost
+aarch64_vector_costs::
+adjust_body_cost_sve (const aarch64_vec_issue_info *issue_info,
+		      fractional_cost scalar_cycles_per_iter,
+		      fractional_cost advsimd_cycles_per_iter,
+		      bool could_use_advsimd, unsigned int orig_body_cost,
+		      unsigned int *body_cost, bool *should_disparage)
 {
   /* Estimate the minimum number of cycles per iteration needed to issue
      non-predicate operations.  */
   fractional_cost sve_nonpred_issue_cycles_per_iter
-    = aarch64_estimate_min_cycles_per_iter (&costs->sve_ops,
-					    issue_info->sve);
+    = aarch64_estimate_min_cycles_per_iter (&m_sve_ops, issue_info->sve);
 
   /* Estimate the minimum number of cycles per iteration needed to rename
      SVE instructions.
@@ -15739,9 +15711,9 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
      ??? This value is very much on the pessimistic side, but seems to work
      pretty well in practice.  */
   sve_rename_cycles_per_iter
-    = { costs->sve_ops.general_ops
-	+ costs->sve_ops.loads
-	+ costs->sve_ops.pred_ops + 1, 5 };
+    = { m_sve_ops.general_ops
+	+ m_sve_ops.loads
+	+ m_sve_ops.pred_ops + 1, 5 };
 
   /* Combine the rename and non-predicate issue limits into a single value.  */
   fractional_cost sve_nonpred_cycles_per_iter
@@ -15750,7 +15722,7 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
   /* Separately estimate the minimum number of cycles per iteration needed
      to issue the predicate operations.  */
   fractional_cost sve_pred_issue_cycles_per_iter
-    = { costs->sve_ops.pred_ops, issue_info->sve->pred_ops_per_cycle };
+    = { m_sve_ops.pred_ops, issue_info->sve->pred_ops_per_cycle };
 
   /* Calculate the overall limit on the number of cycles per iteration.  */
   fractional_cost sve_cycles_per_iter
@@ -15758,15 +15730,15 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
 
   if (dump_enabled_p ())
     {
-      costs->sve_ops.dump ();
+      m_sve_ops.dump ();
       dump_printf_loc (MSG_NOTE, vect_location,
 		       "  estimated cycles per iteration = %f\n",
 		       sve_cycles_per_iter.as_double ());
-      if (costs->sve_ops.pred_ops)
+      if (m_sve_ops.pred_ops)
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "    predicate issue = %f\n",
 			 sve_pred_issue_cycles_per_iter.as_double ());
-      if (costs->sve_ops.pred_ops || sve_rename_cycles_per_iter)
+      if (m_sve_ops.pred_ops || sve_rename_cycles_per_iter)
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "    non-predicate issue = %f\n",
 			 sve_nonpred_issue_cycles_per_iter.as_double ());
@@ -15843,10 +15815,10 @@ aarch64_adjust_body_cost_sve (const aarch64_vector_costs *costs,
   return sve_cycles_per_iter;
 }
 
-/* BODY_COST is the cost of a vector loop body recorded in COSTS.
-   Adjust the cost as necessary and return the new cost.  */
-static unsigned int
-aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
+/* BODY_COST is the cost of a vector loop body.  Adjust the cost as necessary
+   and return the new cost.  */
+unsigned int
+aarch64_vector_costs::adjust_body_cost (unsigned int body_cost)
 {
   unsigned int orig_body_cost = body_cost;
   bool should_disparage = false;
@@ -15855,15 +15827,15 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
     dump_printf_loc (MSG_NOTE, vect_location,
 		     "Original vector body cost = %d\n", body_cost);
 
-  if (costs->unrolled_advsimd_stmts)
+  if (m_unrolled_advsimd_stmts)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location, "Number of insns in"
 			 " unrolled Advanced SIMD loop = %d\n",
-			 costs->unrolled_advsimd_stmts);
+			 m_unrolled_advsimd_stmts);
 
       /* Apply the Advanced SIMD vs. SVE unrolling heuristic described above
-	 aarch64_vector_costs::unrolled_advsimd_niters.
+	 m_unrolled_advsimd_niters.
 
 	 The balance here is tricky.  On the one hand, we can't be sure whether
 	 the code is vectorizable with Advanced SIMD or not.  However, even if
@@ -15871,8 +15843,8 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	 the scalar code could also be unrolled.  Some of the code might then
 	 benefit from SLP, or from using LDP and STP.  We therefore apply
 	 the heuristic regardless of can_use_advsimd_p.  */
-      if (costs->unrolled_advsimd_stmts
-	  && (costs->unrolled_advsimd_stmts
+      if (m_unrolled_advsimd_stmts
+	  && (m_unrolled_advsimd_stmts
 	      <= (unsigned int) param_max_completely_peeled_insns))
 	{
 	  unsigned int estimated_vq = aarch64_estimated_sve_vq ();
@@ -15894,28 +15866,28 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
     return body_cost;
 
   fractional_cost scalar_cycles_per_iter
-    = aarch64_estimate_min_cycles_per_iter (&costs->scalar_ops,
+    = aarch64_estimate_min_cycles_per_iter (&m_scalar_ops,
 					    issue_info->scalar);
 
   fractional_cost advsimd_cycles_per_iter
-    = aarch64_estimate_min_cycles_per_iter (&costs->advsimd_ops,
+    = aarch64_estimate_min_cycles_per_iter (&m_advsimd_ops,
 					    issue_info->advsimd);
 
   bool could_use_advsimd
-    = ((costs->vec_flags & VEC_ADVSIMD)
+    = ((m_vec_flags & VEC_ADVSIMD)
        || (aarch64_autovec_preference != 2
 	   && (aarch64_tune_params.extra_tuning_flags
 	       & AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT)
-	   && !costs->saw_sve_only_op));
+	   && !m_saw_sve_only_op));
 
   if (dump_enabled_p ())
     {
-      if (IN_RANGE (costs->num_vector_iterations, 0, 65536))
+      if (IN_RANGE (m_num_vector_iterations, 0, 65536))
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "Vector loop iterates at most %wd times\n",
-			 costs->num_vector_iterations);
+			 m_num_vector_iterations);
       dump_printf_loc (MSG_NOTE, vect_location, "Scalar issue estimate:\n");
-      costs->scalar_ops.dump ();
+      m_scalar_ops.dump ();
       dump_printf_loc (MSG_NOTE, vect_location,
 		       "  estimated cycles per iteration = %f\n",
 		       scalar_cycles_per_iter.as_double ());
@@ -15923,7 +15895,7 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	{
 	  dump_printf_loc (MSG_NOTE, vect_location,
 			   "Advanced SIMD issue estimate:\n");
-	  costs->advsimd_ops.dump ();
+	  m_advsimd_ops.dump ();
 	  dump_printf_loc (MSG_NOTE, vect_location,
 			   "  estimated cycles per iteration = %f\n",
 			   advsimd_cycles_per_iter.as_double ());
@@ -15934,19 +15906,17 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
     }
 
   fractional_cost vector_cycles_per_iter = advsimd_cycles_per_iter;
-  unsigned int vector_reduction_latency = costs->advsimd_ops.reduction_latency;
+  unsigned int vector_reduction_latency = m_advsimd_ops.reduction_latency;
 
-  if ((costs->vec_flags & VEC_ANY_SVE) && issue_info->sve)
+  if ((m_vec_flags & VEC_ANY_SVE) && issue_info->sve)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location, "SVE issue estimate:\n");
-      vector_reduction_latency = costs->sve_ops.reduction_latency;
+      vector_reduction_latency = m_sve_ops.reduction_latency;
       vector_cycles_per_iter
-	= aarch64_adjust_body_cost_sve (costs, issue_info,
-					scalar_cycles_per_iter,
-					advsimd_cycles_per_iter,
-					could_use_advsimd, orig_body_cost,
-					&body_cost, &should_disparage);
+	= adjust_body_cost_sve (issue_info, scalar_cycles_per_iter,
+				advsimd_cycles_per_iter, could_use_advsimd,
+				orig_body_cost, &body_cost, &should_disparage);
 
       if (aarch64_tune_params.vec_costs == &neoverse512tvb_vector_cost)
 	{
@@ -15956,22 +15926,22 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, vect_location,
 			     "Neoverse V1 estimate:\n");
-	  aarch64_adjust_body_cost_sve (costs, &neoversev1_vec_issue_info,
-					scalar_cycles_per_iter * 2,
-					advsimd_cycles_per_iter * 2,
-					could_use_advsimd, orig_body_cost,
-					&body_cost, &should_disparage);
+	  adjust_body_cost_sve (&neoversev1_vec_issue_info,
+				scalar_cycles_per_iter * 2,
+				advsimd_cycles_per_iter * 2,
+				could_use_advsimd, orig_body_cost,
+				&body_cost, &should_disparage);
 	}
     }
 
   /* Decide whether to stick to latency-based costs or whether to try to
      take issue rates into account.  */
   unsigned int threshold = aarch64_loop_vect_issue_rate_niters;
-  if (costs->vec_flags & VEC_ANY_SVE)
+  if (m_vec_flags & VEC_ANY_SVE)
     threshold = CEIL (threshold, aarch64_estimated_sve_vq ());
 
-  if (costs->num_vector_iterations >= 1
-      && costs->num_vector_iterations < threshold)
+  if (m_num_vector_iterations >= 1
+      && m_num_vector_iterations < threshold)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
@@ -16004,8 +15974,8 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
      vector code is an improvement, even if adding the other (non-loop-carried)
      latencies tends to hide this saving.  We therefore reduce the cost of the
      vector loop body in proportion to the saving.  */
-  else if (costs->scalar_ops.reduction_latency > vector_reduction_latency
-	   && costs->scalar_ops.reduction_latency == scalar_cycles_per_iter
+  else if (m_scalar_ops.reduction_latency > vector_reduction_latency
+	   && m_scalar_ops.reduction_latency == scalar_cycles_per_iter
 	   && scalar_cycles_per_iter > vector_cycles_per_iter
 	   && !should_disparage)
     {
@@ -16023,10 +15993,11 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 void
 aarch64_vector_costs::finish_cost ()
 {
-  if (this->is_loop
-      && this->vec_flags
+  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+  if (loop_vinfo
+      && m_vec_flags
       && aarch64_use_new_vector_costs_p ())
-    m_costs[vect_body] = aarch64_adjust_body_cost (this, m_costs[vect_body]);
+    m_costs[vect_body] = adjust_body_cost (m_costs[vect_body]);
 
   vector_costs::finish_cost ();
 }