aarch64: Revert bogus fix for PR105254
In f2ebf2d98e I'd forced the
chosen unroll factor to be a factor of the VF, in order to
work around an exact_div ICE in PR105254. This was completely
bogus -- clearly I didn't look in enough detail at why we ended
up with an unrolled VF that wasn't a multiple of the UF.
Kewen has since fixed the bug properly for PR105940, so this
patch reverts my earlier attempt. Sorry for the stupidity.
gcc/
PR tree-optimization/105254
PR tree-optimization/105940
Revert:
* config/aarch64/aarch64.cc
(aarch64_vector_costs::determine_suggested_unroll_factor): Take a
loop_vec_info as argument. Restrict the unroll factor to values
that divide the VF.
(aarch64_vector_costs::finish_cost): Update call accordingly.
gcc/testsuite/
* gcc.target/aarch64/sve/cost_model_14.c: New test.
This commit is contained in:
parent
183a4f3829
commit
2636660b6f
|
@ -15631,7 +15631,7 @@ private:
|
|||
unsigned int adjust_body_cost (loop_vec_info, const aarch64_vector_costs *,
|
||||
unsigned int);
|
||||
bool prefer_unrolled_loop () const;
|
||||
unsigned int determine_suggested_unroll_factor (loop_vec_info);
|
||||
unsigned int determine_suggested_unroll_factor ();
|
||||
|
||||
/* True if we have performed one-time initialization based on the
|
||||
vec_info. */
|
||||
|
@ -16740,8 +16740,7 @@ adjust_body_cost_sve (const aarch64_vec_op_count *ops,
|
|||
}
|
||||
|
||||
unsigned int
|
||||
aarch64_vector_costs::
|
||||
determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
|
||||
aarch64_vector_costs::determine_suggested_unroll_factor ()
|
||||
{
|
||||
bool sve = m_vec_flags & VEC_ANY_SVE;
|
||||
/* If we are trying to unroll an Advanced SIMD main loop that contains
|
||||
|
@ -16755,7 +16754,6 @@ determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
|
|||
return 1;
|
||||
|
||||
unsigned int max_unroll_factor = 1;
|
||||
auto vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
for (auto vec_ops : m_ops)
|
||||
{
|
||||
aarch64_simd_vec_issue_info const *vec_issue
|
||||
|
@ -16764,8 +16762,7 @@ determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
|
|||
return 1;
|
||||
/* Limit unroll factor to a value adjustable by the user, the default
|
||||
value is 4. */
|
||||
unsigned int unroll_factor = MIN (aarch64_vect_unroll_limit,
|
||||
(int) known_alignment (vf));
|
||||
unsigned int unroll_factor = aarch64_vect_unroll_limit;
|
||||
unsigned int factor
|
||||
= vec_ops.reduction_latency > 1 ? vec_ops.reduction_latency : 1;
|
||||
unsigned int temp;
|
||||
|
@ -16943,8 +16940,7 @@ aarch64_vector_costs::finish_cost (const vector_costs *uncast_scalar_costs)
|
|||
{
|
||||
m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs,
|
||||
m_costs[vect_body]);
|
||||
m_suggested_unroll_factor
|
||||
= determine_suggested_unroll_factor (loop_vinfo);
|
||||
m_suggested_unroll_factor = determine_suggested_unroll_factor ();
|
||||
}
|
||||
|
||||
/* Apply the heuristic described above m_stp_sequence_cost. Prefer
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
/* { dg-options "-O3 -mtune=neoverse-v1" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Return the sum of the N 64-bit elements starting at PTR.  The loop is a
   plain add reduction into a single accumulator, RES.  */
uint64_t f2(uint64_t *ptr, int n) {
|
||||
uint64_t res = 0;
|
||||
for (int i = 0; i < n; ++i)
|
||||
res += ptr[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */
|
Loading…
Reference in New Issue