[vect] Add main vectorized loop unrolling
gcc/ChangeLog: * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Pass new argument suggested_unroll_factor. (vect_analyze_loop_costing): Likewise. (_loop_vec_info::_loop_vec_info): Initialize new member suggested_unroll_factor. (vect_determine_partial_vectors_and_peeling): Make epilogue of unrolled main loop use partial vectors. (vect_analyze_loop_2): Pass and use new argument suggested_unroll_factor. (vect_analyze_loop_1): Change to initialize local suggested_unroll_factor and use it. (vectorizable_reduction): Don't use single_defuse_cycle when unrolling. * tree-vectorizer.h (_loop_vec_info::_loop_vec_info): Add new member suggested_unroll_factor. (vector_costs::vector_costs): Add new member m_suggested_unroll_factor. (vector_costs::suggested_unroll_factor): New getter function. (finish_cost): Set return argument suggested_unroll_factor.
This commit is contained in:
parent
254ada46ae
commit
7ca1582ca6
@ -154,7 +154,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
http://gcc.gnu.org/projects/tree-ssa/vectorization.html
|
||||
*/
|
||||
|
||||
static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
|
||||
static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
|
||||
unsigned *);
|
||||
static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
|
||||
bool *, bool *);
|
||||
|
||||
@ -831,6 +832,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
|
||||
skip_main_loop_edge (nullptr),
|
||||
skip_this_loop_edge (nullptr),
|
||||
reusable_accumulators (),
|
||||
suggested_unroll_factor (1),
|
||||
max_vectorization_factor (0),
|
||||
mask_skip_niters (NULL_TREE),
|
||||
rgroup_compare_type (NULL_TREE),
|
||||
@ -1834,7 +1836,8 @@ vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
|
||||
definitely no, or -1 if it's worth retrying. */
|
||||
|
||||
static int
|
||||
vect_analyze_loop_costing (loop_vec_info loop_vinfo)
|
||||
vect_analyze_loop_costing (loop_vec_info loop_vinfo,
|
||||
unsigned *suggested_unroll_factor)
|
||||
{
|
||||
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
|
||||
@ -1868,7 +1871,8 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
|
||||
|
||||
int min_profitable_iters, min_profitable_estimate;
|
||||
vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
|
||||
&min_profitable_estimate);
|
||||
&min_profitable_estimate,
|
||||
suggested_unroll_factor);
|
||||
|
||||
if (min_profitable_iters < 0)
|
||||
{
|
||||
@ -2152,10 +2156,16 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
|
||||
vectors to the epilogue, with the main loop continuing to operate
|
||||
on full vectors.
|
||||
|
||||
If we are unrolling we also do not want to use partial vectors. This
|
||||
is to avoid the overhead of generating multiple masks and also to
|
||||
avoid having to execute entire iterations of FALSE masked instructions
|
||||
when dealing with one or fewer full iterations.
|
||||
|
||||
??? We could then end up failing to use partial vectors if we
|
||||
decide to peel iterations into a prologue, and if the main loop
|
||||
then ends up processing fewer than VF iterations. */
|
||||
if (param_vect_partial_vector_usage == 1
|
||||
if ((param_vect_partial_vector_usage == 1
|
||||
|| loop_vinfo->suggested_unroll_factor > 1)
|
||||
&& !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
|
||||
&& !vect_known_niters_smaller_than_vf (loop_vinfo))
|
||||
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
|
||||
@ -2222,7 +2232,8 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
|
||||
for it. The different analyses will record information in the
|
||||
loop_vec_info struct. */
|
||||
static opt_result
|
||||
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
|
||||
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
|
||||
unsigned *suggested_unroll_factor)
|
||||
{
|
||||
opt_result ok = opt_result::success ();
|
||||
int res;
|
||||
@ -2382,6 +2393,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
|
||||
set of rgroups. */
|
||||
gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
|
||||
|
||||
/* Apply the suggested unrolling factor, this was determined by the backend
|
||||
during finish_cost the first time we ran the analysis for this
|
||||
vector mode. */
|
||||
if (loop_vinfo->suggested_unroll_factor > 1)
|
||||
LOOP_VINFO_VECT_FACTOR (loop_vinfo) *= loop_vinfo->suggested_unroll_factor;
|
||||
|
||||
/* This is the point where we can re-start analysis with SLP forced off. */
|
||||
start_over:
|
||||
|
||||
@ -2573,7 +2590,7 @@ start_over:
|
||||
return ok;
|
||||
|
||||
/* Check the costings of the loop make vectorizing worthwhile. */
|
||||
res = vect_analyze_loop_costing (loop_vinfo);
|
||||
res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor);
|
||||
if (res < 0)
|
||||
{
|
||||
ok = opt_result::failure_at (vect_location,
|
||||
@ -2851,15 +2868,38 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
|
||||
|
||||
machine_mode vector_mode = vector_modes[mode_i];
|
||||
loop_vinfo->vector_mode = vector_mode;
|
||||
unsigned int suggested_unroll_factor = 1;
|
||||
|
||||
/* Run the main analysis. */
|
||||
opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal);
|
||||
opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
|
||||
&suggested_unroll_factor);
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"***** Analysis %s with vector mode %s\n",
|
||||
res ? "succeeded" : " failed",
|
||||
GET_MODE_NAME (loop_vinfo->vector_mode));
|
||||
|
||||
if (!main_loop_vinfo && suggested_unroll_factor > 1)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"***** Re-trying analysis for unrolling"
|
||||
" with unroll factor %d.\n",
|
||||
suggested_unroll_factor);
|
||||
loop_vec_info unroll_vinfo
|
||||
= vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo);
|
||||
unroll_vinfo->vector_mode = vector_mode;
|
||||
unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
|
||||
opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL);
|
||||
if (new_res)
|
||||
{
|
||||
delete loop_vinfo;
|
||||
loop_vinfo = unroll_vinfo;
|
||||
}
|
||||
else
|
||||
delete unroll_vinfo;
|
||||
}
|
||||
|
||||
/* Remember the autodetected vector mode. */
|
||||
if (vector_mode == VOIDmode)
|
||||
autodetected_vector_mode = loop_vinfo->vector_mode;
|
||||
@ -3860,7 +3900,8 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
|
||||
static void
|
||||
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
|
||||
int *ret_min_profitable_niters,
|
||||
int *ret_min_profitable_estimate)
|
||||
int *ret_min_profitable_estimate,
|
||||
unsigned *suggested_unroll_factor)
|
||||
{
|
||||
int min_profitable_iters;
|
||||
int min_profitable_estimate;
|
||||
@ -4227,7 +4268,22 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
|
||||
|
||||
/* Complete the target-specific cost calculations. */
|
||||
finish_cost (loop_vinfo->vector_costs, loop_vinfo->scalar_costs,
|
||||
&vec_prologue_cost, &vec_inside_cost, &vec_epilogue_cost);
|
||||
&vec_prologue_cost, &vec_inside_cost, &vec_epilogue_cost,
|
||||
suggested_unroll_factor);
|
||||
|
||||
if (suggested_unroll_factor && *suggested_unroll_factor > 1
|
||||
&& LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) != MAX_VECTORIZATION_FACTOR
|
||||
&& !known_le (LOOP_VINFO_VECT_FACTOR (loop_vinfo) *
|
||||
*suggested_unroll_factor,
|
||||
LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"can't unroll as unrolled vectorization factor larger"
|
||||
" than maximum vectorization factor: %d\n",
|
||||
LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo));
|
||||
*suggested_unroll_factor = 1;
|
||||
}
|
||||
|
||||
vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
|
||||
|
||||
@ -7194,10 +7250,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
|
||||
|
||||
This only works when we see both the reduction PHI and its only consumer
|
||||
in vectorizable_reduction and there are no intermediate stmts
|
||||
participating. */
|
||||
participating. When unrolling we want each unrolled iteration to have its
|
||||
own reduction accumulator since one of the main goals of unrolling a
|
||||
reduction is to reduce the aggregate loop-carried latency. */
|
||||
if (ncopies > 1
|
||||
&& (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
|
||||
&& reduc_chain_length == 1)
|
||||
&& reduc_chain_length == 1
|
||||
&& loop_vinfo->suggested_unroll_factor == 1)
|
||||
single_defuse_cycle = true;
|
||||
|
||||
if (single_defuse_cycle || lane_reduc_code_p)
|
||||
|
@ -642,6 +642,13 @@ public:
|
||||
about the reductions that generated them. */
|
||||
hash_map<tree, vect_reusable_accumulator> reusable_accumulators;
|
||||
|
||||
/* The number of times that the target suggested we unroll the vector loop
|
||||
in order to promote more ILP. This value will be used to re-analyze the
|
||||
loop for vectorization and if successful the value will be folded into
|
||||
vectorization_factor (and therefore exactly divides
|
||||
vectorization_factor). */
|
||||
unsigned int suggested_unroll_factor;
|
||||
|
||||
/* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
|
||||
if there is no particular limit. */
|
||||
unsigned HOST_WIDE_INT max_vectorization_factor;
|
||||
@ -1465,6 +1472,7 @@ public:
|
||||
unsigned int epilogue_cost () const;
|
||||
unsigned int outside_cost () const;
|
||||
unsigned int total_cost () const;
|
||||
unsigned int suggested_unroll_factor () const;
|
||||
|
||||
protected:
|
||||
unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
|
||||
@ -1484,6 +1492,9 @@ protected:
|
||||
/* The costs of the three regions, indexed by vect_cost_model_location. */
|
||||
unsigned int m_costs[3];
|
||||
|
||||
/* The suggested unrolling factor determined at finish_cost. */
|
||||
unsigned int m_suggested_unroll_factor;
|
||||
|
||||
/* True if finish_cost has been called. */
|
||||
bool m_finished;
|
||||
};
|
||||
@ -1496,6 +1507,7 @@ vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar)
|
||||
: m_vinfo (vinfo),
|
||||
m_costing_for_scalar (costing_for_scalar),
|
||||
m_costs (),
|
||||
m_suggested_unroll_factor(1),
|
||||
m_finished (false)
|
||||
{
|
||||
}
|
||||
@ -1544,6 +1556,15 @@ vector_costs::total_cost () const
|
||||
return body_cost () + outside_cost ();
|
||||
}
|
||||
|
||||
/* Return the suggested unroll factor. */
|
||||
|
||||
inline unsigned int
|
||||
vector_costs::suggested_unroll_factor () const
|
||||
{
|
||||
gcc_checking_assert (m_finished);
|
||||
return m_suggested_unroll_factor;
|
||||
}
|
||||
|
||||
#define VECT_MAX_COST 1000
|
||||
|
||||
/* The maximum number of intermediate steps required in multi-step type
|
||||
@ -1720,12 +1741,14 @@ add_stmt_cost (vector_costs *costs, stmt_info_for_cost *i)
|
||||
static inline void
|
||||
finish_cost (vector_costs *costs, const vector_costs *scalar_costs,
|
||||
unsigned *prologue_cost, unsigned *body_cost,
|
||||
unsigned *epilogue_cost)
|
||||
unsigned *epilogue_cost, unsigned *suggested_unroll_factor = NULL)
|
||||
{
|
||||
costs->finish_cost (scalar_costs);
|
||||
*prologue_cost = costs->prologue_cost ();
|
||||
*body_cost = costs->body_cost ();
|
||||
*epilogue_cost = costs->epilogue_cost ();
|
||||
if (suggested_unroll_factor)
|
||||
*suggested_unroll_factor = costs->suggested_unroll_factor ();
|
||||
}
|
||||
|
||||
inline void
|
||||
|
Loading…
x
Reference in New Issue
Block a user