From c5126ce8cae4f14194414e266be91fdc4b756807 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 3 Jan 2018 07:14:24 +0000 Subject: [PATCH] poly_int: vect_nunits_for_cost This patch adds a function for getting the number of elements in a vector for cost purposes, which is always constant. It makes it possible for a later patch to change GET_MODE_NUNITS and TYPE_VECTOR_SUBPARTS to a poly_int. 2018-01-03 Richard Sandiford Alan Hayward David Sherwood gcc/ * tree-vectorizer.h (vect_nunits_for_cost): New function. * tree-vect-loop.c (vect_model_reduction_cost): Use it. * tree-vect-slp.c (vect_analyze_slp_cost_1): Likewise. (vect_analyze_slp_cost): Likewise. * tree-vect-stmts.c (vect_model_store_cost): Likewise. (vect_model_load_cost): Likewise. Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r256128 --- gcc/ChangeLog | 11 +++++++++++ gcc/tree-vect-loop.c | 8 +++++--- gcc/tree-vect-slp.c | 20 +++++++++++--------- gcc/tree-vect-stmts.c | 24 ++++++++++++++++-------- gcc/tree-vectorizer.h | 10 ++++++++++ 5 files changed, 53 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 45579c740bc..52a10ad2305 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2018-01-03 Richard Sandiford + Alan Hayward + David Sherwood + + * tree-vectorizer.h (vect_nunits_for_cost): New function. + * tree-vect-loop.c (vect_model_reduction_cost): Use it. + * tree-vect-slp.c (vect_analyze_slp_cost_1): Likewise. + (vect_analyze_slp_cost): Likewise. + * tree-vect-stmts.c (vect_model_store_cost): Likewise. + (vect_model_load_cost): Likewise. + 2018-01-03 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 4c5729796ef..c58a08d8d38 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3854,13 +3854,15 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, } else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) { - unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned estimated_nunits = vect_nunits_for_cost (vectype); /* Extraction of scalar elements. */ - epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits, + epilogue_cost += add_stmt_cost (target_cost_data, + 2 * estimated_nunits, vec_to_scalar, stmt_info, 0, vect_epilogue); /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ - epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3, + epilogue_cost += add_stmt_cost (target_cost_data, + 2 * estimated_nunits - 3, scalar_stmt, stmt_info, 0, vect_epilogue); } diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 26e7d652f44..d41056932da 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1730,8 +1730,8 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, &n_perms); record_stmt_cost (body_cost_vec, n_perms, vec_perm, stmt_info, 0, vect_body); - unsigned nunits - = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); + unsigned assumed_nunits + = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info)); /* And adjust the number of loads performed. This handles redundancies as well as loads that are later dead. */ auto_sbitmap perm (GROUP_SIZE (stmt_info)); @@ -1742,7 +1742,7 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, bool load_seen = false; for (i = 0; i < GROUP_SIZE (stmt_info); ++i) { - if (i % nunits == 0) + if (i % assumed_nunits == 0) { if (load_seen) ncopies_for_cost++; @@ -1755,7 +1755,7 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, ncopies_for_cost++; gcc_assert (ncopies_for_cost <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info) - + nunits - 1) / nunits); + + assumed_nunits - 1) / assumed_nunits); poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance); ncopies_for_cost *= estimated_poly_value (uf); } @@ -1868,9 +1868,9 @@ vect_analyze_slp_cost (slp_instance instance, void *data) assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info)); else assumed_vf = 1; - unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); /* For reductions look at a reduction operand in case the reduction operation is widening like DOT_PROD or SAD. */ + tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info); if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) { gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; @@ -1878,14 +1878,16 @@ vect_analyze_slp_cost (slp_instance instance, void *data) { case DOT_PROD_EXPR: case SAD_EXPR: - nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type - (TREE_TYPE (gimple_assign_rhs1 (stmt)))); + vectype_for_cost = get_vectype_for_scalar_type + (TREE_TYPE (gimple_assign_rhs1 (stmt))); break; default:; } } - ncopies_for_cost = least_common_multiple (nunits, - group_size * assumed_vf) / nunits; + unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost); + ncopies_for_cost = (least_common_multiple (assumed_nunits, + group_size * assumed_vf) + / assumed_nunits); prologue_cost_vec.create (10); body_cost_vec.create (10); diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index bae72d09854..6ca3a16c4fc 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -958,18 +958,25 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, /* Costs of the stores. */ if (memory_access_type == VMAT_ELEMENTWISE || memory_access_type == VMAT_GATHER_SCATTER) - /* N scalar stores plus extracting the elements. */ - inside_cost += record_stmt_cost (body_cost_vec, - ncopies * TYPE_VECTOR_SUBPARTS (vectype), - scalar_store, stmt_info, 0, vect_body); + { + /* N scalar stores plus extracting the elements. */ + unsigned int assumed_nunits = vect_nunits_for_cost (vectype); + inside_cost += record_stmt_cost (body_cost_vec, + ncopies * assumed_nunits, + scalar_store, stmt_info, 0, vect_body); + } else vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec); if (memory_access_type == VMAT_ELEMENTWISE || memory_access_type == VMAT_STRIDED_SLP) - inside_cost += record_stmt_cost (body_cost_vec, - ncopies * TYPE_VECTOR_SUBPARTS (vectype), - vec_to_scalar, stmt_info, 0, vect_body); + { + /* N scalar stores plus extracting the elements. */ + unsigned int assumed_nunits = vect_nunits_for_cost (vectype); + inside_cost += record_stmt_cost (body_cost_vec, + ncopies * assumed_nunits, + vec_to_scalar, stmt_info, 0, vect_body); + } if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1089,8 +1096,9 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, { /* N scalar loads plus gathering them into a vector. */ tree vectype = STMT_VINFO_VECTYPE (stmt_info); + unsigned int assumed_nunits = vect_nunits_for_cost (vectype); inside_cost += record_stmt_cost (body_cost_vec, - ncopies * TYPE_VECTOR_SUBPARTS (vectype), + ncopies * assumed_nunits, scalar_load, stmt_info, 0, vect_body); } else diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 9619286ed34..f6938e4f60f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1154,6 +1154,16 @@ vect_vf_for_cost (loop_vec_info loop_vinfo) return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); } +/* Estimate the number of elements in VEC_TYPE for costing purposes. + Pick a reasonable estimate if the exact number isn't known at + compile time. */ + +static inline unsigned int +vect_nunits_for_cost (tree vec_type) +{ + return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type)); +} + /* Return the size of the value accessed by unvectorized data reference DR. This is only valid once STMT_VINFO_VECTYPE has been calculated for the associated gimple statement, since that guarantees that DR accesses