Add support for vector permute cost since various permutes can expand into a complex sequence of instructions.

Add support for vector permute cost since various permutes can expand
into a complex sequence of instructions.  This fixes major performance
regressions due to recent changes in the SLP vectorizer (which now
vectorizes more aggressively and emits many complex permutes).  Set the
cost to > 1 for all microarchitectures so that the vectorizer usually
emits no such permutes and the regressions disappear.

2016-01-28  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/aarch64/aarch64.c (generic_vector_cost):
	Set vec_permute_cost.
	(cortexa57_vector_cost): Likewise.
	(exynosm1_vector_cost): Likewise.
	(xgene1_vector_cost): Likewise.
	(aarch64_builtin_vectorization_cost): Use vec_permute_cost.
	* config/aarch64/aarch64-protos.h (cpu_vector_cost):
	Add vec_permute_cost entry.

From-SVN: r232922
This commit is contained in:
Wilco Dijkstra 2016-01-28 11:52:08 +00:00 committed by Wilco Dijkstra
parent e2b691c420
commit c428f91cb7
3 changed files with 19 additions and 1 deletions

View File

@ -1,3 +1,14 @@
2016-01-28 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (generic_vector_cost):
Set vec_permute_cost.
(cortexa57_vector_cost): Likewise.
(exynosm1_vector_cost): Likewise.
(xgene1_vector_cost): Likewise.
(aarch64_builtin_vectorization_cost): Use vec_permute_cost.
* config/aarch64/aarch64-protos.h (cpu_vector_cost):
Add vec_permute_cost entry.
2016-01-28 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.md (ccmp<mode>): Disassemble

View File

@ -156,9 +156,10 @@ struct cpu_vector_cost
const int scalar_load_cost; /* Cost of scalar load. */
const int scalar_store_cost; /* Cost of scalar store. */
const int vec_stmt_cost; /* Cost of any vector operation,
excluding load, store,
excluding load, store, permute,
vector-to-scalar and
scalar-to-vector operation. */
const int vec_permute_cost; /* Cost of permute operation. */
const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */
const int scalar_to_vec_cost; /* Cost of scalar-to-vector
operation. */

View File

@ -315,6 +315,7 @@ static const struct cpu_vector_cost generic_vector_cost =
1, /* scalar_load_cost */
1, /* scalar_store_cost */
1, /* vec_stmt_cost */
2, /* vec_permute_cost */
1, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* vec_align_load_cost */
@ -332,6 +333,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
4, /* scalar_load_cost */
1, /* scalar_store_cost */
3, /* vec_stmt_cost */
3, /* vec_permute_cost */
8, /* vec_to_scalar_cost */
8, /* scalar_to_vec_cost */
5, /* vec_align_load_cost */
@ -348,6 +350,7 @@ static const struct cpu_vector_cost exynosm1_vector_cost =
5, /* scalar_load_cost */
1, /* scalar_store_cost */
3, /* vec_stmt_cost */
3, /* vec_permute_cost */
3, /* vec_to_scalar_cost */
3, /* scalar_to_vec_cost */
5, /* vec_align_load_cost */
@ -365,6 +368,7 @@ static const struct cpu_vector_cost xgene1_vector_cost =
5, /* scalar_load_cost */
1, /* scalar_store_cost */
2, /* vec_stmt_cost */
2, /* vec_permute_cost */
4, /* vec_to_scalar_cost */
4, /* scalar_to_vec_cost */
10, /* vec_align_load_cost */
@ -7574,6 +7578,8 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
case vec_perm:
return aarch64_tune_params.vec_costs->vec_permute_cost;
case vec_promote_demote:
return aarch64_tune_params.vec_costs->vec_stmt_cost;