Add prefetch configuration to aarch64 backend.

* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune):
	New tune structure.
	(struct tune_params): Use cpu_prefetch_tune instead of cache_line_size.
	[Unrelated to main purpose of the patch] Place the pointer field last
	to enable type checking errors when tune structure are wrongly merged.
	* config/aarch64/aarch64.c (generic_prefetch_tune,)
	(exynosm1_prefetch_tune, qdf24xx_prefetch_tune,)
	(thunderx2t99_prefetch_tune): New tune constants.
	(tune_params *_tunings): Update all tunings (no functional change).
	(aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
	PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
	from tunings structures.

Change-Id: I1ddbac1863dcf078a2e5b14dd904debc76a7da94

From-SVN: r249240
This commit is contained in:
Maxim Kuvyrkov 2017-06-16 09:30:43 +00:00 committed by Maxim Kuvyrkov
parent b783399af9
commit 9d2c6e2eb7
3 changed files with 105 additions and 28 deletions

View File

@ -1,3 +1,18 @@
2017-06-16 Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune):
New tune structure.
(struct tune_params): Use cpu_prefetch_tune instead of cache_line_size.
[Unrelated to main purpose of the patch] Place the pointer field last
to enable type checking errors when tune structure are wrongly merged.
* config/aarch64/aarch64.c (generic_prefetch_tune,)
(exynosm1_prefetch_tune, qdf24xx_prefetch_tune,)
(thunderx2t99_prefetch_tune): New tune constants.
(tune_params *_tunings): Update all tunings (no functional change).
(aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
from tunings structures.
2017-06-16 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/81094

View File

@ -203,6 +203,15 @@ struct cpu_approx_modes
const unsigned int recip_sqrt; /* Reciprocal square root. */
};
/* Cache prefetch settings for prefetch-loop-arrays. */
struct cpu_prefetch_tune
{
const int num_slots;
const int l1_cache_size;
const int l1_cache_line_size;
const int l2_cache_size;
};
struct tune_params
{
const struct cpu_cost_table *insn_extra_cost;
@ -224,9 +233,6 @@ struct tune_params
int min_div_recip_mul_df;
/* Value for aarch64_case_values_threshold; or 0 for the default. */
unsigned int max_case_values;
/* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default. */
unsigned int cache_line_size;
/* An enum specifying how to take into account CPU autoprefetch capabilities
during instruction scheduling:
- AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
@ -244,6 +250,10 @@ struct tune_params
} autoprefetcher_model;
unsigned int extra_tuning_flags;
/* Place prefetch struct pointer at the end to enable type checking
errors when tune_params misses elements (e.g., from erroneous merges). */
const struct cpu_prefetch_tune *prefetch;
};
#define AARCH64_FUSION_PAIR(x, name) \

View File

@ -526,6 +526,39 @@ static const cpu_approx_modes xgene1_approx_modes =
AARCH64_APPROX_ALL /* recip_sqrt */
};
/* Generic prefetch settings (which disable prefetch). */
static const cpu_prefetch_tune generic_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
-1, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
static const struct tune_params generic_tunings =
{
&cortexa57_extra_costs,
@ -546,9 +579,9 @@ static const struct tune_params generic_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params cortexa35_tunings =
@ -572,9 +605,9 @@ static const struct tune_params cortexa35_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params cortexa53_tunings =
@ -598,9 +631,9 @@ static const struct tune_params cortexa53_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params cortexa57_tunings =
@ -624,9 +657,9 @@ static const struct tune_params cortexa57_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */
(AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params cortexa72_tunings =
@ -650,9 +683,9 @@ static const struct tune_params cortexa72_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params cortexa73_tunings =
@ -676,11 +709,13 @@ static const struct tune_params cortexa73_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params exynosm1_tunings =
{
&exynosm1_extra_costs,
@ -701,9 +736,9 @@ static const struct tune_params exynosm1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
48, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&exynosm1_prefetch_tune
};
static const struct tune_params thunderx_tunings =
@ -726,9 +761,9 @@ static const struct tune_params thunderx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params xgene1_tunings =
@ -751,9 +786,9 @@ static const struct tune_params xgene1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
};
static const struct tune_params qdf24xx_tunings =
@ -777,9 +812,9 @@ static const struct tune_params qdf24xx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&qdf24xx_prefetch_tune
};
static const struct tune_params thunderx2t99_tunings =
@ -802,9 +837,9 @@ static const struct tune_params thunderx2t99_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&thunderx2t99_prefetch_tune
};
/* Support for fine-grained override of the tuning structures. */
@ -8747,10 +8782,27 @@ aarch64_override_options_internal (struct gcc_options *opts)
opts->x_param_values,
global_options_set.x_param_values);
/* Set the L1 cache line size. */
if (selected_cpu->tune->cache_line_size != 0)
/* Set up parameters to be used in prefetching algorithm. Do not
override the defaults unless we are tuning for a core we have
researched values for. */
if (aarch64_tune_params.prefetch->num_slots > 0)
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
aarch64_tune_params.prefetch->num_slots,
opts->x_param_values,
global_options_set.x_param_values);
if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
maybe_set_param_value (PARAM_L1_CACHE_SIZE,
aarch64_tune_params.prefetch->l1_cache_size,
opts->x_param_values,
global_options_set.x_param_values);
if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
selected_cpu->tune->cache_line_size,
aarch64_tune_params.prefetch->l1_cache_line_size,
opts->x_param_values,
global_options_set.x_param_values);
if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
maybe_set_param_value (PARAM_L2_CACHE_SIZE,
aarch64_tune_params.prefetch->l2_cache_size,
opts->x_param_values,
global_options_set.x_param_values);