Add prefetch configuration to aarch64 backend.
* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune): New tune structure. (struct tune_params): Use cpu_prefetch_tune instead of cache_line_size. [Unrelated to main purpose of the patch] Place the pointer field last to enable type checking errors when tune structures are wrongly merged. * config/aarch64/aarch64.c (generic_prefetch_tune,) (exynosm1_prefetch_tune, qdf24xx_prefetch_tune,) (thunderx2t99_prefetch_tune): New tune constants. (tune_params *_tunings): Update all tunings (no functional change). (aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES, PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE from tunings structures. Change-Id: I1ddbac1863dcf078a2e5b14dd904debc76a7da94 From-SVN: r249240
This commit is contained in:
parent
b783399af9
commit
9d2c6e2eb7
@ -1,3 +1,18 @@
|
||||
2017-06-16 Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
|
||||
|
||||
* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune):
|
||||
New tune structure.
|
||||
(struct tune_params): Use cpu_prefetch_tune instead of cache_line_size.
|
||||
[Unrelated to main purpose of the patch] Place the pointer field last
|
||||
to enable type checking errors when tune structures are wrongly merged.
|
||||
* config/aarch64/aarch64.c (generic_prefetch_tune,)
|
||||
(exynosm1_prefetch_tune, qdf24xx_prefetch_tune,)
|
||||
(thunderx2t99_prefetch_tune): New tune constants.
|
||||
(tune_params *_tunings): Update all tunings (no functional change).
|
||||
(aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
|
||||
PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
|
||||
from tunings structures.
|
||||
|
||||
2017-06-16 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR sanitizer/81094
|
||||
|
@ -203,6 +203,15 @@ struct cpu_approx_modes
|
||||
const unsigned int recip_sqrt; /* Reciprocal square root. */
|
||||
};
|
||||
|
||||
/* Cache prefetch settings for prefetch-loop-arrays. */
|
||||
struct cpu_prefetch_tune
|
||||
{
|
||||
const int num_slots; /* Passed on as PARAM_SIMULTANEOUS_PREFETCHES by aarch64_override_options_internal when > 0.  */
|
||||
const int l1_cache_size; /* Passed on as PARAM_L1_CACHE_SIZE when >= 0; negative leaves the param alone.  */
|
||||
const int l1_cache_line_size; /* Passed on as PARAM_L1_CACHE_LINE_SIZE when >= 0; negative leaves the param alone.  */
|
||||
const int l2_cache_size; /* Passed on as PARAM_L2_CACHE_SIZE when >= 0; negative leaves the param alone.  */
|
||||
};
|
||||
|
||||
struct tune_params
|
||||
{
|
||||
const struct cpu_cost_table *insn_extra_cost;
|
||||
@ -224,9 +233,6 @@ struct tune_params
|
||||
int min_div_recip_mul_df;
|
||||
/* Value for aarch64_case_values_threshold; or 0 for the default. */
|
||||
unsigned int max_case_values;
|
||||
/* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default. */
|
||||
unsigned int cache_line_size;
|
||||
|
||||
/* An enum specifying how to take into account CPU autoprefetch capabilities
|
||||
during instruction scheduling:
|
||||
- AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
|
||||
@ -244,6 +250,10 @@ struct tune_params
|
||||
} autoprefetcher_model;
|
||||
|
||||
unsigned int extra_tuning_flags;
|
||||
|
||||
/* Place prefetch struct pointer at the end to enable type checking
|
||||
errors when tune_params misses elements (e.g., from erroneous merges). */
|
||||
const struct cpu_prefetch_tune *prefetch;
|
||||
};
|
||||
|
||||
#define AARCH64_FUSION_PAIR(x, name) \
|
||||
|
@ -526,6 +526,39 @@ static const cpu_approx_modes xgene1_approx_modes =
|
||||
AARCH64_APPROX_ALL /* recip_sqrt */
|
||||
};
|
||||
|
||||
/* Generic prefetch settings (which disable prefetch). */
|
||||
/* None of the values below passes the "> 0" / ">= 0" checks in
   aarch64_override_options_internal, so no prefetch-related param is
   set from this structure.  */
static const cpu_prefetch_tune generic_prefetch_tune =
|
||||
{
|
||||
0, /* num_slots: not > 0, PARAM_SIMULTANEOUS_PREFETCHES is not set.  */
|
||||
-1, /* l1_cache_size: negative, PARAM_L1_CACHE_SIZE is not set.  */
|
||||
-1, /* l1_cache_line_size: negative, PARAM_L1_CACHE_LINE_SIZE is not set.  */
|
||||
-1 /* l2_cache_size: negative, PARAM_L2_CACHE_SIZE is not set.  */
|
||||
};
|
||||
|
||||
/* Prefetch settings referenced by exynosm1_tunings.  Only the L1 cache
   line size is non-negative, so it is the only value that
   aarch64_override_options_internal hands to maybe_set_param_value.  */
static const cpu_prefetch_tune exynosm1_prefetch_tune =
|
||||
{
|
||||
0, /* num_slots: not > 0, PARAM_SIMULTANEOUS_PREFETCHES is not set.  */
|
||||
-1, /* l1_cache_size: negative, PARAM_L1_CACHE_SIZE is not set.  */
|
||||
64, /* l1_cache_line_size: used for PARAM_L1_CACHE_LINE_SIZE.  */
|
||||
-1 /* l2_cache_size: negative, PARAM_L2_CACHE_SIZE is not set.  */
|
||||
};
|
||||
|
||||
/* Prefetch settings referenced by qdf24xx_tunings.  Only the L1 cache
   line size is non-negative, so it is the only value that
   aarch64_override_options_internal hands to maybe_set_param_value.  */
static const cpu_prefetch_tune qdf24xx_prefetch_tune =
|
||||
{
|
||||
0, /* num_slots: not > 0, PARAM_SIMULTANEOUS_PREFETCHES is not set.  */
|
||||
-1, /* l1_cache_size: negative, PARAM_L1_CACHE_SIZE is not set.  */
|
||||
64, /* l1_cache_line_size: used for PARAM_L1_CACHE_LINE_SIZE.  */
|
||||
-1 /* l2_cache_size: negative, PARAM_L2_CACHE_SIZE is not set.  */
|
||||
};
|
||||
|
||||
/* Prefetch settings referenced by thunderx2t99_tunings.  Only the L1
   cache line size is non-negative, so it is the only value that
   aarch64_override_options_internal hands to maybe_set_param_value.  */
static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
|
||||
{
|
||||
0, /* num_slots: not > 0, PARAM_SIMULTANEOUS_PREFETCHES is not set.  */
|
||||
-1, /* l1_cache_size: negative, PARAM_L1_CACHE_SIZE is not set.  */
|
||||
64, /* l1_cache_line_size: used for PARAM_L1_CACHE_LINE_SIZE.  */
|
||||
-1 /* l2_cache_size: negative, PARAM_L2_CACHE_SIZE is not set.  */
|
||||
};
|
||||
|
||||
static const struct tune_params generic_tunings =
|
||||
{
|
||||
&cortexa57_extra_costs,
|
||||
@ -546,9 +579,9 @@ static const struct tune_params generic_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa35_tunings =
|
||||
@ -572,9 +605,9 @@ static const struct tune_params cortexa35_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa53_tunings =
|
||||
@ -598,9 +631,9 @@ static const struct tune_params cortexa53_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa57_tunings =
|
||||
@ -624,9 +657,9 @@ static const struct tune_params cortexa57_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa72_tunings =
|
||||
@ -650,9 +683,9 @@ static const struct tune_params cortexa72_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa73_tunings =
|
||||
@ -676,11 +709,13 @@ static const struct tune_params cortexa73_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
|
||||
|
||||
static const struct tune_params exynosm1_tunings =
|
||||
{
|
||||
&exynosm1_extra_costs,
|
||||
@ -701,9 +736,9 @@ static const struct tune_params exynosm1_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
48, /* max_case_values. */
|
||||
64, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&exynosm1_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params thunderx_tunings =
|
||||
@ -726,9 +761,9 @@ static const struct tune_params thunderx_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params xgene1_tunings =
|
||||
@ -751,9 +786,9 @@ static const struct tune_params xgene1_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params qdf24xx_tunings =
|
||||
@ -777,9 +812,9 @@ static const struct tune_params qdf24xx_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
64, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&qdf24xx_prefetch_tune
|
||||
};
|
||||
|
||||
static const struct tune_params thunderx2t99_tunings =
|
||||
@ -802,9 +837,9 @@ static const struct tune_params thunderx2t99_tunings =
|
||||
2, /* min_div_recip_mul_sf. */
|
||||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
64, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&thunderx2t99_prefetch_tune
|
||||
};
|
||||
|
||||
/* Support for fine-grained override of the tuning structures. */
|
||||
@ -8747,10 +8782,27 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
||||
opts->x_param_values,
|
||||
global_options_set.x_param_values);
|
||||
|
||||
/* Set the L1 cache line size. */
|
||||
if (selected_cpu->tune->cache_line_size != 0)
|
||||
/* Set up parameters to be used in prefetching algorithm. Do not
|
||||
override the defaults unless we are tuning for a core we have
|
||||
researched values for. */
|
||||
if (aarch64_tune_params.prefetch->num_slots > 0)
|
||||
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
|
||||
aarch64_tune_params.prefetch->num_slots,
|
||||
opts->x_param_values,
|
||||
global_options_set.x_param_values);
|
||||
if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
|
||||
maybe_set_param_value (PARAM_L1_CACHE_SIZE,
|
||||
aarch64_tune_params.prefetch->l1_cache_size,
|
||||
opts->x_param_values,
|
||||
global_options_set.x_param_values);
|
||||
if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
|
||||
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
|
||||
selected_cpu->tune->cache_line_size,
|
||||
aarch64_tune_params.prefetch->l1_cache_line_size,
|
||||
opts->x_param_values,
|
||||
global_options_set.x_param_values);
|
||||
if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
|
||||
maybe_set_param_value (PARAM_L2_CACHE_SIZE,
|
||||
aarch64_tune_params.prefetch->l2_cache_size,
|
||||
opts->x_param_values,
|
||||
global_options_set.x_param_values);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user