From 005e72b9ddb6b2d9d954b7a48c3d3c28972fff4d Mon Sep 17 00:00:00 2001 From: Luis Machado Date: Mon, 7 May 2018 14:12:54 +0000 Subject: [PATCH] Introduce prefetch-dynamic-strides option. The following patch adds an option to control software prefetching of memory references with non-constant/unknown strides. Currently we prefetch these references if the pass thinks there is benefit to doing so. But, since this is all based on heuristics, it's not always the case that we end up with better performance. For Falkor there is also the problem of conflicts with the hardware prefetcher, so we need to be more conservative in terms of what we issue software prefetch hints for. This also aligns GCC with what LLVM does for Falkor. Similarly to the previous patch, the defaults guarantee no change in behavior for other targets and architectures. 2018-05-07 Luis Machado gcc/ * config/aarch64/aarch64-protos.h (cpu_prefetch_tune) <prefetch_dynamic_strides>: New const bool field. * config/aarch64/aarch64.c (generic_prefetch_tune): Update to include prefetch_dynamic_strides. (exynosm1_prefetch_tune): Likewise. (thunderxt88_prefetch_tune): Likewise. (thunderx_prefetch_tune): Likewise. (thunderx2t99_prefetch_tune): Likewise. (qdf24xx_prefetch_tune): Likewise. Set prefetch_dynamic_strides to false. (aarch64_override_options_internal): Update to set PARAM_PREFETCH_DYNAMIC_STRIDES. * doc/invoke.texi (prefetch-dynamic-strides): Document new option. * params.def (PARAM_PREFETCH_DYNAMIC_STRIDES): New. * params.h (PARAM_PREFETCH_DYNAMIC_STRIDES): Define. * tree-ssa-loop-prefetch.c (should_issue_prefetch_p): Account for prefetch-dynamic-strides setting. 
From-SVN: r259996 --- gcc/ChangeLog | 20 ++++++++++++++++++++ gcc/config/aarch64/aarch64-protos.h | 3 +++ gcc/config/aarch64/aarch64.c | 11 +++++++++++ gcc/doc/invoke.texi | 10 ++++++++++ gcc/params.def | 9 +++++++++ gcc/params.h | 2 ++ gcc/tree-ssa-loop-prefetch.c | 10 ++++++++++ 7 files changed, 65 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 535204d3b67..64a385e851a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2018-05-07 Luis Machado + + gcc/ + * config/aarch64/aarch64-protos.h (cpu_prefetch_tune) + <prefetch_dynamic_strides>: New const bool field. + * config/aarch64/aarch64.c (generic_prefetch_tune): Update to include + prefetch_dynamic_strides. + (exynosm1_prefetch_tune): Likewise. + (thunderxt88_prefetch_tune): Likewise. + (thunderx_prefetch_tune): Likewise. + (thunderx2t99_prefetch_tune): Likewise. + (qdf24xx_prefetch_tune): Likewise. Set prefetch_dynamic_strides to false. + (aarch64_override_options_internal): Update to set + PARAM_PREFETCH_DYNAMIC_STRIDES. + * doc/invoke.texi (prefetch-dynamic-strides): Document new option. + * params.def (PARAM_PREFETCH_DYNAMIC_STRIDES): New. + * params.h (PARAM_PREFETCH_DYNAMIC_STRIDES): Define. + * tree-ssa-loop-prefetch.c (should_issue_prefetch_p): Account for + prefetch-dynamic-strides setting. + 2018-05-07 Luis Machado gcc/ diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 5d3b9d7a06e..157308ddb79 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -230,6 +230,9 @@ struct cpu_prefetch_tune const int l1_cache_size; const int l1_cache_line_size; const int l2_cache_size; + /* Whether software prefetch hints should be issued for non-constant + strides. */ + const bool prefetch_dynamic_strides; /* The minimum constant stride beyond which we should use prefetch hints for. 
*/ const int minimum_stride; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 7d0ba35b7be..3d3a9763824 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -547,6 +547,7 @@ static const cpu_prefetch_tune generic_prefetch_tune = -1, /* l1_cache_size */ -1, /* l1_cache_line_size */ -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -557,6 +558,7 @@ static const cpu_prefetch_tune exynosm1_prefetch_tune = -1, /* l1_cache_size */ 64, /* l1_cache_line_size */ -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -567,6 +569,7 @@ static const cpu_prefetch_tune qdf24xx_prefetch_tune = 32, /* l1_cache_size */ 64, /* l1_cache_line_size */ 1024, /* l2_cache_size */ + false, /* prefetch_dynamic_strides */ 2048, /* minimum_stride */ 3 /* default_opt_level */ }; @@ -577,6 +580,7 @@ static const cpu_prefetch_tune thunderxt88_prefetch_tune = 32, /* l1_cache_size */ 128, /* l1_cache_line_size */ 16*1024, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ -1, /* minimum_stride */ 3 /* default_opt_level */ }; @@ -587,6 +591,7 @@ static const cpu_prefetch_tune thunderx_prefetch_tune = 32, /* l1_cache_size */ 128, /* l1_cache_line_size */ -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -597,6 +602,7 @@ static const cpu_prefetch_tune thunderx2t99_prefetch_tune = 32, /* l1_cache_size */ 64, /* l1_cache_line_size */ 256, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ -1, /* minimum_stride */ -1 /* default_opt_level */ }; @@ -10602,6 +10608,11 @@ aarch64_override_options_internal (struct gcc_options *opts) aarch64_tune_params.prefetch->l2_cache_size, opts->x_param_values, global_options_set.x_param_values); + if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides) + maybe_set_param_value 
(PARAM_PREFETCH_DYNAMIC_STRIDES, + 0, + opts->x_param_values, + global_options_set.x_param_values); if (aarch64_tune_params.prefetch->minimum_stride >= 0) maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE, aarch64_tune_params.prefetch->minimum_stride, diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7c90abcef05..7e57e9650f6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10733,6 +10733,16 @@ The size of L1 cache, in kilobytes. @item l2-cache-size The size of L2 cache, in kilobytes. +@item prefetch-dynamic-strides +Whether the loop array prefetch pass should issue software prefetch hints +for strides that are non-constant. In some cases this may be +beneficial, though the fact the stride is non-constant may make it +hard to predict when there is clear benefit to issuing these hints. + +Set to 1, the default, if the prefetch hints should be issued for non-constant +strides. Set to 0 if prefetch hints should be issued only for strides that +are known to be constant and not below @option{prefetch-minimum-stride}. + @item prefetch-minimum-stride Minimum constant stride, in bytes, to start using prefetch hints for. If the stride is less than this threshold, prefetch hints will not be issued. diff --git a/gcc/params.def b/gcc/params.def index 2166deb6a68..7408843c8cc 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -790,6 +790,15 @@ DEFPARAM (PARAM_L2_CACHE_SIZE, "The size of L2 cache.", 512, 0, 0) +/* Whether software prefetch hints should be issued for non-constant + strides. */ + +DEFPARAM (PARAM_PREFETCH_DYNAMIC_STRIDES, + "prefetch-dynamic-strides", + "Whether software prefetch hints should be issued for non-constant " + "strides.", + 1, 0, 1) + /* The minimum constant stride beyond which we should use prefetch hints for. 
*/ diff --git a/gcc/params.h b/gcc/params.h index 96012db31ff..8aa960a904e 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -196,6 +196,8 @@ extern void init_param_values (int *params); PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE) #define L2_CACHE_SIZE \ PARAM_VALUE (PARAM_L2_CACHE_SIZE) +#define PREFETCH_DYNAMIC_STRIDES \ + PARAM_VALUE (PARAM_PREFETCH_DYNAMIC_STRIDES) #define PREFETCH_MINIMUM_STRIDE \ PARAM_VALUE (PARAM_PREFETCH_MINIMUM_STRIDE) #define USE_CANONICAL_TYPES \ diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 53104026e3c..af89d7070e3 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -992,6 +992,16 @@ prune_by_reuse (struct mem_ref_group *groups) static bool should_issue_prefetch_p (struct mem_ref *ref) { + /* Do we want to issue prefetches for non-constant strides? */ + if (!cst_and_fits_in_hwi (ref->group->step) && PREFETCH_DYNAMIC_STRIDES == 0) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Skipping non-constant step for reference %u:%u\n", + ref->group->uid, ref->uid); + return false; + } + /* Some processors may have a hardware prefetcher that may conflict with prefetch hints for a range of strides. Make sure we don't issue prefetches for such cases if the stride is within this particular