Rename the tuning option and related functions to enable the Newton series for the reciprocal square root to reflect its approximative characteristic.
gcc/ * config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename function to "aarch64_emit_approx_rsqrt". * config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to AARCH64_EXTRA_TUNE_APPROX_RSQRT. * config/aarch64/aarch64.c (exynosm1_tunings): Use new flag name. (xgene1_tunings): Likewise. (use_rsqrt_p): Likewise. (aarch64_emit_swrsqrt): Use new function name. * config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise. * config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the text explaining this option. * doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise. From-SVN: r233772
This commit is contained in:
parent
20ba5f3318
commit
0c30e0f316
@ -1,3 +1,23 @@
|
||||
2016-02-26 Evandro Menezes <e.menezes@samsung.com>
|
||||
|
||||
Rename the AArch64 tuning option and related functions to enable the
|
||||
Newton series for the reciprocal square root to reflect its
|
||||
approximative characteristic.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename
|
||||
function to "aarch64_emit_approx_rsqrt".
|
||||
* config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to
|
||||
AARCH64_EXTRA_TUNE_APPROX_RSQRT.
|
||||
* config/aarch64/aarch64.c (exynosm1_tunings): Use new flag name.
|
||||
(xgene1_tunings): Likewise.
|
||||
(use_rsqrt_p): Likewise.
|
||||
(aarch64_emit_swrsqrt): Use new function name.
|
||||
* config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise.
|
||||
* config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the
|
||||
text explaining this option.
|
||||
* doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise.
|
||||
|
||||
2016-02-26 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/69969
|
||||
|
@ -360,8 +360,7 @@ void aarch64_emit_call_insn (rtx);
|
||||
void aarch64_register_pragmas (void);
|
||||
void aarch64_relayout_simd_types (void);
|
||||
void aarch64_reset_previous_fndecl (void);
|
||||
|
||||
void aarch64_emit_swrsqrt (rtx, rtx);
|
||||
void aarch64_emit_approx_rsqrt (rtx, rtx);
|
||||
|
||||
/* Initialize builtins for SIMD intrinsics. */
|
||||
void init_aarch64_simd_builtins (void);
|
||||
@ -413,9 +412,7 @@ rtx aarch64_expand_builtin (tree exp,
|
||||
machine_mode mode ATTRIBUTE_UNUSED,
|
||||
int ignore ATTRIBUTE_UNUSED);
|
||||
tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED);
|
||||
|
||||
tree aarch64_builtin_rsqrt (unsigned int);
|
||||
|
||||
tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
|
||||
|
||||
extern void aarch64_split_combinev16qi (rtx operands[3]);
|
||||
|
@ -405,7 +405,7 @@
|
||||
UNSPEC_RSQRT))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
aarch64_emit_swrsqrt (operands[0], operands[1]);
|
||||
aarch64_emit_approx_rsqrt (operands[0], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -29,5 +29,5 @@
|
||||
AARCH64_TUNE_ to give an enum name. */
|
||||
|
||||
AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
|
||||
AARCH64_EXTRA_TUNING_OPTION ("recip_sqrt", RECIP_SQRT)
|
||||
AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
|
||||
|
||||
|
@ -538,7 +538,7 @@ static const struct tune_params exynosm1_tunings =
|
||||
48, /* max_case_values. */
|
||||
64, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
|
||||
};
|
||||
|
||||
static const struct tune_params thunderx_tunings =
|
||||
@ -586,7 +586,7 @@ static const struct tune_params xgene1_tunings =
|
||||
0, /* max_case_values. */
|
||||
0, /* cache_line_size. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
|
||||
};
|
||||
|
||||
/* Support for fine-grained override of the tuning structures. */
|
||||
@ -7460,8 +7460,8 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
|
||||
return aarch64_tune_params.memmov_cost;
|
||||
}
|
||||
|
||||
/* Return true if it is safe and beneficial to use the rsqrt optabs to
|
||||
optimize 1.0/sqrt. */
|
||||
/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
|
||||
to optimize 1.0/sqrt. */
|
||||
|
||||
static bool
|
||||
use_rsqrt_p (void)
|
||||
@ -7469,12 +7469,12 @@ use_rsqrt_p (void)
|
||||
return (!flag_trapping_math
|
||||
&& flag_unsafe_math_optimizations
|
||||
&& ((aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_RECIP_SQRT)
|
||||
& AARCH64_EXTRA_TUNE_APPROX_RSQRT)
|
||||
|| flag_mrecip_low_precision_sqrt));
|
||||
}
|
||||
|
||||
/* Function to decide when to use
|
||||
reciprocal square root builtins. */
|
||||
/* Function to decide when to use the approximate reciprocal square root
|
||||
builtin. */
|
||||
|
||||
static tree
|
||||
aarch64_builtin_reciprocal (tree fndecl)
|
||||
@ -7522,12 +7522,12 @@ get_rsqrts_type (machine_mode mode)
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit instruction sequence to compute
|
||||
reciprocal square root. Use two Newton-Raphson steps
|
||||
for single precision and three for double precision. */
|
||||
/* Emit instruction sequence to compute the reciprocal square root using the
|
||||
Newton-Raphson series. Iterate over the series twice for SF
|
||||
and thrice for DF. */
|
||||
|
||||
void
|
||||
aarch64_emit_swrsqrt (rtx dst, rtx src)
|
||||
aarch64_emit_approx_rsqrt (rtx dst, rtx src)
|
||||
{
|
||||
machine_mode mode = GET_MODE (src);
|
||||
gcc_assert (
|
||||
@ -7544,6 +7544,7 @@ aarch64_emit_swrsqrt (rtx dst, rtx src)
|
||||
|
||||
int iterations = double_mode ? 3 : 2;
|
||||
|
||||
/* Optionally iterate over the series one less time than otherwise. */
|
||||
if (flag_mrecip_low_precision_sqrt)
|
||||
iterations--;
|
||||
|
||||
|
@ -151,5 +151,5 @@ PC relative literal loads.
|
||||
|
||||
mlow-precision-recip-sqrt
|
||||
Common Var(flag_mrecip_low_precision_sqrt) Optimization
|
||||
When calculating a sqrt approximation, run fewer steps.
|
||||
This reduces precision, but can result in faster computation.
|
||||
When calculating the reciprocal square root approximation,
|
||||
uses one less step than otherwise, thus reducing latency and precision.
|
||||
|
@ -12884,12 +12884,10 @@ corresponding flag to the linker.
|
||||
@item -mno-low-precision-recip-sqrt
|
||||
@opindex -mlow-precision-recip-sqrt
|
||||
@opindex -mno-low-precision-recip-sqrt
|
||||
The square root estimate uses two steps instead of three for double-precision,
|
||||
and one step instead of two for single-precision.
|
||||
Thus reducing latency and precision.
|
||||
This is only relevant if @option{-ffast-math} activates
|
||||
reciprocal square root estimate instructions.
|
||||
Which in turn depends on the target processor.
|
||||
When calculating the reciprocal square root approximation,
|
||||
uses one less step than otherwise, thus reducing latency and precision.
|
||||
This is only relevant if @option{-ffast-math} enables the reciprocal square root
|
||||
approximation, which in turn depends on the target processor.
|
||||
|
||||
@item -march=@var{name}
|
||||
@opindex march
|
||||
|
Loading…
Reference in New Issue
Block a user