x86: Enable FMA in rsqrt<mode>2 expander
Enable FMA in rsqrt<mode>2 expander and fold rsqrtv16sf2 expander into rsqrt<mode>2 expander which expands to UNSPEC_RSQRT28 for TARGET_AVX512ER. Although it doesn't show a performance change in our workloads, FMA can improve other workloads.

gcc/

	PR target/88713
	* config/i386/i386-expand.c (ix86_emit_swsqrtsf): Enable FMA.
	* config/i386/sse.md (VF_AVX512VL_VF1_128_256): New.
	(rsqrt<mode>2): Replace VF1_128_256 with VF_AVX512VL_VF1_128_256.
	(rsqrtv16sf2): Removed.

gcc/testsuite/

	PR target/88713
	* gcc.target/i386/pr88713-1.c: New test.
	* gcc.target/i386/pr88713-2.c: Likewise.
This commit is contained in:
parent
a1e25d0008
commit
fab263ab0f
@ -15535,14 +15535,22 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
|
||||
}
|
||||
}
|
||||
|
||||
mthree = force_reg (mode, mthree);
|
||||
|
||||
/* e0 = x0 * a */
|
||||
emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
|
||||
/* e1 = e0 * x0 */
|
||||
emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
|
||||
|
||||
/* e2 = e1 - 3. */
|
||||
mthree = force_reg (mode, mthree);
|
||||
emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
|
||||
if (TARGET_FMA || TARGET_AVX512F)
|
||||
emit_insn (gen_rtx_SET (e2,
|
||||
gen_rtx_FMA (mode, e0, x0, mthree)));
|
||||
else
|
||||
{
|
||||
/* e1 = e0 * x0 */
|
||||
emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
|
||||
|
||||
/* e2 = e1 - 3. */
|
||||
emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
|
||||
}
|
||||
|
||||
mhalf = force_reg (mode, mhalf);
|
||||
if (recip)
|
||||
|
@ -326,6 +326,12 @@
|
||||
[V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
|
||||
V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
|
||||
|
||||
;; AVX512VL SF/DF plus 128- and 256-bit SF vector modes
|
||||
(define_mode_iterator VF_AVX512VL_VF1_128_256
|
||||
[(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
|
||||
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL")
|
||||
(V2DF "TARGET_AVX512VL")])
|
||||
|
||||
(define_mode_iterator VF2_AVX512VL
|
||||
[V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
|
||||
|
||||
@ -2070,26 +2076,16 @@
|
||||
(set_attr "mode" "<ssescalarmode>")])
|
||||
|
||||
(define_expand "rsqrt<mode>2"
|
||||
[(set (match_operand:VF1_128_256 0 "register_operand")
|
||||
(unspec:VF1_128_256
|
||||
[(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
|
||||
[(set (match_operand:VF_AVX512VL_VF1_128_256 0 "register_operand")
|
||||
(unspec:VF_AVX512VL_VF1_128_256
|
||||
[(match_operand:VF_AVX512VL_VF1_128_256 1 "vector_operand")]
|
||||
UNSPEC_RSQRT))]
|
||||
"TARGET_SSE && TARGET_SSE_MATH"
|
||||
{
|
||||
ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "rsqrtv16sf2"
|
||||
[(set (match_operand:V16SF 0 "register_operand")
|
||||
(unspec:V16SF
|
||||
[(match_operand:V16SF 1 "vector_operand")]
|
||||
UNSPEC_RSQRT28))]
|
||||
"TARGET_AVX512ER && TARGET_SSE_MATH"
|
||||
{
|
||||
ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "<sse>_rsqrt<mode>2"
|
||||
[(set (match_operand:VF1_128_256 0 "register_operand" "=x")
|
||||
(unspec:VF1_128_256
|
||||
|
13
gcc/testsuite/gcc.target/i386/pr88713-1.c
Normal file
13
gcc/testsuite/gcc.target/i386/pr88713-1.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -mno-avx512f -mfma" } */
|
||||
|
||||
extern float sqrtf (float);
|
||||
|
||||
void
|
||||
rsqrt (float* restrict r, float* restrict a)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
r[i] = sqrtf(a[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */
|
6
gcc/testsuite/gcc.target/i386/pr88713-2.c
Normal file
6
gcc/testsuite/gcc.target/i386/pr88713-2.c
Normal file
@ -0,0 +1,6 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -march=skylake-avx512 -mno-fma" } */
|
||||
|
||||
#include "pr88713-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */
|
Loading…
Reference in New Issue
Block a user