x86: Enable FMA in rsqrt<mode>2 expander

Enable FMA in the rsqrt<mode>2 expander and fold the rsqrtv16sf2 expander
into the rsqrt<mode>2 expander, which expands to UNSPEC_RSQRT28 for
TARGET_AVX512ER.  Although this change shows no performance difference in
our workloads, FMA can improve other workloads.

gcc/

	PR target/88713
	* config/i386/i386-expand.c (ix86_emit_swsqrtsf): Enable FMA.
	* config/i386/sse.md (VF_AVX512VL_VF1_128_256): New.
	(rsqrt<mode>2): Replace VF1_128_256 with VF_AVX512VL_VF1_128_256.
	(rsqrtv16sf2): Removed.

gcc/testsuite/

	PR target/88713
	* gcc.target/i386/pr88713-1.c: New test.
	* gcc.target/i386/pr88713-2.c: Likewise.
This commit is contained in:
H.J. Lu 2019-01-23 06:33:58 -08:00
parent a1e25d0008
commit fab263ab0f
4 changed files with 42 additions and 19 deletions

View File

@ -15535,14 +15535,22 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
}
}
mthree = force_reg (mode, mthree);
/* e0 = x0 * a */
emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3. */
mthree = force_reg (mode, mthree);
emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
if (TARGET_FMA || TARGET_AVX512F)
emit_insn (gen_rtx_SET (e2,
gen_rtx_FMA (mode, e0, x0, mthree)));
else
{
/* e1 = e0 * x0 */
emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3. */
emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
}
mhalf = force_reg (mode, mhalf);
if (recip)

View File

@ -326,6 +326,12 @@
[V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX512VL SF/DF plus 128- and 256-bit SF vector modes.
;; Per-mode enabling conditions, as listed below:
;;   V16SF, V8DF  - TARGET_AVX512F
;;   V8SF         - TARGET_AVX
;;   V4SF         - no extra condition
;;   V4DF, V2DF   - TARGET_AVX512VL
;; Used as the operand mode of the rsqrt<mode>2 expander.
(define_mode_iterator VF_AVX512VL_VF1_128_256
[(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL")
(V2DF "TARGET_AVX512VL")])
(define_mode_iterator VF2_AVX512VL
[V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
@ -2070,26 +2076,16 @@
(set_attr "mode" "<ssescalarmode>")])
(define_expand "rsqrt<mode>2"
[(set (match_operand:VF1_128_256 0 "register_operand")
(unspec:VF1_128_256
[(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
[(set (match_operand:VF_AVX512VL_VF1_128_256 0 "register_operand")
(unspec:VF_AVX512VL_VF1_128_256
[(match_operand:VF_AVX512VL_VF1_128_256 1 "vector_operand")]
UNSPEC_RSQRT))]
"TARGET_SSE && TARGET_SSE_MATH"
{
ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
DONE;
})
(define_expand "rsqrtv16sf2"
[(set (match_operand:V16SF 0 "register_operand")
(unspec:V16SF
[(match_operand:V16SF 1 "vector_operand")]
UNSPEC_RSQRT28))]
"TARGET_AVX512ER && TARGET_SSE_MATH"
{
ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
DONE;
})
(define_insn "<sse>_rsqrt<mode>2"
[(set (match_operand:VF1_128_256 0 "register_operand" "=x")
(unspec:VF1_128_256

View File

@ -0,0 +1,13 @@
/* { dg-do compile } */
/* { dg-options "-Ofast -mno-avx512f -mfma" } */
extern float sqrtf (float);
/* Store sqrtf (a[i]) into r[i] for each of the 64 elements.  Both
   pointers are restrict-qualified.  The dg-final directive of this test
   scans the generated assembly for a vfmadd{1,2,3}...ps instruction.
   NOTE(review): despite its name this function computes a square root,
   not a reciprocal square root; presumably -Ofast lowers sqrtf through
   the rsqrt-based software sequence -- confirm against the expander.  */
void
rsqrt (float* restrict r, float* restrict a)
{
for (int i = 0; i < 64; i++)
r[i] = sqrtf(a[i]);
}
/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */

View File

@ -0,0 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-Ofast -march=skylake-avx512 -mno-fma" } */
#include "pr88713-1.c"
/* { dg-final { scan-assembler "\tvfmadd\[123\]+ps" } } */