Fix false dependence of scalar operation vrcp/vsqrt/vrsqrt/vrndscale

For instructions with xmm operand:

op %xmmN,%xmmQ,%xmmQ ----> op %xmmN, %xmmN, %xmmQ

for instruction with mem operand or gpr operand:

op mem/gpr, %xmmQ, %xmmQ

--->  using pass rpad ---->

xorps %xmmN, %xmmN, %xxN
op mem/gpr, %xmmN, %xmmQ

Performance influence of SPEC2017 fprate which is tested on SKX
----
503.bwaves_r	-0.03%
507.cactuBSSN_r -0.22%
508.namd_r	-0.02%
510.parest_r	0.37%
511.povray_r	0.74%
519.lbm_r	0.24%
521.wrf_r	2.35%
526.blender_r	0.71%
527.cam4_r	0.65%
538.imagick_r	0.95%
544.nab_r	-0.37
549.fotonik3d_r 0.24%
554.roms_r	0.90%
fprate geomean	0.50%
-----

Changelog
gcc/
	* config/i386/i386.md (*rcpsf2_sse): Add
	avx_partial_xmm_update, prefer m constraint for TARGET_AVX.
	(*rsqrtsf2_sse): Ditto.
	(*sqrt<mode>2_sse): Ditto.
	(sse4_1_round<mode>2): separate constraint vm, add
	avx_partail_xmm_update, prefer m constraint for TARGET_AVX.
	* config/i386/sse.md (*sse_vmrcpv4sf2"): New define_insn used
	by pass rpad.
	(*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>*):
	Ditto.
	(*sse_vmrsqrtv4sf2): Ditto.
	(*avx512f_rndscale<mode><round_saeonly_name>): Ditto.
	(*sse4_1_round<ssescalarmodesuffix>): Ditto.
	(sse4_1_round<ssescalarmodesuffix>): Add m constraint and
	<iptr> pointer size modifier since vround support memory operand.

gcc/testsuite
	* gcc.target/i386/pr87007-4.c: New test.
	* gcc.target/i386/pr87007-5.c: Ditto.

From-SVN: r277469
This commit is contained in:
Hongtao Liu 2019-10-26 02:34:34 +00:00 committed by Hongtao Liu
parent 6755d2d056
commit 011464ede0
6 changed files with 193 additions and 32 deletions

View File

@ -1,3 +1,22 @@
2019-10-26 Hongtao Liu <hongtao.liu@intel.com>
PR target/89071
* config/i386/i386.md (*rcpsf2_sse): Add
avx_partial_xmm_update, prefer m constraint for TARGET_AVX.
(*rsqrtsf2_sse): Ditto.
(*sqrt<mode>2_sse): Ditto.
(sse4_1_round<mode>2): separate constraint vm, add
avx_partail_xmm_update, prefer m constraint for TARGET_AVX.
* config/i386/sse.md (*sse_vmrcpv4sf2"): New define_insn used
by pass rpad.
(*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>*):
Ditto.
(*sse_vmrsqrtv4sf2): Ditto.
(*avx512f_rndscale<mode><round_saeonly_name>): Ditto.
(*sse4_1_round<ssescalarmodesuffix>): Ditto.
(sse4_1_round<ssescalarmodesuffix>): Add m constraint and
<iptr> pointer size modifier since vround support memory operand.
2019-10-18 Georg-Johann Lay <avr@gjlay.de>
PR target/85969

View File

@ -14843,13 +14843,14 @@
(set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")
(set_attr "avx_partial_xmm_update" "false,false,true")
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(cond [(match_test "TARGET_AVX")
(symbol_ref "true")
(eq_attr "alternative" "1,2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_insn "*fop_xf_1_i387"
[(set (match_operand:XF 0 "register_operand" "=f,f")
@ -15089,13 +15090,14 @@
(set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")
(set_attr "avx_partial_xmm_update" "false,false,true")
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(cond [(match_test "TARGET_AVX")
(symbol_ref "true")
(eq_attr "alternative" "1,2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_expand "rsqrtsf2"
[(set (match_operand:SF 0 "register_operand")
@ -15120,14 +15122,15 @@
(set_attr "atom_sse_attr" "sqrt")
(set_attr "btver2_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "avx_partial_xmm_update" "false,false,true")
(set_attr "mode" "<MODE>")
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(cond [(match_test "TARGET_AVX")
(symbol_ref "true")
(eq_attr "alternative" "1,2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_expand "sqrt<mode>2"
[(set (match_operand:MODEF 0 "register_operand")
@ -16261,30 +16264,32 @@
(define_insn "sse4_1_round<mode>2"
[(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v")
[(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v,v")
(unspec:MODEF
[(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,vm")
(match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n")]
[(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,v,m")
(match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n,n")]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
"@
%vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
%vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1,1,1,*")
(set_attr "length_immediate" "*,*,*,1")
(set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex")
(set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f")
(set_attr "prefix_extra" "1,1,1,*,*")
(set_attr "length_immediate" "*,*,*,1,1")
(set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
(set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
(set_attr "avx_partial_xmm_update" "false,false,true,false,true")
(set_attr "mode" "<MODE>")
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(cond [(match_test "TARGET_AVX")
(symbol_ref "true")
(eq_attr "alternative" "1,2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_insn "rintxf2"
[(set (match_operand:XF 0 "register_operand" "=f")

View File

@ -2035,6 +2035,25 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
(define_insn "*sse_vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(vec_duplicate:V4SF
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
UNSPEC_RCP))
(match_operand:V4SF 2 "register_operand" "0,x")
(const_int 1)))]
"TARGET_SSE"
"@
rcpss\t{%1, %0|%0, %1}
vrcpss\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "atom_sse_attr" "rcp")
(set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
(unspec:VF_AVX512VL
@ -2130,6 +2149,25 @@
(set_attr "btver2_sse_attr" "sqrt")
(set_attr "mode" "<ssescalarmode>")])
(define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(vec_duplicate:VF_128
(sqrt:<ssescalarmode>
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
(match_operand:VF_128 2 "register_operand" "0,v")
(const_int 1)))]
"TARGET_SSE"
"@
sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
(set_attr "prefix" "<round_scalar_prefix>")
(set_attr "btver2_sse_attr" "sqrt")
(set_attr "mode" "<ssescalarmode>")])
(define_expand "rsqrt<mode>2"
[(set (match_operand:VF1_128_256 0 "register_operand")
(unspec:VF1_128_256
@ -2219,6 +2257,23 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
(define_insn "*sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(vec_duplicate:V4SF
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
UNSPEC_RSQRT))
(match_operand:V4SF 2 "register_operand" "0,x")
(const_int 1)))]
"TARGET_SSE"
"@
rsqrtss\t{%1, %0|%0, %1}
vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
[(set (match_operand:VF 0 "register_operand")
(smaxmin:VF
@ -9709,6 +9764,22 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
(define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(vec_duplicate:VF_128
(unspec:<ssescalarmode>
[(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_ROUND))
(match_operand:VF_128 1 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F"
"vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
[(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
;; One bit in mask selects 2 elements.
(define_insn "avx512f_shufps512_1<mask_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
@ -17954,12 +18025,36 @@
[(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
[(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
(match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
UNSPEC_ROUND)
(match_operand:VF_128 1 "register_operand" "0,0,x,v")
(const_int 1)))]
"TARGET_SSE4_1"
"@
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
[(set_attr "isa" "noavx,noavx,avx,avx512f")
(set_attr "type" "ssecvt")
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,1,*,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex,evex")
(set_attr "mode" "<MODE>")])
(define_insn "*sse4_1_round<ssescalarmodesuffix>"
[(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
(vec_merge:VF_128
(vec_duplicate:VF_128
(unspec:<ssescalarmode>
[(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
(match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
UNSPEC_ROUND))
(match_operand:VF_128 1 "register_operand" "0,0,x,v")
(const_int 1)))]
"TARGET_SSE4_1"
"@
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}

View File

@ -1,3 +1,9 @@
2019-10-26 Hongtao Liu <hongtao.liu@intel.com>
PR target/89071
* gcc.target/i386/pr87007-4.c: New test.
* gcc.target/i386/pr87007-5.c: Ditto.
2019-10-25 Marek Polacek <polacek@redhat.com>
PR c++/91581 - ICE in exception-specification of defaulted ctor.

View File

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse" } */
#include<math.h>
extern double d1, d2, d3;
void
foo (int n, int k)
{
for (int i = 0; i != n; i++)
if(i < k)
d1 = floor (d2);
else
d1 = ceil (d3);
}
/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */

View File

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse" } */
#include<math.h>
extern double d1, d2, d3;
void
foo (int n, int k)
{
for (int i = 0; i != n; i++)
if(i < k)
d1 = sqrt (d2);
else
d1 = sqrt (d3);
}
/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */