Fix false dependence of scalar operation vrcp/vsqrt/vrsqrt/vrndscale
For instructions with xmm operand: op %xmmN,%xmmQ,%xmmQ ----> op %xmmN, %xmmN, %xmmQ for instruction with mem operand or gpr operand: op mem/gpr, %xmmQ, %xmmQ ---> using pass rpad ----> xorps %xmmN, %xmmN, %xxN op mem/gpr, %xmmN, %xmmQ Performance influence of SPEC2017 fprate which is tested on SKX ---- 503.bwaves_r -0.03% 507.cactuBSSN_r -0.22% 508.namd_r -0.02% 510.parest_r 0.37% 511.povray_r 0.74% 519.lbm_r 0.24% 521.wrf_r 2.35% 526.blender_r 0.71% 527.cam4_r 0.65% 538.imagick_r 0.95% 544.nab_r -0.37 549.fotonik3d_r 0.24% 554.roms_r 0.90% fprate geomean 0.50% ----- Changelog gcc/ * config/i386/i386.md (*rcpsf2_sse): Add avx_partial_xmm_update, prefer m constraint for TARGET_AVX. (*rsqrtsf2_sse): Ditto. (*sqrt<mode>2_sse): Ditto. (sse4_1_round<mode>2): separate constraint vm, add avx_partail_xmm_update, prefer m constraint for TARGET_AVX. * config/i386/sse.md (*sse_vmrcpv4sf2"): New define_insn used by pass rpad. (*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>*): Ditto. (*sse_vmrsqrtv4sf2): Ditto. (*avx512f_rndscale<mode><round_saeonly_name>): Ditto. (*sse4_1_round<ssescalarmodesuffix>): Ditto. (sse4_1_round<ssescalarmodesuffix>): Add m constraint and <iptr> pointer size modifier since vround support memory operand. gcc/testsuite * gcc.target/i386/pr87007-4.c: New test. * gcc.target/i386/pr87007-5.c: Ditto. From-SVN: r277469
This commit is contained in:
parent
6755d2d056
commit
011464ede0
@ -1,3 +1,22 @@
|
||||
2019-10-26 Hongtao Liu <hongtao.liu@intel.com>
|
||||
|
||||
PR target/89071
|
||||
* config/i386/i386.md (*rcpsf2_sse): Add
|
||||
avx_partial_xmm_update, prefer m constraint for TARGET_AVX.
|
||||
(*rsqrtsf2_sse): Ditto.
|
||||
(*sqrt<mode>2_sse): Ditto.
|
||||
(sse4_1_round<mode>2): separate constraint vm, add
|
||||
avx_partail_xmm_update, prefer m constraint for TARGET_AVX.
|
||||
* config/i386/sse.md (*sse_vmrcpv4sf2"): New define_insn used
|
||||
by pass rpad.
|
||||
(*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>*):
|
||||
Ditto.
|
||||
(*sse_vmrsqrtv4sf2): Ditto.
|
||||
(*avx512f_rndscale<mode><round_saeonly_name>): Ditto.
|
||||
(*sse4_1_round<ssescalarmodesuffix>): Ditto.
|
||||
(sse4_1_round<ssescalarmodesuffix>): Add m constraint and
|
||||
<iptr> pointer size modifier since vround support memory operand.
|
||||
|
||||
2019-10-18 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
PR target/85969
|
||||
|
@ -14843,13 +14843,14 @@
|
||||
(set_attr "btver2_sse_attr" "rcp")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "avx_partial_xmm_update" "false,false,true")
|
||||
(set (attr "preferred_for_speed")
|
||||
(cond [(eq_attr "alternative" "1")
|
||||
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
(eq_attr "alternative" "2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
(cond [(match_test "TARGET_AVX")
|
||||
(symbol_ref "true")
|
||||
(eq_attr "alternative" "1,2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
|
||||
(define_insn "*fop_xf_1_i387"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f,f")
|
||||
@ -15089,13 +15090,14 @@
|
||||
(set_attr "btver2_sse_attr" "rcp")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "avx_partial_xmm_update" "false,false,true")
|
||||
(set (attr "preferred_for_speed")
|
||||
(cond [(eq_attr "alternative" "1")
|
||||
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
(eq_attr "alternative" "2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
(cond [(match_test "TARGET_AVX")
|
||||
(symbol_ref "true")
|
||||
(eq_attr "alternative" "1,2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
|
||||
(define_expand "rsqrtsf2"
|
||||
[(set (match_operand:SF 0 "register_operand")
|
||||
@ -15120,14 +15122,15 @@
|
||||
(set_attr "atom_sse_attr" "sqrt")
|
||||
(set_attr "btver2_sse_attr" "sqrt")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "avx_partial_xmm_update" "false,false,true")
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set (attr "preferred_for_speed")
|
||||
(cond [(eq_attr "alternative" "1")
|
||||
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
(eq_attr "alternative" "2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
(cond [(match_test "TARGET_AVX")
|
||||
(symbol_ref "true")
|
||||
(eq_attr "alternative" "1,2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
|
||||
(define_expand "sqrt<mode>2"
|
||||
[(set (match_operand:MODEF 0 "register_operand")
|
||||
@ -16261,30 +16264,32 @@
|
||||
|
||||
|
||||
(define_insn "sse4_1_round<mode>2"
|
||||
[(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v")
|
||||
[(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v,v")
|
||||
(unspec:MODEF
|
||||
[(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,vm")
|
||||
(match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n")]
|
||||
[(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,v,m")
|
||||
(match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n,n")]
|
||||
UNSPEC_ROUND))]
|
||||
"TARGET_SSE4_1"
|
||||
"@
|
||||
%vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
|
||||
%vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
|
||||
%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
|
||||
vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
|
||||
vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "prefix_extra" "1,1,1,*")
|
||||
(set_attr "length_immediate" "*,*,*,1")
|
||||
(set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex")
|
||||
(set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f")
|
||||
(set_attr "prefix_extra" "1,1,1,*,*")
|
||||
(set_attr "length_immediate" "*,*,*,1,1")
|
||||
(set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
|
||||
(set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
|
||||
(set_attr "avx_partial_xmm_update" "false,false,true,false,true")
|
||||
(set_attr "mode" "<MODE>")
|
||||
(set (attr "preferred_for_speed")
|
||||
(cond [(eq_attr "alternative" "1")
|
||||
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
(eq_attr "alternative" "2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
(cond [(match_test "TARGET_AVX")
|
||||
(symbol_ref "true")
|
||||
(eq_attr "alternative" "1,2")
|
||||
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
|
||||
]
|
||||
(symbol_ref "true")))])
|
||||
|
||||
(define_insn "rintxf2"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f")
|
||||
|
@ -2035,6 +2035,25 @@
|
||||
(set_attr "prefix" "orig,vex")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "*sse_vmrcpv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
|
||||
(vec_merge:V4SF
|
||||
(vec_duplicate:V4SF
|
||||
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
|
||||
UNSPEC_RCP))
|
||||
(match_operand:V4SF 2 "register_operand" "0,x")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"@
|
||||
rcpss\t{%1, %0|%0, %1}
|
||||
vrcpss\t{%1, %2, %0|%0, %2, %1}"
|
||||
[(set_attr "isa" "noavx,avx")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "atom_sse_attr" "rcp")
|
||||
(set_attr "btver2_sse_attr" "rcp")
|
||||
(set_attr "prefix" "orig,vex")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
|
||||
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
|
||||
(unspec:VF_AVX512VL
|
||||
@ -2130,6 +2149,25 @@
|
||||
(set_attr "btver2_sse_attr" "sqrt")
|
||||
(set_attr "mode" "<ssescalarmode>")])
|
||||
|
||||
(define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
|
||||
(vec_merge:VF_128
|
||||
(vec_duplicate:VF_128
|
||||
(sqrt:<ssescalarmode>
|
||||
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
|
||||
(match_operand:VF_128 2 "register_operand" "0,v")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"@
|
||||
sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
|
||||
vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
|
||||
[(set_attr "isa" "noavx,avx")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "atom_sse_attr" "sqrt")
|
||||
(set_attr "prefix" "<round_scalar_prefix>")
|
||||
(set_attr "btver2_sse_attr" "sqrt")
|
||||
(set_attr "mode" "<ssescalarmode>")])
|
||||
|
||||
(define_expand "rsqrt<mode>2"
|
||||
[(set (match_operand:VF1_128_256 0 "register_operand")
|
||||
(unspec:VF1_128_256
|
||||
@ -2219,6 +2257,23 @@
|
||||
(set_attr "prefix" "orig,vex")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "*sse_vmrsqrtv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
|
||||
(vec_merge:V4SF
|
||||
(vec_duplicate:V4SF
|
||||
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
|
||||
UNSPEC_RSQRT))
|
||||
(match_operand:V4SF 2 "register_operand" "0,x")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"@
|
||||
rsqrtss\t{%1, %0|%0, %1}
|
||||
vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
|
||||
[(set_attr "isa" "noavx,avx")
|
||||
(set_attr "type" "sse")
|
||||
(set_attr "prefix" "orig,vex")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
|
||||
[(set (match_operand:VF 0 "register_operand")
|
||||
(smaxmin:VF
|
||||
@ -9709,6 +9764,22 @@
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=v")
|
||||
(vec_merge:VF_128
|
||||
(vec_duplicate:VF_128
|
||||
(unspec:<ssescalarmode>
|
||||
[(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
|
||||
(match_operand:SI 3 "const_0_to_255_operand")]
|
||||
UNSPEC_ROUND))
|
||||
(match_operand:VF_128 1 "register_operand" "v")
|
||||
(const_int 1)))]
|
||||
"TARGET_AVX512F"
|
||||
"vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
|
||||
[(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; One bit in mask selects 2 elements.
|
||||
(define_insn "avx512f_shufps512_1<mask_name>"
|
||||
[(set (match_operand:V16SF 0 "register_operand" "=v")
|
||||
@ -17954,12 +18025,36 @@
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
|
||||
(vec_merge:VF_128
|
||||
(unspec:VF_128
|
||||
[(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
|
||||
[(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
|
||||
UNSPEC_ROUND)
|
||||
(match_operand:VF_128 1 "register_operand" "0,0,x,v")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE4_1"
|
||||
"@
|
||||
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
|
||||
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
|
||||
vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
|
||||
vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
|
||||
[(set_attr "isa" "noavx,noavx,avx,avx512f")
|
||||
(set_attr "type" "ssecvt")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix_data16" "1,1,*,*")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "prefix" "orig,orig,vex,evex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "*sse4_1_round<ssescalarmodesuffix>"
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
|
||||
(vec_merge:VF_128
|
||||
(vec_duplicate:VF_128
|
||||
(unspec:<ssescalarmode>
|
||||
[(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
|
||||
UNSPEC_ROUND))
|
||||
(match_operand:VF_128 1 "register_operand" "0,0,x,v")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE4_1"
|
||||
"@
|
||||
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
|
||||
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
|
||||
|
@ -1,3 +1,9 @@
|
||||
2019-10-26 Hongtao Liu <hongtao.liu@intel.com>
|
||||
|
||||
PR target/89071
|
||||
* gcc.target/i386/pr87007-4.c: New test.
|
||||
* gcc.target/i386/pr87007-5.c: Ditto.
|
||||
|
||||
2019-10-25 Marek Polacek <polacek@redhat.com>
|
||||
|
||||
PR c++/91581 - ICE in exception-specification of defaulted ctor.
|
||||
|
18
gcc/testsuite/gcc.target/i386/pr87007-4.c
Normal file
18
gcc/testsuite/gcc.target/i386/pr87007-4.c
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse" } */
|
||||
|
||||
|
||||
#include<math.h>
|
||||
|
||||
extern double d1, d2, d3;
|
||||
void
|
||||
foo (int n, int k)
|
||||
{
|
||||
for (int i = 0; i != n; i++)
|
||||
if(i < k)
|
||||
d1 = floor (d2);
|
||||
else
|
||||
d1 = ceil (d3);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
|
18
gcc/testsuite/gcc.target/i386/pr87007-5.c
Normal file
18
gcc/testsuite/gcc.target/i386/pr87007-5.c
Normal file
@ -0,0 +1,18 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse" } */
|
||||
|
||||
|
||||
#include<math.h>
|
||||
|
||||
extern double d1, d2, d3;
|
||||
void
|
||||
foo (int n, int k)
|
||||
{
|
||||
for (int i = 0; i != n; i++)
|
||||
if(i < k)
|
||||
d1 = sqrt (d2);
|
||||
else
|
||||
d1 = sqrt (d3);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
|
Loading…
Reference in New Issue
Block a user