From 1657d923e68a4969e1c3661a06a1f4cde30b7ae7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 30 May 2017 19:18:25 +0200 Subject: [PATCH] re PR target/80833 (32-bit x86 causes store-forwarding stalls for int64_t -> xmm) PR target/80833 * config/i386/constraints.md (Yd): New constraint. (Ye): Ditto. * config/i386/i386.md (*movti_internal): Add (?r, Ye) and (?Yd, r) alternatives. Update insn attributes. * config/i386/i386.md (*movdi_internal): Add (?r, *Ye) and (?*Yd, r) alternatives. Update insn attributes. (double-mode inter-unit splitters): Add new GR<->XMM splitters. testsuite/ChangeLog: PR target/80833 * gcc.target/i386/pr80833-1.c: New test. * gcc.target/i386/pr80833-2.c: Ditto. From-SVN: r248691 --- gcc/config/i386/constraints.md | 26 ++++++- gcc/config/i386/i386.md | 91 +++++++++++++++++++---- gcc/testsuite/gcc.target/i386/pr80833-1.c | 12 +++ gcc/testsuite/gcc.target/i386/pr80833-2.c | 12 +++ 4 files changed, 126 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr80833-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr80833-2.c diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 816704fd2e1..f94e274358b 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -102,18 +102,24 @@ ;; c SSE inter-unit conversions enabled ;; i SSE2 inter-unit moves to SSE register enabled ;; j SSE2 inter-unit moves from SSE register enabled +;; d any EVEX encodable SSE register for AVX512BW target or any SSE register +;; for SSE4_1 target, when inter-unit moves to SSE register are enabled +;; e any EVEX encodable SSE register for AVX512BW target or any SSE register +;; for SSE4_1 target, when inter-unit moves from SSE register are enabled ;; m MMX inter-unit moves to MMX register enabled ;; n MMX inter-unit moves from MMX register enabled +;; p Integer register when TARGET_PARTIAL_REG_STALL is disabled ;; a Integer register when zero extensions with AND are disabled ;; b Any
register that can be used as the GOT base when calling ;; ___tls_get_addr: that is, any general register except EAX ;; and ESP, for -fno-plt if linker supports it. Otherwise, ;; EBX. -;; p Integer register when TARGET_PARTIAL_REG_STALL is disabled ;; f x87 register when 80387 floating point arithmetic is enabled ;; r SSE regs not requiring REX prefix when prefixes avoidance is enabled ;; and all SSE regs otherwise -;; h EVEX encodable SSE register with number factor of four +;; v any EVEX encodable SSE register for AVX512VL target, +;; otherwise any SSE register +;; h EVEX encodable SSE register with number factor of four (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS" "First SSE register (@code{%xmm0}).") @@ -130,6 +136,22 @@ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC ? ALL_SSE_REGS : NO_REGS" "@internal Any SSE register, when SSE2 and inter-unit moves from vector registers are enabled.") +(define_register_constraint "Yd" + "TARGET_INTER_UNIT_MOVES_TO_VEC + ? (TARGET_AVX512BW + ? ALL_SSE_REGS + : (TARGET_SSE4_1 ? SSE_REGS : NO_REGS)) + : NO_REGS" + "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target or any SSE register for SSE4_1 target, when inter-unit moves to vector registers are enabled.") + +(define_register_constraint "Ye" + "TARGET_INTER_UNIT_MOVES_FROM_VEC + ? (TARGET_AVX512BW + ? ALL_SSE_REGS + : (TARGET_SSE4_1 ? SSE_REGS : NO_REGS)) + : NO_REGS" + "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target or any SSE register for SSE4_1 target, when inter-unit moves from vector registers are enabled.") + (define_register_constraint "Ym" "TARGET_MMX && TARGET_INTER_UNIT_MOVES_TO_VEC ? 
MMX_REGS : NO_REGS" "@internal Any MMX register, when inter-unit moves to vector registers are enabled.") diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 140defae6e3..d6f7e639cb1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2074,8 +2074,8 @@ (const_string "OI")))]) (define_insn "*movti_internal" - [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m") - (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd") + (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Ye,r"))] "(TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))) || (TARGET_SSE @@ -2118,8 +2118,20 @@ gcc_unreachable (); } } - [(set_attr "isa" "x64,x64,*,sse2,*,*") - (set_attr "type" "multi,multi,sselog1,sselog1,ssemov,ssemov") + [(set (attr "isa") + (cond [(eq_attr "alternative" "0,1,6,7") + (const_string "x64") + (eq_attr "alternative" "3") + (const_string "sse2") + ] + (const_string "*"))) + (set (attr "type") + (cond [(eq_attr "alternative" "0,1,6,7") + (const_string "multi") + (eq_attr "alternative" "2,3") + (const_string "sselog1") + ] + (const_string "ssemov"))) (set (attr "prefix") (if_then_else (eq_attr "type" "sselog1,ssemov") (const_string "maybe_vex") @@ -2145,11 +2157,29 @@ ] (const_string "TI")))]) +(define_split + [(set (match_operand:TI 0 "sse_reg_operand") + (match_operand:TI 1 "general_reg_operand"))] + "TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC + && reload_completed" + [(set (match_dup 2) + (vec_merge:V2DI + (vec_duplicate:V2DI (match_dup 3)) + (match_dup 2) + (const_int 2)))] +{ + operands[2] = lowpart_subreg (V2DImode, operands[0], TImode); + operands[3] = gen_highpart (DImode, operands[1]); + + emit_move_insn (gen_lowpart (DImode, operands[0]), + gen_lowpart (DImode, operands[1])); +}) + (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r 
,?*Ym,*v,*v,*v,m ,m,?r ,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?*Yd,?r ,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,v,*Yj,r ,*Yj ,*Yn ,*r,*km,*k,*k"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,v,*Ye,r ,*Yj,r ,*Yj ,*Yn ,*r,*km,*k,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2222,14 +2252,14 @@ } } [(set (attr "isa") - (cond [(eq_attr "alternative" "0,1") + (cond [(eq_attr "alternative" "0,1,17,18") (const_string "nox64") - (eq_attr "alternative" "2,3,4,5,10,11,17,18,21,23") + (eq_attr "alternative" "2,3,4,5,10,11,19,20,23,25") (const_string "x64") ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "0,1") + (cond [(eq_attr "alternative" "0,1,17,18") (const_string "multi") (eq_attr "alternative" "6") (const_string "mmx") @@ -2237,11 +2267,11 @@ (const_string "mmxmov") (eq_attr "alternative" "12") (const_string "sselog1") - (eq_attr "alternative" "13,14,15,16,17,18") + (eq_attr "alternative" "13,14,15,16,19,20") (const_string "ssemov") - (eq_attr "alternative" "19,20") + (eq_attr "alternative" "21,22") (const_string "ssecvt") - (eq_attr "alternative" "21,22,23,24") + (eq_attr "alternative" "23,24,25,26") (const_string "mskmov") (and (match_operand 0 "register_operand") (match_operand 1 "pic_32bit_operand")) @@ -2260,7 +2290,7 @@ (const_string "*"))) (set (attr "prefix_rex") (if_then_else - (eq_attr "alternative" "10,11,17,18") + (eq_attr "alternative" "10,11,19,20") (const_string "1") (const_string "*"))) (set (attr "prefix") @@ -2307,6 +2337,23 @@ ] (const_string "*")))]) +(define_split + [(set (match_operand:<DWI> 0 "general_reg_operand") + (match_operand:<DWI> 1 "sse_reg_operand"))] + "TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC + && reload_completed" + [(set (match_dup 2) + (vec_select:DWIH + (match_dup 3) + (parallel [(const_int 1)])))] +{ + operands[2] =
gen_highpart (<MODE>mode, operands[0]); + operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode); + + emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), + gen_lowpart (<MODE>mode, operands[1])); +}) + (define_split [(set (match_operand:DWI 0 "nonimmediate_gr_operand") (match_operand:DWI 1 "general_gr_operand"))] @@ -2314,6 +2361,24 @@ [(const_int 0)] "ix86_split_long_move (operands); DONE;") +(define_split + [(set (match_operand:DI 0 "sse_reg_operand") + (match_operand:DI 1 "general_reg_operand"))] + "!TARGET_64BIT && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC + && reload_completed" + [(set (match_dup 2) + (vec_merge:V4SI + (vec_duplicate:V4SI (match_dup 3)) + (match_dup 2) + (const_int 2)))] +{ + operands[2] = lowpart_subreg (V4SImode, operands[0], DImode); + operands[3] = gen_highpart (SImode, operands[1]); + + emit_move_insn (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1])); +}) + (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?*Yi,*k,*k ,*rm") diff --git a/gcc/testsuite/gcc.target/i386/pr80833-1.c b/gcc/testsuite/gcc.target/i386/pr80833-1.c new file mode 100644 index 00000000000..3f702fb0a54 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr80833-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1 -mtune=intel -mregparm=2" } */ +/* { dg-require-effective-target ia32 } */ + +long long test (long long a) +{ + asm ("" : "+x" (a)); + return a; +} + +/* { dg-final { scan-assembler "pinsrd" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr80833-2.c b/gcc/testsuite/gcc.target/i386/pr80833-2.c new file mode 100644 index 00000000000..8136a60f519 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr80833-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1 -mtune=intel" } */ +/* { dg-require-effective-target int128 } */ + +__int128 test (__int128 a) +{ + asm ("" : "+x" (a)); +
return a; +} + +/* { dg-final { scan-assembler "pinsrq" } } */ +/* { dg-final { scan-assembler "pextrq" } } */