From 45392c76508048665ea5ad3269b59894724d5cb5 Mon Sep 17 00:00:00 2001
From: Ilya Enkovich
Date: Wed, 3 Dec 2014 08:45:45 +0000
Subject: [PATCH] constraints.md (Yr): New.

gcc/
	* config/i386/constraints.md (Yr): New.
	* config/i386/i386.h (reg_class): Add NO_REX_SSE_REGS.
	(REG_CLASS_NAMES): Likewise.
	(REG_CLASS_CONTENTS): Likewise.
	* config/i386/sse.md (*vec_concatv2sf_sse4_1): Add alternatives
	which use only NO_REX_SSE_REGS.
	(vec_set_0): Likewise.
	(*vec_setv4sf_sse4_1): Likewise.
	(sse4_1_insertps): Likewise.
	(*sse4_1_extractps): Likewise.
	(*sse4_1_mulv2siv2di3): Likewise.
	(*_mul3): Likewise.
	(*sse4_1_3): Likewise.
	(*sse4_1_3): Likewise.
	(*sse4_1_eqv2di3): Likewise.
	(sse4_2_gtv2di3): Likewise.
	(*vec_extractv4si): Likewise.
	(*vec_concatv2si_sse4_1): Likewise.
	(vec_concatv2di): Likewise.
	(_blend): Likewise.
	(_blendv): Likewise.
	(_dp): Likewise.
	(_movntdqa): Likewise.
	(_mpsadbw): Likewise.
	(packusdw): Likewise.
	(_pblendvb): Likewise.
	(sse4_1_pblendw): Likewise.
	(sse4_1_phminposuw): Likewise.
	(sse4_1_v8qiv8hi2): Likewise.
	(sse4_1_v4qiv4si2): Likewise.
	(sse4_1_v4hiv4si2): Likewise.
	(sse4_1_v2qiv2di2): Likewise.
	(sse4_1_v2hiv2di2): Likewise.
	(sse4_1_v2siv2di2): Likewise.
	(sse4_1_ptest): Likewise.
	(_round): Likewise.
	(sse4_1_round): Likewise.
	* config/i386/subst.md (mask_prefix4): New.
	* config/i386/x86-tune.def (X86_TUNE_AVOID_4BYTE_PREFIXES): New.

gcc/testsuites/
	* gcc.target/i386/sse2-init-v2di-2.c: Adjust to changed
	vec_concatv2di template.

From-SVN: r218303
---
 gcc/ChangeLog                                 |  42 ++
 gcc/config/i386/constraints.md                |   6 +
 gcc/config/i386/i386.h                        |   3 +
 gcc/config/i386/sse.md                        | 393 ++++++++++--------
 gcc/config/i386/subst.md                      |   1 +
 gcc/config/i386/x86-tune.def                  |   4 +
 gcc/testsuite/ChangeLog                       |   5 +
 .../gcc.target/i386/sse2-init-v2di-2.c        |   2 +-
 8 files changed, 271 insertions(+), 185 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e3b3477e541..85a68b90d66 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,45 @@
+2014-12-03  Ilya Enkovich
+
+	* config/i386/constraints.md (Yr): New.
+	* config/i386/i386.h (reg_class): Add NO_REX_SSE_REGS.
+	(REG_CLASS_NAMES): Likewise.
+	(REG_CLASS_CONTENTS): Likewise.
+	* config/i386/sse.md (*vec_concatv2sf_sse4_1): Add alternatives
+	which use only NO_REX_SSE_REGS.
+	(vec_set_0): Likewise.
+	(*vec_setv4sf_sse4_1): Likewise.
+	(sse4_1_insertps): Likewise.
+	(*sse4_1_extractps): Likewise.
+	(*sse4_1_mulv2siv2di3): Likewise.
+	(*_mul3): Likewise.
+	(*sse4_1_3): Likewise.
+	(*sse4_1_3): Likewise.
+	(*sse4_1_eqv2di3): Likewise.
+	(sse4_2_gtv2di3): Likewise.
+	(*vec_extractv4si): Likewise.
+	(*vec_concatv2si_sse4_1): Likewise.
+	(vec_concatv2di): Likewise.
+	(_blend): Likewise.
+	(_blendv): Likewise.
+	(_dp): Likewise.
+	(_movntdqa): Likewise.
+	(_mpsadbw): Likewise.
+	(packusdw): Likewise.
+	(_pblendvb): Likewise.
+	(sse4_1_pblendw): Likewise.
+	(sse4_1_phminposuw): Likewise.
+	(sse4_1_v8qiv8hi2): Likewise.
+	(sse4_1_v4qiv4si2): Likewise.
+	(sse4_1_v4hiv4si2): Likewise.
+	(sse4_1_v2qiv2di2): Likewise.
+	(sse4_1_v2hiv2di2): Likewise.
+	(sse4_1_v2siv2di2): Likewise.
+	(sse4_1_ptest): Likewise.
+	(_round): Likewise.
+	(sse4_1_round): Likewise.
+	* config/i386/subst.md (mask_prefix4): New.
+	* config/i386/x86-tune.def (X86_TUNE_AVOID_4BYTE_PREFIXES): New.
+
 2014-12-03  Segher Boessenkool
 
 	PR rtl-optimization/52714
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index b7183a1a90f..c8093f54e7e 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -106,6 +106,8 @@
 ;;  a	Integer register when zero extensions with AND are disabled
 ;;  p	Integer register when TARGET_PARTIAL_REG_STALL is disabled
 ;;  f	x87 register when 80387 floating point arithmetic is enabled
+;;  r	SSE regs not requiring REX prefix when prefixes avoidance is enabled
+;;	and all SSE regs otherwise
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -139,6 +141,10 @@
  "(ix86_fpmath & FPMATH_387) ? FLOAT_REGS : NO_REGS"
  "@internal Any x87 register when 80387 FP arithmetic is enabled.")
 
+(define_register_constraint "Yr"
+ "TARGET_SSE ? (X86_TUNE_AVOID_4BYTE_PREFIXES ? NO_REX_SSE_REGS : ALL_SSE_REGS) : NO_REGS"
+ "@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.")
+
 ;; We use the B prefix to denote any number of internal operands:
 ;;  s	Sibcall memory operand, not valid for TARGET_X32
 ;;  w	Call memory operand, not valid for TARGET_X32
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index bc766200df0..df7789d5ef2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1311,6 +1311,7 @@ enum reg_class
   FP_TOP_REG, FP_SECOND_REG,	/* %st(0) %st(1) */
   FLOAT_REGS,
   SSE_FIRST_REG,
+  NO_REX_SSE_REGS,
   SSE_REGS,
   EVEX_SSE_REGS,
   BND_REGS,
@@ -1369,6 +1370,7 @@ enum reg_class
    "FP_TOP_REG", "FP_SECOND_REG",	\
    "FLOAT_REGS",	\
    "SSE_FIRST_REG",	\
+   "NO_REX_SSE_REGS",	\
    "SSE_REGS",	\
    "EVEX_SSE_REGS",	\
    "BND_REGS",	\
@@ -1409,6 +1411,7 @@ enum reg_class
 {   0x0200,       0x0,   0x0 },	/* FP_SECOND_REG */	\
 {   0xff00,       0x0,   0x0 },	/* FLOAT_REGS */	\
 { 0x200000,       0x0,   0x0 },	/* SSE_FIRST_REG */	\
+{ 0x1fe00000, 0x000000,  0x0 },	/* NO_REX_SSE_REGS */	\
 { 0x1fe00000, 0x1fe000,  0x0 },	/* SSE_REGS */	\
 {       0x0,0xffe00000,  0x1f },	/* EVEX_SSE_REGS */	\
 {       0x0,       0x0,0x1e000 },	/* BND_REGS */	\
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca5d720d1be..c3aaea370bb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6338,26 +6338,28 @@
 ;; Although insertps takes register source, we prefer
 ;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1" - [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y") + [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x,Yr,*x,x,x,*y ,*y") (vec_concat:V2SF - (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m") - (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))] + (match_operand:SF 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,m, 0 , m") + (match_operand:SF 2 "vector_move_operand" " Yr,*x,x, m,m, m,C,*ym, C")))] "TARGET_SSE4_1" "@ + unpcklps\t{%2, %0|%0, %2} unpcklps\t{%2, %0|%0, %2} vunpcklps\t{%2, %1, %0|%0, %1, %2} insertps\t{$0x10, %2, %0|%0, %2, 0x10} + insertps\t{$0x10, %2, %0|%0, %2, 0x10} vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} %vmovss\t{%1, %0|%0, %1} punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") - (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") - (set_attr "prefix_data16" "*,*,1,*,*,*,*") - (set_attr "prefix_extra" "*,*,1,1,*,*,*") - (set_attr "length_immediate" "*,*,1,1,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig") - (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) + [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") + (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") + (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*") + (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*") + (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*") + (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") + (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) ;; ??? In theory we can match memory for the MMX alternative, but allowing ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE @@ -6405,16 +6407,17 @@ ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "vec_set_0" [(set (match_operand:VI4F_128 0 "nonimmediate_operand" - "=v,v,v ,x,x,v,x ,x ,m ,m ,m") + "=Yr,*v,v,v ,x,x,v,Yr ,*x ,x ,m ,m ,m") (vec_merge:VI4F_128 (vec_duplicate:VI4F_128 (match_operand: 2 "general_operand" - " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF")) + " Yr,*v,m,*r,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF")) (match_operand:VI4F_128 1 "vector_move_operand" - " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0") + " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0") (const_int 1)))] "TARGET_SSE" "@ + %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} %vmov\t{%2, %0|%0, %2} %vmovd\t{%2, %0|%0, %2} @@ -6422,32 +6425,33 @@ movss\t{%2, %0|%0, %2} vmovss\t{%2, %1, %0|%0, %1, %2} pinsrd\t{$0, %2, %0|%0, %2, 0} + pinsrd\t{$0, %2, %0|%0, %2, 0} vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} # # #" - [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*") + [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*") (set (attr "type") - (cond [(eq_attr "alternative" "0,6,7") + (cond [(eq_attr "alternative" "0,1,7,8,9") (const_string "sselog") - (eq_attr "alternative" "9") + (eq_attr "alternative" "11") (const_string "imov") - (eq_attr "alternative" "10") + (eq_attr "alternative" "12") (const_string "fmov") ] (const_string "ssemov"))) - (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*") - (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*") - (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*") - (set_attr "mode" "SF,,SI,SF,SF,SF,TI,TI,*,*,*")]) + (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*") + (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*") + (set_attr "prefix" 
"maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*") + (set_attr "mode" "SF,SF,,SI,SF,SF,SF,TI,TI,TI,*,*,*")]) ;; A subset is vec_setv4sf. (define_insn "*vec_setv4sf_sse4_1" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") + [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") (vec_merge:V4SF (vec_duplicate:V4SF - (match_operand:SF 2 "nonimmediate_operand" "xm,xm")) - (match_operand:V4SF 1 "register_operand" "0,x") + (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm")) + (match_operand:V4SF 1 "register_operand" "0,0,x") (match_operand:SI 3 "const_int_operand")))] "TARGET_SSE4_1 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -6457,26 +6461,27 @@ switch (which_alternative) { case 0: - return "insertps\t{%3, %2, %0|%0, %2, %3}"; case 1: + return "insertps\t{%3, %2, %0|%0, %2, %3}"; + case 2: return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "V4SF")]) (define_insn "sse4_1_insertps" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") - (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm") - (match_operand:V4SF 1 "register_operand" "0,x") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") + (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] UNSPEC_INSERTPS))] "TARGET_SSE4_1" { @@ -6490,19 +6495,20 @@ switch (which_alternative) { case 0: - return "insertps\t{%3, %2, %0|%0, %2, %3}"; case 1: + return "insertps\t{%3, %2, %0|%0, %2, %3}"; + case 2: return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "V4SF")]) (define_split @@ -6544,12 +6550,13 @@ }) (define_insn_and_split "*sse4_1_extractps" - [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x") + [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x") (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x,0,x") - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))] + (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))] "TARGET_SSE4_1" "@ + %vextractps\t{%2, %1, %0|%0, %1, %2} %vextractps\t{%2, %1, %0|%0, %1, %2} # #" @@ -6575,13 +6582,13 @@ } DONE; } - [(set_attr "isa" "*,noavx,avx") - (set_attr "type" "sselog,*,*") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix_extra" "1,*,*") - (set_attr "length_immediate" "1,*,*") - (set_attr "prefix" "maybe_vex,*,*") - (set_attr "mode" "V4SF,*,*")]) + [(set_attr "isa" "*,*,noavx,avx") + (set_attr "type" "sselog,sselog,*,*") + (set_attr "prefix_data16" "1,1,*,*") + (set_attr "prefix_extra" "1,1,*,*") + (set_attr "length_immediate" "1,1,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,*,*") + (set_attr "mode" "V4SF,V4SF,*,*")]) 
(define_insn_and_split "*vec_extractv4sf_mem" [(set (match_operand:SF 0 "register_operand" "=x,*r,f") @@ -9553,26 +9560,27 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") (define_insn "*sse4_1_mulv2siv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x,v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v") (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%0,v") + (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm") + (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm") (parallel [(const_int 0) (const_int 2)])))))] "TARGET_SSE4_1 && && ix86_binary_operator_ok (MULT, V4SImode, operands)" "@ + pmuldq\t{%2, %0|%0, %2} pmuldq\t{%2, %0|%0, %2} vpmuldq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseimul") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "avx512bw_pmaddwd512" @@ -9752,19 +9760,20 @@ }) (define_insn "*_mul3" - [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v") + [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v") (mult:VI4_AVX512F - (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v") - (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))] + (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, mode, operands) && " "@ + pmulld\t{%2, %0|%0, %2} pmulld\t{%2, %0|%0, %2} vpmulld\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") - (set_attr "prefix" "") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "")]) (define_expand "mul3" @@ -10241,20 +10250,21 @@ }) (define_insn "*sse4_1_3" - [(set (match_operand:VI14_128 0 "register_operand" "=x,v") + [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v") (smaxmin:VI14_128 - (match_operand:VI14_128 1 "nonimmediate_operand" "%0,v") - (match_operand:VI14_128 2 "nonimmediate_operand" "xm,vm")))] + (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] "TARGET_SSE4_1 && && ix86_binary_operator_ok (, mode, operands)" "@ + p\t{%2, %0|%0, %2} p\t{%2, %0|%0, %2} vp\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*v8hi3" @@ -10324,20 +10334,21 @@ }) (define_insn "*sse4_1_3" - [(set (match_operand:VI24_128 0 "register_operand" "=x,v") + [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v") (umaxmin:VI24_128 - (match_operand:VI24_128 1 "nonimmediate_operand" "%0,v") - (match_operand:VI24_128 2 "nonimmediate_operand" "xm,vm")))] + (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] "TARGET_SSE4_1 && && ix86_binary_operator_ok (, mode, 
operands)" "@ + p\t{%2, %0|%0, %2} p\t{%2, %0|%0, %2} vp\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*v16qi3" @@ -10427,18 +10438,19 @@ (set_attr "mode" "")]) (define_insn "*sse4_1_eqv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") (eq:V2DI - (match_operand:V2DI 1 "nonimmediate_operand" "%0,x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" "@ + pcmpeqq\t{%2, %0|%0, %2} pcmpeqq\t{%2, %0|%0, %2} vpcmpeqq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecmp") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*sse2_eq3" @@ -10474,18 +10486,19 @@ "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);") (define_insn "sse4_2_gtv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") (gt:V2DI - (match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:V2DI 1 "register_operand" "0,0,x") + (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] "TARGET_SSE4_2" "@ + pcmpgtq\t{%2, %0|%0, %2} pcmpgtq\t{%2, %0|%0, %2} vpcmpgtq\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecmp") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "avx2_gt3" @@ -12705,9 +12718,9 @@ "operands[1] = gen_rtx_REG (mode, REGNO (operands[1]));") (define_insn "*vec_extractv4si" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x") + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x") (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x,0,x") + (match_operand:V4SI 1 "register_operand" "x,0,0,x") (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] "TARGET_SSE4_1" { @@ -12717,10 +12730,11 @@ return "%vpextrd\t{%2, %1, %0|%0, %1, %2}"; case 1: + case 2: operands [2] = GEN_INT (INTVAL (operands[2]) * 4); return "psrldq\t{%2, %0|%0, %2}"; - case 2: + case 3: operands [2] = GEN_INT (INTVAL (operands[2]) * 4); return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; @@ -12728,11 +12742,11 @@ gcc_unreachable (); } } - [(set_attr "isa" "*,noavx,avx") - (set_attr "type" "sselog1,sseishft1,sseishft1") - (set_attr "prefix_extra" "1,*,*") + [(set_attr "isa" "*,noavx,noavx,avx") + (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1") + (set_attr "prefix_extra" "1,*,*,*") (set_attr "length_immediate" "1") - (set_attr "prefix" "maybe_vex,orig,vex") + (set_attr "prefix" "maybe_vex,orig,orig,vex") (set_attr "mode" "TI")]) (define_insn "*vec_extractv4si_zext" @@ -12839,25 +12853,27 @@ (set_attr "mode" "TI,TI,DF,V4SF")]) (define_insn "*vec_concatv2si_sse4_1" - [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y") + [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y") (vec_concat:V2SI - (match_operand:SI 
1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm") - (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))] + (match_operand:SI 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,rm, 0,rm") + (match_operand:SI 2 "vector_move_operand" " rm,rm,rm,Yr,*x,x, C,*ym, C")))] "TARGET_SSE4_1" "@ + pinsrd\t{$1, %2, %0|%0, %2, 1} pinsrd\t{$1, %2, %0|%0, %2, 1} vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} punpckldq\t{%2, %0|%0, %2} + punpckldq\t{%2, %0|%0, %2} vpunpckldq\t{%2, %1, %0|%0, %1, %2} %vmovd\t{%1, %0|%0, %1} punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" - [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") - (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") - (set_attr "prefix_extra" "1,1,*,*,*,*,*") - (set_attr "length_immediate" "1,1,*,*,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig") - (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")]) + [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") + (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") + (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*") + (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*") + (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") + (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")]) ;; ??? In theory we can match memory for the MMX alternative, but allowing ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE @@ -12900,14 +12916,15 @@ ;; movd instead of movq is required to handle broken assemblers. (define_insn "vec_concatv2di" [(set (match_operand:V2DI 0 "register_operand" - "=x,x ,Yi,x ,!x,x,x,x,x,x") + "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" - " 0,x ,r ,xm,*y,0,x,0,0,x") + " 0, 0,x ,r ,xm,*y,0,x,0,0,x") (match_operand:DI 2 "vector_move_operand" - "rm,rm,C ,C ,C ,x,x,x,m,m")))] + "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))] "TARGET_SSE" "@ + pinsrq\t{$1, %2, %0|%0, %2, 1} pinsrq\t{$1, %2, %0|%0, %2, 1} vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1} * return HAVE_AS_IX86_INTERUNIT_MOVQ ? 
\"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\"; @@ -12918,17 +12935,17 @@ movlhps\t{%2, %0|%0, %2} movhps\t{%2, %0|%0, %2} vmovhps\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") + [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") (set (attr "type") (if_then_else - (eq_attr "alternative" "0,1,5,6") + (eq_attr "alternative" "0,1,2,6,7") (const_string "sselog") (const_string "ssemov"))) - (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*") - (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*") - (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*") - (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex") - (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) + (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*") + (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*") + (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex") + (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) (define_expand "vec_unpacks_lo_" [(match_operand: 0 "register_operand") @@ -13968,61 +13985,64 @@ [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) (define_insn "_blend" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (vec_merge:VF_128_256 - (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") - (match_operand:VF_128_256 1 "register_operand" "0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:VF_128_256 1 "register_operand" "0,0,x") (match_operand:SI 3 "const_0_to__operand")))] "TARGET_SSE4_1" "@ + blend\t{%3, %2, %0|%0, %2, %3} blend\t{%3, %2, %0|%0, %2, %3} vblend\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "")]) (define_insn "_blendv" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 - [(match_operand:VF_128_256 1 "register_operand" "0,x") - (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") - (match_operand:VF_128_256 3 "register_operand" "Yz,x")] + [(match_operand:VF_128_256 1 "register_operand" "0,0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")] UNSPEC_BLENDV))] "TARGET_SSE4_1" "@ + blendv\t{%3, %2, %0|%0, %2, %3} blendv\t{%3, %2, %0|%0, %2, %3} vblendv\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "")]) (define_insn "_dp" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 - [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x") - (match_operand:VF_128_256 2 
"nonimmediate_operand" "xm,xm") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] UNSPEC_DP))] "TARGET_SSE4_1" "@ + dp\t{%3, %2, %0|%0, %2, %3} dp\t{%3, %2, %0|%0, %2, %3} vdp\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemul") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "")]) ;; Mode attribute used by `vmovntdqa' pattern @@ -14030,86 +14050,90 @@ [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")]) (define_insn "_movntdqa" - [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v") - (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")] + [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v") + (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")] UNSPEC_MOVNTDQA))] "TARGET_SSE4_1" "%vmovntdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1, *") - (set_attr "prefix" "maybe_vex, evex") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "maybe_vex,maybe_vex,evex") (set_attr "mode" "")]) (define_insn "_mpsadbw" - [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") (unspec:VI1_AVX2 - [(match_operand:VI1_AVX2 1 "register_operand" "0,x") - (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] UNSPEC_MPSADBW))] "TARGET_SSE4_1" "@ + mpsadbw\t{%3, %2, %0|%0, %2, %3} mpsadbw\t{%3, %2, %0|%0, %2, %3} vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog1") (set_attr "length_immediate" "1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "")]) (define_insn "_packusdw" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v") (vec_concat:VI2_AVX2 (us_truncate: - (match_operand: 1 "register_operand" "0,v")) + (match_operand: 1 "register_operand" "0,0,v")) (us_truncate: - (match_operand: 2 "nonimmediate_operand" "xm,vm"))))] + (match_operand: 2 "nonimmediate_operand" "Yrm,*xm,vm"))))] "TARGET_SSE4_1 && && " "@ + packusdw\t{%2, %0|%0, %2} packusdw\t{%2, %0|%0, %2} vpackusdw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "")]) (define_insn "_pblendvb" - [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") (unspec:VI1_AVX2 - [(match_operand:VI1_AVX2 1 
"register_operand" "0,x") - (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") - (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")] + [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")] UNSPEC_BLENDV))] "TARGET_SSE4_1" "@ + pblendvb\t{%3, %2, %0|%0, %2, %3} pblendvb\t{%3, %2, %0|%0, %2, %3} vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "*,1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "")]) (define_insn "sse4_1_pblendw" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x") (vec_merge:V8HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") - (match_operand:V8HI 1 "register_operand" "0,x") - (match_operand:SI 3 "const_0_to_255_operand" "n,n")))] + (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm") + (match_operand:V8HI 1 "register_operand" "0,0,x") + (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))] "TARGET_SSE4_1" "@ + pblendw\t{%3, %2, %0|%0, %2, %3} pblendw\t{%3, %2, %0|%0, %2, %3} vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) ;; The builtin uses an 8-bit immediate. Expand that. 
@@ -14157,8 +14181,8 @@ (set_attr "mode" "")]) (define_insn "sse4_1_phminposuw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x") + (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")] UNSPEC_PHMINPOSUW))] "TARGET_SSE4_1" "%vphminposuw\t{%1, %0|%0, %1}" @@ -14190,10 +14214,10 @@ (set_attr "mode" "XI")]) (define_insn "sse4_1_v8qiv8hi2" - [(set (match_operand:V8HI 0 "register_operand" "=v") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v") (any_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) @@ -14233,10 +14257,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_v4qiv4si2" - [(set (match_operand:V4SI 0 "register_operand" "=v") + [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") (any_extend:V4SI (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1 && " @@ -14269,10 +14293,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_v4hiv4si2" - [(set (match_operand:V4SI 0 "register_operand" "=v") + [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v") (any_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "vm") + (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1 && " @@ -14313,10 +14337,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_v2qiv2di2" - [(set (match_operand:V2DI 0 "register_operand" "=v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") (any_extend:V2DI (vec_select:V2QI - (match_operand:V16QI 1 "nonimmediate_operand" "vm") + (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && " "%vpmovbq\t{%1, %0|%0, %w1}" @@ -14351,10 +14375,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_v2hiv2di2" - [(set (match_operand:V2DI 0 "register_operand" "=v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") (any_extend:V2DI (vec_select:V2HI - (match_operand:V8HI 1 "nonimmediate_operand" "vm") + (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && " "%vpmovwq\t{%1, %0|%0, %k1}" @@ -14386,10 +14410,10 @@ (set_attr "mode" "OI")]) (define_insn "sse4_1_v2siv2di2" - [(set (match_operand:V2DI 0 "register_operand" "=v") + [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v") (any_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "vm") + (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1 && " "%vpmovdq\t{%1, %0|%0, %q1}" @@ -14430,8 +14454,8 @@ (define_insn "sse4_1_ptest" [(set (reg:CC FLAGS_REG) - (unspec:CC [(match_operand:V2DI 0 "register_operand" "x") - (match_operand:V2DI 1 "nonimmediate_operand" "xm")] + (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x") + (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")] UNSPEC_PTEST))] "TARGET_SSE4_1" "%vptest\t{%1, %0|%0, %1}" @@ -14441,10 +14465,10 @@ (set_attr "mode" "TI")]) (define_insn "_round" - [(set (match_operand:VF_128_256 0 "register_operand" "=x") + [(set 
(match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
 	(unspec:VF_128_256
-	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
-	   (match_operand:SI 2 "const_0_to_15_operand" "n")]
+	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
+	   (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
 	  UNSPEC_ROUND))]
   "TARGET_ROUND"
   "%vround\t{%2, %1, %0|%0, %1, %2}"
@@ -14524,24 +14548,25 @@
 })
 
 (define_insn "sse4_1_round"
-  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 2 "register_operand" "x,x")
-	     (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
+	    [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
+	     (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
 	    UNSPEC_ROUND)
-	  (match_operand:VF_128 1 "register_operand" "0,x")
+	  (match_operand:VF_128 1 "register_operand" "0,0,x")
 	  (const_int 1)))]
   "TARGET_ROUND"
   "@
+   round\t{%3, %2, %0|%0, %2, %3}
    round\t{%3, %2, %0|%0, %2, %3}
    vround\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecvt")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix_data16" "1,*")
+   (set_attr "prefix_data16" "1,1,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "")])
 
 (define_expand "round2"
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 91228c8976b..d4ce519c3d6 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -63,6 +63,7 @@
 (define_subst_attr "mask_prefix" "mask" "vex" "evex")
 (define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
 (define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex")
+(define_subst_attr "mask_prefix4" "mask" "orig,orig,vex" "evex")
 (define_subst_attr "mask_expand_op3" "mask" "3" "5")
 
 (define_subst "mask"
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 735e6e5812f..b5c6e4f0168 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -395,6 +395,10 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
 DEF_TUNE (X86_TUNE_VECTOR_PARALLEL_EXECUTION, "vec_parallel",
 	  m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
 
+/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */
+DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
+	  m_SILVERMONT | m_INTEL)
+
 /*****************************************************************************/
 /* AVX instruction selection tuning (some of SSE flags affects AVX, too)    */
 /*****************************************************************************/
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ee812d25be8..b0d17867c61 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2014-12-03  Ilya Enkovich
+
+	* gcc.target/i386/sse2-init-v2di-2.c: Adjust to changed
+	vec_concatv2di template.
+
 2014-12-03  Segher Boessenkool
 
 	PR rtl-optimization/52714
diff --git a/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c b/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c
index 0aa52640592..b347a4ab27b 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c
@@ -10,4 +10,4 @@ test (long long b)
   return _mm_cvtsi64_si128 (b);
 }
 
-/* { dg-final { scan-assembler-times "vec_concatv2di/3" 1 } } */
+/* { dg-final { scan-assembler-times "vec_concatv2di/4" 1 } } */
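
Usage note (not part of the commit): the new "Yr" alternatives only take effect when the
avoid_4byte_prefixes tuning is active, which this patch enables for Silvermont and the
generic "intel" tuning.  SSE4.1 instructions such as pmulld already carry several
prefix/escape bytes, and addressing %xmm8-%xmm15 adds a REX prefix on top of that, which
is exactly what the tuning tries to avoid.  Below is a minimal sketch of code affected by
the change, assuming a GCC build that contains this patch; the file name and flags are
only illustrative.

/* pmulld-example.c -- hypothetical example, not part of the patch.
   Compile with something like:
     gcc -O2 -msse4.1 -mtune=silvermont -S pmulld-example.c
   and inspect the generated pmulld; with avoid_4byte_prefixes enabled the
   register allocator should now prefer %xmm0-%xmm7 for these operands.  */
#include <smmintrin.h>

__m128i
mul_v4si (__m128i a, __m128i b)
{
  return _mm_mullo_epi32 (a, b);	/* SSE4.1 pmulld */
}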