diff --git a/gcc/ChangeLog b/gcc/ChangeLog index de6a753188f..52ed599bc1d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2007-02-09 Richard Henderson + + * config/i386/constraints.md (Ym): New constraint. + * config/i386/i386.md (movsi_1): Change Y2 to Yi constraints. + (movdi_1_rex64): Split sse and xmm general register moves from + memory move alternatives. Use conditional register constraints. + (movsf_1, movdf_integer): Likewise. + (zero_extendsidi2_32, zero_extendsidi2_rex64): Likewise. + (movdf_integer_rex64): New. + (pushsf_rex64): Fix output constraints. + * config/i386/sse.md (sse2_loadld): Split rm alternative, use Yi. + (sse2_stored): Likewise. + (sse2_storeq_rex64): New. + * config/i386/i386.c (x86_inter_unit_moves): Enable for not + amd and not generic. + (ix86_secondary_memory_needed): Don't bypass TARGET_INTER_UNIT_MOVES + for optimize_size. Remove SF/DFmode hack. + 2007-02-09 Dwarakanath Rajagopal * config/i386/driver-i386.c: Turn on -mtune=native for AMDFAM10. diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index f5b7c513428..8f3e3cef066 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -86,6 +86,7 @@ ;; We use the Y prefix to denote any number of conditional register sets: ;; 2 SSE2 enabled ;; i SSE2 inter-unit moves enabled +;; m MMX inter-unit moves enabled (define_register_constraint "Y2" "TARGET_SSE2 ? SSE_REGS : NO_REGS" "@internal Any SSE register, when SSE2 is enabled.") @@ -94,6 +95,10 @@ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS" "@internal Any SSE register, when SSE2 and inter-unit moves are enabled.") +(define_register_constraint "Ym" + "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS" + "@internal Any MMX register, when inter-unit moves are enabled.") + ;; Integer constant constraints. (define_constraint "I" "Integer constant in the range 0 @dots{} 31, for 32-bit shifts." diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2dddd69741c..e5ff76e2d22 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1161,9 +1161,7 @@ const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA; const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10; const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC); -/* ??? Allowing interunit moves makes it all too easy for the compiler to put - integer data in xmm registers. Which results in pretty abysmal code. */ -const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */; +const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC); const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; @@ -18301,18 +18299,12 @@ ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, /* If the target says that inter-unit moves are more expensive than moving through memory, then don't generate them. */ - if (!TARGET_INTER_UNIT_MOVES && !optimize_size) + if (!TARGET_INTER_UNIT_MOVES) return true; /* Between SSE and general, we have moves no larger than word size. */ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) return true; - - /* ??? For the cost of one register reformat penalty, we could use - the same instructions to move SFmode and DFmode data, but the - relevant move patterns don't support those alternatives. */ - if (mode == SFmode || mode == DFmode) - return true; } return false; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ffb575a8075..4f194d4b216 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1187,9 +1187,9 @@ (define_insn "*movsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r ,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Y2,*x") + "=r ,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") (match_operand:SI 1 "general_operand" - "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Y2,*x,r ,m "))] + "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2022,35 +2022,41 @@ (define_insn "*movdi_1_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r,r ,r,m ,!m,*y,*y,?rm,?*y,*x,*x,?rm,?*x,?*x,?*y") + "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") (match_operand:DI 1 "general_operand" - "Z ,rem,i,re,n ,C ,*y,*y ,rm ,C ,*x,*x ,rm ,*y ,*x"))] + "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))] "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { case TYPE_SSECVT: - if (which_alternative == 13) + if (SSE_REG_P (operands[0])) return "movq2dq\t{%1, %0|%0, %1}"; else return "movdq2q\t{%1, %0|%0, %1}"; + case TYPE_SSEMOV: if (get_attr_mode (insn) == MODE_TI) - return "movdqa\t{%1, %0|%0, %1}"; + return "movdqa\t{%1, %0|%0, %1}"; /* FALLTHRU */ + case TYPE_MMXMOV: - /* Moves from and into integer register is done using movd opcode with - REX prefix. */ + /* Moves from and into integer register is done using movd + opcode with REX prefix. */ if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) - return "movd\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; return "movq\t{%1, %0|%0, %1}"; + case TYPE_SSELOG1: case TYPE_MMXADD: return "pxor\t%0, %0"; + case TYPE_MULTI: return "#"; + case TYPE_LEA: return "lea{q}\t{%a1, %0|%0, %a1}"; + default: gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); if (get_attr_mode (insn) == MODE_SI) @@ -2064,13 +2070,13 @@ [(set (attr "type") (cond [(eq_attr "alternative" "5") (const_string "mmxadd") - (eq_attr "alternative" "6,7,8") + (eq_attr "alternative" "6,7,8,9,10") (const_string "mmxmov") - (eq_attr "alternative" "9") + (eq_attr "alternative" "11") (const_string "sselog1") - (eq_attr "alternative" "10,11,12") + (eq_attr "alternative" "12,13,14,15,16") (const_string "ssemov") - (eq_attr "alternative" "13,14") + (eq_attr "alternative" "17,18") (const_string "ssecvt") (eq_attr "alternative" "4") (const_string "multi") @@ -2078,9 +2084,9 @@ (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*") - (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*") - (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI")]) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")]) ;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address @@ -2280,7 +2286,7 @@ (set_attr "mode" "SF,SI,SF")]) (define_insn "*pushsf_rex64" - [(set (match_operand:SF 0 "push_operand" "=X,X,X") + [(set (match_operand:SF 0 "push_operand" "=<,<,<") (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] "TARGET_64BIT" { @@ -2320,9 +2326,9 @@ (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" - "=f,m ,f,r ,m ,x,x,x ,m ,!*y,!rm,!*y") + "=f,m,f,r ,m ,x,x,x ,m,*y,m ,*y,Yi,r ,*Ym,r ") (match_operand:SF 1 "general_operand" - "fm,f,G ,rmF,Fr,C ,x ,xm,x,rm ,*y ,*y"))] + "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y,r ,Yi,r ,*Ym"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2358,12 +2364,11 @@ return "movaps\t{%1, %0|%0, %1}"; else return "movss\t{%1, %0|%0, %1}"; - case 7: - case 8: + case 7: case 8: return "movss\t{%1, %0|%0, %1}"; - case 9: - case 10: + case 9: case 10: + case 12: case 13: case 14: case 15: return "movd\t{%1, %0|%0, %1}"; case 11: @@ -2373,7 +2378,7 @@ gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") (set (attr "mode") (cond [(eq_attr "alternative" "3,4,9,10") (const_string "SI") @@ -2609,13 +2614,139 @@ ] (const_string "DF")))]) +(define_insn "*movdf_integer_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ") + (match_operand:DF 1 "general_operand" + "fm,f,G,rmF,Fr,C ,Y2*x,m ,Y2*x,r ,Yi"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size + && standard_80387_constant_p (operands[1])) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + return output_387_reg_move (insn, operands); + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "movdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "movq\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + case 9: + case 10: + return "movd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable(); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4,9,10") + (const_string "DI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") (match_operand:DF 1 "general_operand" "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT) + && !optimize_size && TARGET_INTEGER_DFMODE_MOVES && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size @@ -3261,17 +3392,18 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))] "" - "if (!TARGET_64BIT) - { - emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); - DONE; - } - ") +{ + if (!TARGET_64BIT) + { + emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); + DONE; + } +}) (define_insn "zero_extendsidi2_32" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,?*y,?*Y2") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,*y,?*Yi,*Y2") (zero_extend:DI - (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,rm,rm"))) + (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" "@ @@ -3279,22 +3411,26 @@ # # movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} movd\t{%1, %0|%0, %1}" - [(set_attr "mode" "SI,SI,SI,DI,TI") - (set_attr "type" "multi,multi,multi,mmxmov,ssemov")]) + [(set_attr "mode" "SI,SI,SI,DI,DI,TI,TI") + (set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")]) (define_insn "zero_extendsidi2_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*y,?*Y2") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,*y,?*Yi,*Y2") (zero_extend:DI - (match_operand:SI 1 "nonimmediate_operand" "rm,0,rm,rm")))] + (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] "TARGET_64BIT" "@ mov\t{%k1, %k0|%k0, %k1} # movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} movd\t{%1, %0|%0, %1}" - [(set_attr "type" "imovx,imov,mmxmov,ssemov") - (set_attr "mode" "SI,DI,SI,SI")]) + [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "mode" "SI,DI,DI,DI,TI,TI")]) (define_split [(set (match_operand:DI 0 "memory_operand" "") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 50abb8ae2ba..ed1de1946f4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3961,26 +3961,25 @@ "operands[2] = CONST0_RTX (V4SImode);") (define_insn "sse2_loadld" - [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x") + [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x") (vec_merge:V4SI (vec_duplicate:V4SI - (match_operand:SI 2 "nonimmediate_operand" "mr ,m,x")) - (match_operand:V4SI 1 "reg_or_0_operand" " C ,C,0") + (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x")) + (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0") (const_int 1)))] "TARGET_SSE" "@ + movd\t{%2, %0|%0, %2} movd\t{%2, %0|%0, %2} movss\t{%2, %0|%0, %2} movss\t{%2, %0|%0, %2}" [(set_attr "type" "ssemov") - (set_attr "mode" "TI,V4SF,SF")]) + (set_attr "mode" "TI,TI,V4SF,SF")]) -;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must -;; be taken into account, and movdi isn't fully populated even without. (define_insn_and_split "sse2_stored" - [(set (match_operand:SI 0 "nonimmediate_operand" "=mx") + [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r") (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") + (match_operand:V4SI 1 "register_operand" "x,Yi") (parallel [(const_int 0)])))] "TARGET_SSE" "#" @@ -3998,8 +3997,14 @@ "TARGET_SSE" "") -;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must -;; be taken into account, and movdi isn't fully populated even without. +(define_insn "*sse2_storeq_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r") + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "x,Yi") + (parallel [(const_int 0)])))] + "TARGET_64BIT && TARGET_SSE" + "#") + (define_insn "*sse2_storeq" [(set (match_operand:DI 0 "nonimmediate_operand" "=mx") (vec_select:DI