diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 39c786e554a..828da1e9ff0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,75 @@ +2013-09-25 Alexander Ivchenko + Maxim Kuznetsov + Sergey Lega + Anna Tikhonova + Ilya Tocar + Andrey Turetskiy + Ilya Verbin + Kirill Yukhin + Michael Zolotukhin + + * config/i386/i386.c (ix86_avx256_split_vector_move_misalign): + Use new names. + (ix86_expand_vector_move_misalign): Support new unaligned load and + stores and use new names. + (CODE_FOR_sse2_storedqu): Rename to ... + (CODE_FOR_sse2_storedquv16qi): ... this. + (CODE_FOR_sse2_loaddqu): Rename to ... + (CODE_FOR_sse2_loaddquv16qi): ... this. + (CODE_FOR_avx_loaddqu256): Rename to ... + (CODE_FOR_avx_loaddquv32qi): ... this. + (CODE_FOR_avx_storedqu256): Rename to ... + (CODE_FOR_avx_storedquv32qi): ... this. + * config/i386/i386.md (fpint_logic): New. + * config/i386/sse.md (VMOVE): Extend for AVX512. + (VF): Ditto. + (VF_128_256): New. + (VF_512): Ditto. + (VI_UNALIGNED_LOADSTORE): Ditto. + (sse2_avx_avx512f): Ditto. + (sse2_avx2): Extend for AVX512. + (sse4_1_avx2): Ditto. + (avx2_avx512f): New. + (sse): Extend for AVX512. + (sse2): Ditto. + (sse4_1): Ditto. + (avxsizesuffix): Ditto. + (sseintvecmode): Ditto. + (ssePSmode): Ditto. + (_loadu): Ditto. + (_storeu): Ditto. + (_loaddqu): Extend for AVX512 and rename to ... + (_loaddqu): ... this. + (_storedqu): Extend for AVX512 and rename to ... + (_storedqu_movnt): Replace constraint "x" with "v". + (STORENT_MODE): Extend for AVX512. + (*absneg2): Replace constraint "x" with "v". + (*mul3): Ditto. + (*ieee_smin3): Ditto. + (*ieee_smax3): Ditto. + (avx_cmp3): Replace VF with VF_128_256. + (*_maskcmp3_comm): Ditto. + (_maskcmp3): Ditto. + (_andnot3): Extend for AVX512. + (3, anylogic): Replace VF with VF_128_256. + (3, fpint_logic): New. + (*3): Extend for AVX512. + (avx512flogicsuff): New. + (avx512f_): Ditto. + (_movmsk): Replace VF with + VF_128_256. + (_blend): Ditto. + (_blendv): Ditto. + (_dp): Ditto. + (avx_vtest): Ditto. + (_round): Ditto. + (xop_vpermil23): Ditto. + (*avx_vpermilp): Extend for AVX512 and rename to ... + (*_vpermilp): ... this. + (avx_vpermilvar3): Extend for AVX512 and rename to ... + (_vpermilvar3): ... this. + 2013-09-25 Tom Tromey * Makefile.in (PARTITION_H, LTO_SYMTAB_H, COMMON_TARGET_DEF_H) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b4d40bc650c..f10113fd3c4 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -16457,8 +16457,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) gcc_unreachable (); case V32QImode: extract = gen_avx_vextractf128v32qi; - load_unaligned = gen_avx_loaddqu256; - store_unaligned = gen_avx_storedqu256; + load_unaligned = gen_avx_loaddquv32qi; + store_unaligned = gen_avx_storedquv32qi; mode = V16QImode; break; case V8SFmode: @@ -16561,10 +16561,56 @@ void ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { rtx op0, op1, m; + rtx (*load_unaligned) (rtx, rtx); + rtx (*store_unaligned) (rtx, rtx); op0 = operands[0]; op1 = operands[1]; + if (GET_MODE_SIZE (mode) == 64) + { + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_INT: + case MODE_INT: + op0 = gen_lowpart (V16SImode, op0); + op1 = gen_lowpart (V16SImode, op1); + /* FALLTHRU */ + + case MODE_VECTOR_FLOAT: + switch (GET_MODE (op0)) + { + default: + gcc_unreachable (); + case V16SImode: + load_unaligned = gen_avx512f_loaddquv16si; + store_unaligned = gen_avx512f_storedquv16si; + break; + case V16SFmode: + load_unaligned = gen_avx512f_loadups512; + store_unaligned = gen_avx512f_storeups512; + break; + case V8DFmode: + load_unaligned = gen_avx512f_loadupd512; + store_unaligned = gen_avx512f_storeupd512; + break; + } + + if (MEM_P (op1)) + emit_insn (load_unaligned (op0, op1)); + else if (MEM_P (op0)) + emit_insn (store_unaligned (op0, op1)); + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + return; + } + if (TARGET_AVX && GET_MODE_SIZE (mode) == 32) { @@ -16597,7 +16643,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); /* We will eventually emit movups based on insn attributes. */ - emit_insn (gen_sse2_loaddqu (op0, op1)); + emit_insn (gen_sse2_loaddquv16qi (op0, op1)); } else if (TARGET_SSE2 && mode == V2DFmode) { @@ -16672,7 +16718,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); /* We will eventually emit movups based on insn attributes. */ - emit_insn (gen_sse2_storedqu (op0, op1)); + emit_insn (gen_sse2_storedquv16qi (op0, op1)); } else if (TARGET_SSE2 && mode == V2DFmode) { @@ -27400,13 +27446,13 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT }, { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, @@ -27435,8 +27481,8 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e009bc96fc2..03b38426c4b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -779,6 +779,7 @@ ;; Mapping of logic operators (define_code_iterator any_logic [and ior xor]) (define_code_iterator any_or [ior xor]) +(define_code_iterator fpint_logic [and xor]) ;; Base name for insn mnemonic. (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9d9469e2c62..10637cc22e8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -97,13 +97,13 @@ ;; All vector modes including V?TImode, used in move patterns. (define_mode_iterator VMOVE - [(V32QI "TARGET_AVX") V16QI - (V16HI "TARGET_AVX") V8HI - (V8SI "TARGET_AVX") V4SI - (V4DI "TARGET_AVX") V2DI + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI (V2TI "TARGET_AVX") V1TI - (V8SF "TARGET_AVX") V4SF - (V4DF "TARGET_AVX") V2DF]) + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) ;; All vector modes (define_mode_iterator V @@ -124,6 +124,11 @@ ;; All vector float modes (define_mode_iterator VF + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + +;; 128- and 256-bit float vector modes +(define_mode_iterator VF_128_256 [(V8SF "TARGET_AVX") V4SF (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) @@ -143,6 +148,10 @@ (define_mode_iterator VF_256 [V8SF V4DF]) +;; All 512bit vector float modes +(define_mode_iterator VF_512 + [V16SF V8DF]) + ;; All vector integer modes (define_mode_iterator VI [(V32QI "TARGET_AVX") V16QI @@ -160,6 +169,10 @@ (define_mode_iterator VI1 [(V32QI "TARGET_AVX") V16QI]) +(define_mode_iterator VI_UNALIGNED_LOADSTORE + [(V32QI "TARGET_AVX") V16QI + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")]) + ;; All DImode vector integer modes (define_mode_iterator VI8 [(V4DI "TARGET_AVX") V2DI]) @@ -212,11 +225,18 @@ (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) +(define_mode_attr sse2_avx_avx512f + [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") + (V4SI "sse2") (V8SI "avx") (V16SI "avx512f") + (V8DI "avx512f") + (V16SF "avx512f") (V8SF "avx") (V4SF "avx") + (V8DF "avx512f") (V4DF "avx") (V2DF "avx")]) + (define_mode_attr sse2_avx2 [(V16QI "sse2") (V32QI "avx2") (V8HI "sse2") (V16HI "avx2") - (V4SI "sse2") (V8SI "avx2") - (V2DI "sse2") (V4DI "avx2") + (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f") + (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f") (V1TI "sse2") (V2TI "avx2")]) (define_mode_attr ssse3_avx2 @@ -229,7 +249,7 @@ (define_mode_attr sse4_1_avx2 [(V16QI "sse4_1") (V32QI "avx2") (V8HI "sse4_1") (V16HI "avx2") - (V4SI "sse4_1") (V8SI "avx2") + (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f") (V2DI "sse4_1") (V4DI "avx2")]) (define_mode_attr avx_avx2 @@ -244,6 +264,12 @@ (V4SI "vec") (V8SI "avx2") (V2DI "vec") (V4DI "avx2")]) +(define_mode_attr avx2_avx512f + [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f") + (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f") + (V8SF "avx2") (V16SF "avx512f") + (V4DF "avx2") (V8DF "avx512f")]) + (define_mode_attr shuffletype [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i") (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i") @@ -287,22 +313,26 @@ (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2") - (V8SF "avx") (V4DF "avx")]) + (V16SF "avx512f") (V8SF "avx") + (V8DF "avx512f") (V4DF "avx")]) (define_mode_attr sse2 - [(V16QI "sse2") (V32QI "avx") - (V2DI "sse2") (V4DI "avx")]) + [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") + (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")]) (define_mode_attr sse3 [(V16QI "sse3") (V32QI "avx")]) (define_mode_attr sse4_1 [(V4SF "sse4_1") (V2DF "sse4_1") - (V8SF "avx") (V4DF "avx")]) + (V8SF "avx") (V4DF "avx") + (V8DF "avx512f")]) (define_mode_attr avxsizesuffix - [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256") + [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512") + (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256") (V16QI "") (V8HI "") (V4SI "") (V2DI "") + (V16SF "512") (V8DF "512") (V8SF "256") (V4DF "256") (V4SF "") (V2DF "")]) @@ -318,11 +348,13 @@ ;; Mapping of vector float modes to an integer mode of the same size (define_mode_attr sseintvecmode - [(V8SF "V8SI") (V4DF "V4DI") - (V4SF "V4SI") (V2DF "V2DI") - (V8SI "V8SI") (V4DI "V4DI") - (V4SI "V4SI") (V2DI "V2DI") - (V16HI "V16HI") (V8HI "V8HI") + [(V16SF "V16SI") (V8DF "V8DI") + (V8SF "V8SI") (V4DF "V4DI") + (V4SF "V4SI") (V2DF "V2DI") + (V16SI "V16SI") (V8DI "V8DI") + (V8SI "V8SI") (V4DI "V4DI") + (V4SI "V4SI") (V2DI "V2DI") + (V16HI "V16HI") (V8HI "V8HI") (V32QI "V32QI") (V16QI "V16QI")]) (define_mode_attr sseintvecmodelower @@ -349,8 +381,10 @@ ;; Mapping of vector modes ti packed single mode of the same size (define_mode_attr ssePSmode - [(V32QI "V8SF") (V16QI "V4SF") - (V16HI "V8SF") (V8HI "V4SF") + [(V16SI "V16SF") (V8DF "V16SF") + (V16SF "V16SF") (V8DI "V16SF") + (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF") + (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF") (V8SI "V8SF") (V4SI "V4SF") (V4DI "V8SF") (V2DI "V4SF") (V2TI "V8SF") (V1TI "V4SF") @@ -665,12 +699,13 @@ (define_insn "_loadu" [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF - [(match_operand:VF 1 "memory_operand" "m")] + [(match_operand:VF 1 "nonimmediate_operand" "vm")] UNSPEC_LOADU))] "TARGET_SSE" { switch (get_attr_mode (insn)) { + case MODE_V16SF: case MODE_V8SF: case MODE_V4SF: return "%vmovups\t{%1, %0|%0, %1}"; @@ -694,12 +729,13 @@ (define_insn "_storeu" [(set (match_operand:VF 0 "memory_operand" "=m") (unspec:VF - [(match_operand:VF 1 "register_operand" "x")] + [(match_operand:VF 1 "register_operand" "v")] UNSPEC_STOREU))] "TARGET_SSE" { switch (get_attr_mode (insn)) { + case MODE_V16SF: case MODE_V8SF: case MODE_V4SF: return "%vmovups\t{%1, %0|%0, %1}"; @@ -721,10 +757,11 @@ ] (const_string "")))]) -(define_insn "_loaddqu" - [(set (match_operand:VI1 0 "register_operand" "=v") - (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] - UNSPEC_LOADU))] +(define_insn "_loaddqu" + [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v") + (unspec:VI_UNALIGNED_LOADSTORE + [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")] + UNSPEC_LOADU))] "TARGET_SSE2" { switch (get_attr_mode (insn)) @@ -732,6 +769,11 @@ case MODE_V8SF: case MODE_V4SF: return "%vmovups\t{%1, %0|%0, %1}"; + case MODE_XI: + if (mode == V8DImode) + return "vmovdqu64\t{%1, %0|%0, %1}"; + else + return "vmovdqu32\t{%1, %0|%0, %1}"; default: return "%vmovdqu\t{%1, %0|%0, %1}"; } @@ -754,10 +796,11 @@ ] (const_string "")))]) -(define_insn "_storedqu" - [(set (match_operand:VI1 0 "memory_operand" "=m") - (unspec:VI1 [(match_operand:VI1 1 "register_operand" "v")] - UNSPEC_STOREU))] +(define_insn "_storedqu" + [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m") + (unspec:VI_UNALIGNED_LOADSTORE + [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")] + UNSPEC_STOREU))] "TARGET_SSE2" { switch (get_attr_mode (insn)) @@ -765,6 +808,11 @@ case MODE_V8SF: case MODE_V4SF: return "%vmovups\t{%1, %0|%0, %1}"; + case MODE_XI: + if (mode == V8DImode) + return "vmovdqu64\t{%1, %0|%0, %1}"; + else + return "vmovdqu32\t{%1, %0|%0, %1}"; default: return "%vmovdqu\t{%1, %0|%0, %1}"; } @@ -821,8 +869,9 @@ (define_insn "_movnt" [(set (match_operand:VF 0 "memory_operand" "=m") - (unspec:VF [(match_operand:VF 1 "register_operand" "x")] - UNSPEC_MOVNT))] + (unspec:VF + [(match_operand:VF 1 "register_operand" "v")] + UNSPEC_MOVNT))] "TARGET_SSE" "%vmovnt\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") @@ -852,9 +901,9 @@ (define_mode_iterator STORENT_MODE [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A") - (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") - (V8SF "TARGET_AVX") V4SF - (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) (define_expand "storent" [(set (match_operand:STORENT_MODE 0 "memory_operand") @@ -877,10 +926,10 @@ "ix86_expand_fp_absneg_operator (, mode, operands); DONE;") (define_insn_and_split "*absneg2" - [(set (match_operand:VF 0 "register_operand" "=x,x,x,x") + [(set (match_operand:VF 0 "register_operand" "=x,x,v,v") (match_operator:VF 3 "absneg_operator" - [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")])) - (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))] + [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")])) + (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))] "TARGET_SSE" "#" "&& reload_completed" @@ -962,10 +1011,10 @@ "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);") (define_insn "*mul3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (mult:VF - (match_operand:VF 1 "nonimmediate_operand" "%0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "nonimmediate_operand" "%0,v") + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE && ix86_binary_operator_ok (MULT, mode, operands)" "@ mul\t{%2, %0|%0, %2} @@ -1239,10 +1288,10 @@ ;; presence of -0.0 and NaN. (define_insn "*ieee_smin3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=v,v") (unspec:VF - [(match_operand:VF 1 "register_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] + [(match_operand:VF 1 "register_operand" "0,v") + (match_operand:VF 2 "nonimmediate_operand" "vm,vm")] UNSPEC_IEEE_MIN))] "TARGET_SSE" "@ @@ -1254,10 +1303,10 @@ (set_attr "mode" "")]) (define_insn "*ieee_smax3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=v,v") (unspec:VF - [(match_operand:VF 1 "register_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] + [(match_operand:VF 1 "register_operand" "0,v") + (match_operand:VF 2 "nonimmediate_operand" "vm,vm")] UNSPEC_IEEE_MAX))] "TARGET_SSE" "@ @@ -1632,10 +1681,10 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "avx_cmp3" - [(set (match_operand:VF 0 "register_operand" "=x") - (unspec:VF - [(match_operand:VF 1 "register_operand" "x") - (match_operand:VF 2 "nonimmediate_operand" "xm") + [(set (match_operand:VF_128_256 0 "register_operand" "=x") + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "register_operand" "x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "xm") (match_operand:SI 3 "const_0_to_31_operand" "n")] UNSPEC_PCMP))] "TARGET_AVX" @@ -1663,10 +1712,10 @@ (set_attr "mode" "")]) (define_insn "*_maskcmp3_comm" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (match_operator:VF 3 "sse_comparison_operator" - [(match_operand:VF 1 "register_operand" "%0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))] + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + (match_operator:VF_128_256 3 "sse_comparison_operator" + [(match_operand:VF_128_256 1 "register_operand" "%0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))] "TARGET_SSE && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE" "@ @@ -1679,10 +1728,10 @@ (set_attr "mode" "")]) (define_insn "_maskcmp3" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (match_operator:VF 3 "sse_comparison_operator" - [(match_operand:VF 1 "register_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))] + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + (match_operator:VF_128_256 3 "sse_comparison_operator" + [(match_operand:VF_128_256 1 "register_operand" "0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))] "TARGET_SSE" "@ cmp%D3\t{%2, %0|%0, %2} @@ -1792,11 +1841,11 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "_andnot3" - [(set (match_operand:VF 0 "register_operand" "=x,x") + [(set (match_operand:VF 0 "register_operand" "=x,v") (and:VF (not:VF - (match_operand:VF 1 "register_operand" "0,x")) - (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] + (match_operand:VF 1 "register_operand" "0,v")) + (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] "TARGET_SSE" { static char buf[32]; @@ -1825,12 +1874,19 @@ gcc_unreachable (); } + /* There is no vandnp[sd]. Use vpandnq. */ + if (GET_MODE_SIZE (mode) == 64) + { + suffix = "q"; + ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + } + snprintf (buf, sizeof (buf), ops, suffix); return buf; } [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,maybe_evex") (set (attr "mode") (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "") @@ -1842,13 +1898,21 @@ (const_string "")))]) (define_expand "3" - [(set (match_operand:VF 0 "register_operand") - (any_logic:VF - (match_operand:VF 1 "nonimmediate_operand") - (match_operand:VF 2 "nonimmediate_operand")))] + [(set (match_operand:VF_128_256 0 "register_operand") + (any_logic:VF_128_256 + (match_operand:VF_128_256 1 "nonimmediate_operand") + (match_operand:VF_128_256 2 "nonimmediate_operand")))] "TARGET_SSE" "ix86_fixup_binary_operands_no_copy (, mode, operands);") +(define_expand "3" + [(set (match_operand:VF_512 0 "register_operand") + (fpint_logic:VF_512 + (match_operand:VF_512 1 "nonimmediate_operand") + (match_operand:VF_512 2 "nonimmediate_operand")))] + "TARGET_AVX512F" + "ix86_fixup_binary_operands_no_copy (, mode, operands);") + (define_insn "*3" [(set (match_operand:VF 0 "register_operand" "=x,v") (any_logic:VF @@ -1882,12 +1946,19 @@ gcc_unreachable (); } + /* There is no vp[sd]. Use vpq. */ + if (GET_MODE_SIZE (mode) == 64) + { + suffix = "q"; + ops = "vp%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + } + snprintf (buf, sizeof (buf), ops, suffix); return buf; } [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,maybe_evex") (set (attr "mode") (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "") @@ -2105,6 +2176,23 @@ ] (const_string "TI")))]) +;; There are no floating point xor for V16SF and V8DF in avx512f +;; but we need them for negation. Instead we use int versions of +;; xor. Maybe there could be a better way to do that. + +(define_mode_attr avx512flogicsuff + [(V16SF "d") (V8DF "q")]) + +(define_insn "avx512f_" + [(set (match_operand:VF_512 0 "register_operand" "=v") + (fpint_logic:VF_512 + (match_operand:VF_512 1 "register_operand" "v") + (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] + "TARGET_AVX512F" + "vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog") + (set_attr "prefix" "evex")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; FMA floating point multiply/accumulate instructions. These include @@ -7747,7 +7835,7 @@ (define_insn "_movmsk" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI - [(match_operand:VF 1 "register_operand" "x")] + [(match_operand:VF_128_256 1 "register_operand" "x")] UNSPEC_MOVMSK))] "TARGET_SSE" "%vmovmsk\t{%1, %0|%0, %1}" @@ -8537,10 +8625,10 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "_blend" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (vec_merge:VF - (match_operand:VF 2 "nonimmediate_operand" "xm,xm") - (match_operand:VF 1 "register_operand" "0,x") + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + (vec_merge:VF_128_256 + (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") + (match_operand:VF_128_256 1 "register_operand" "0,x") (match_operand:SI 3 "const_0_to__operand")))] "TARGET_SSE4_1" "@ @@ -8555,11 +8643,11 @@ (set_attr "mode" "")]) (define_insn "_blendv" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "register_operand" "0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm") - (match_operand:VF 3 "register_operand" "Yz,x")] + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "register_operand" "0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") + (match_operand:VF_128_256 3 "register_operand" "Yz,x")] UNSPEC_BLENDV))] "TARGET_SSE4_1" "@ @@ -8575,10 +8663,10 @@ (set_attr "mode" "")]) (define_insn "_dp" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm,xm") + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm") (match_operand:SI 3 "const_0_to_255_operand" "n,n")] UNSPEC_DP))] "TARGET_SSE4_1" @@ -8909,8 +8997,8 @@ ;; setting FLAGS_REG. But it is not a really compare instruction. (define_insn "avx_vtest" [(set (reg:CC FLAGS_REG) - (unspec:CC [(match_operand:VF 0 "register_operand" "x") - (match_operand:VF 1 "nonimmediate_operand" "xm")] + (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x") + (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_VTESTP))] "TARGET_AVX" "vtest\t{%1, %0|%0, %1}" @@ -8947,9 +9035,9 @@ (set_attr "mode" "TI")]) (define_insn "_round" - [(set (match_operand:VF 0 "register_operand" "=x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "xm") + [(set (match_operand:VF_128_256 0 "register_operand" "=x") + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm") (match_operand:SI 2 "const_0_to_15_operand" "n")] UNSPEC_ROUND))] "TARGET_ROUND" @@ -10341,10 +10429,10 @@ (set_attr "mode" "TI")]) (define_insn "xop_vpermil23" - [(set (match_operand:VF 0 "register_operand" "=x") - (unspec:VF - [(match_operand:VF 1 "register_operand" "x") - (match_operand:VF 2 "nonimmediate_operand" "%x") + [(set (match_operand:VF_128_256 0 "register_operand" "=x") + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "register_operand" "x") + (match_operand:VF_128_256 2 "nonimmediate_operand" "%x") (match_operand: 3 "nonimmediate_operand" "xm") (match_operand:SI 4 "const_0_to_3_operand" "n")] UNSPEC_VPERMIL2))] @@ -10794,7 +10882,7 @@ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (, perm)); }) -(define_insn "*avx_vpermilp" +(define_insn "*_vpermilp" [(set (match_operand:VF 0 "register_operand" "=v") (vec_select:VF (match_operand:VF 1 "nonimmediate_operand" "vm") @@ -10811,9 +10899,9 @@ (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) -(define_insn "avx_vpermilvar3" +(define_insn "_vpermilvar3" [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF [(match_operand:VF 1 "register_operand" "v") @@ -10823,9 +10911,10 @@ "vpermil\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") (set_attr "btver2_decode" "vector") - (set_attr "mode" "")]) + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + (define_expand "avx_vperm2f1283" [(set (match_operand:AVX256MODE2P 0 "register_operand")