From ed3e611e05f5490cb986ce9a1398f6755717b937 Mon Sep 17 00:00:00 2001 From: Alexander Ivchenko Date: Tue, 14 Oct 2014 08:40:34 +0000 Subject: [PATCH] AVX-512. 68/n. Add vpmullw, vpacksdw, pmaddwd insn. gcc/ * config/i386/sse.md (define_c_enum "unspec"): Add UNSPEC_PMADDWD512. (define_mode_iterator VI2_AVX2): Add V32HI mode. (define_expand "mul3"): Add masking. (define_insn "*mul3"): Ditto. (define_expand "mul3_highpart"): Ditto. (define_insn "*mul3_highpart"): Ditto. (define_insn "avx512bw_pmaddwd512"): New. (define_mode_attr SDOT_PMADD_SUF): Ditto. (define_expand "sdot_prod"): Add . (define_insn "_packssdw"): Add masking. (define_insn "*_pmulhrsw3"): Ditto. (define_insn "avx2_packusdw"): Delete. (define_insn "sse4_1_packusdw"): Ditto. (define_insn "_packusdw"): New. Co-Authored-By: Andrey Turetskiy Co-Authored-By: Anna Tikhonova Co-Authored-By: Ilya Tocar Co-Authored-By: Ilya Verbin Co-Authored-By: Kirill Yukhin Co-Authored-By: Maxim Kuznetsov Co-Authored-By: Michael Zolotukhin From-SVN: r216185 --- gcc/ChangeLog | 25 +++++++++ gcc/config/i386/sse.md | 120 ++++++++++++++++++++++------------------- 2 files changed, 89 insertions(+), 56 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0ade8e9fd23..79606b70fb5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2014-10-14 Alexander Ivchenko + Maxim Kuznetsov + Anna Tikhonova + Ilya Tocar + Andrey Turetskiy + Ilya Verbin + Kirill Yukhin + Michael Zolotukhin + + * config/i386/sse.md + (define_c_enum "unspec"): Add UNSPEC_PMADDWD512. + (define_mode_iterator VI2_AVX2): Add V32HI mode. + (define_expand "mul3"): Add masking. + (define_insn "*mul3"): Ditto. + (define_expand "mul3_highpart"): Ditto. + (define_insn "*mul3_highpart"): Ditto. + (define_insn "avx512bw_pmaddwd512"): New. + (define_mode_attr SDOT_PMADD_SUF): Ditto. + (define_expand "sdot_prod"): Add . + (define_insn "_packssdw"): Add masking. + (define_insn "*_pmulhrsw3"): Ditto. + (define_insn "avx2_packusdw"): Delete. + (define_insn "sse4_1_packusdw"): Ditto. + (define_insn "_packusdw"): New. + 2014-10-14 Alexander Ivchenko Maxim Kuznetsov Anna Tikhonova diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9998996b676..c9173934585 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -132,6 +132,7 @@ ;; For AVX512BW support UNSPEC_DBPSADBW UNSPEC_PMADDUBSW512 + UNSPEC_PMADDWD512 UNSPEC_PSHUFHW UNSPEC_PSHUFLW @@ -300,7 +301,7 @@ [(V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI2_AVX2 - [(V16HI "TARGET_AVX2") V8HI]) + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI2_AVX512F [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI]) @@ -9135,28 +9136,30 @@ DONE; }) -(define_expand "mul3" +(define_expand "mul3" [(set (match_operand:VI2_AVX2 0 "register_operand") (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand") (match_operand:VI2_AVX2 2 "nonimmediate_operand")))] - "TARGET_SSE2" + "TARGET_SSE2 && && " "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);") -(define_insn "*mul3" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") - (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v") - (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, mode, operands)" +(define_insn "*mul3" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v") + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))] + "TARGET_SSE2 + && ix86_binary_operator_ok (MULT, mode, operands) + && && " "@ pmullw\t{%2, %0|%0, %2} - vpmullw\t{%2, %1, %0|%0, %1, %2}" + vpmullw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseimul") (set_attr "prefix_data16" "1,*") (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) -(define_expand "mul3_highpart" +(define_expand "mul3_highpart" [(set (match_operand:VI2_AVX2 0 "register_operand") (truncate:VI2_AVX2 (lshiftrt: @@ -9166,23 +9169,26 @@ (any_extend: (match_operand:VI2_AVX2 2 "nonimmediate_operand"))) (const_int 16))))] - "TARGET_SSE2" + "TARGET_SSE2 + && && " "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);") -(define_insn "*mul3_highpart" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") +(define_insn "*mul3_highpart" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") (truncate:VI2_AVX2 (lshiftrt: (mult: (any_extend: - (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")) + (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")) (any_extend: - (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm"))) + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm"))) (const_int 16))))] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, mode, operands)" + "TARGET_SSE2 + && ix86_binary_operator_ok (MULT, mode, operands) + && && " "@ pmulhw\t{%2, %0|%0, %2} - vpmulhw\t{%2, %1, %0|%0, %1, %2}" + vpmulhw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseimul") (set_attr "prefix_data16" "1,*") @@ -9425,6 +9431,18 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) +(define_insn "avx512bw_pmaddwd512" + [(set (match_operand: 0 "register_operand" "=v") + (unspec: + [(match_operand:VI2_AVX2 1 "register_operand" "v") + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")] + UNSPEC_PMADDWD512))] + "TARGET_AVX512BW && " + "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"; + [(set_attr "type" "sseiadd") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + (define_expand "avx2_pmaddwd" [(set (match_operand:V8SI 0 "register_operand") (plus:V8SI @@ -9665,6 +9683,9 @@ DONE; }) +(define_mode_attr SDOT_PMADD_SUF + [(V32HI "512v32hi") (V16HI "") (V8HI "")]) + (define_expand "sdot_prod" [(match_operand: 0 "register_operand") (match_operand:VI2_AVX2 1 "register_operand") @@ -9673,7 +9694,7 @@ "TARGET_SSE2" { rtx t = gen_reg_rtx (mode); - emit_insn (gen__pmaddwd (t, operands[1], operands[2])); + emit_insn (gen__pmaddwd (t, operands[1], operands[2])); emit_insn (gen_rtx_SET (VOIDmode, operands[0], gen_rtx_PLUS (mode, operands[3], t))); @@ -10857,17 +10878,17 @@ (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "")]) -(define_insn "_packssdw" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") +(define_insn "_packssdw" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") (vec_concat:VI2_AVX2 (ss_truncate: - (match_operand: 1 "register_operand" "0,x")) + (match_operand: 1 "register_operand" "0,v")) (ss_truncate: - (match_operand: 2 "nonimmediate_operand" "xm,xm"))))] - "TARGET_SSE2" + (match_operand: 2 "nonimmediate_operand" "xm,vm"))))] + "TARGET_SSE2 && && " "@ packssdw\t{%2, %0|%0, %2} - vpackssdw\t{%2, %1, %0|%0, %1, %2}" + vpackssdw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_data16" "1,*") @@ -13349,29 +13370,30 @@ ix86_fixup_binary_operands_no_copy (MULT, mode, operands); }) -(define_insn "*_pmulhrsw3" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") +(define_insn "*_pmulhrsw3" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") (truncate:VI2_AVX2 (lshiftrt: (plus: (lshiftrt: (mult: (sign_extend: - (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")) + (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")) (sign_extend: - (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm"))) + (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm"))) (const_int 14)) (match_operand:VI2_AVX2 3 "const1_operand")) (const_int 1))))] - "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, mode, operands)" + "TARGET_SSSE3 && && + && ix86_binary_operator_ok (MULT, mode, operands)" "@ pmulhrsw\t{%2, %0|%0, %2} - vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" + vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseimul") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "")]) (define_insn "*ssse3_pmulhrswv4hi3" @@ -13743,36 +13765,22 @@ (set_attr "btver2_decode" "vector,vector") (set_attr "mode" "")]) -(define_insn "avx2_packusdw" - [(set (match_operand:V16HI 0 "register_operand" "=x") - (vec_concat:V16HI - (us_truncate:V8HI - (match_operand:V8SI 1 "register_operand" "x")) - (us_truncate:V8HI - (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))] - "TARGET_AVX2" - "vpackusdw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "sse4_1_packusdw" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (vec_concat:V8HI - (us_truncate:V4HI - (match_operand:V4SI 1 "register_operand" "0,x")) - (us_truncate:V4HI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))] - "TARGET_SSE4_1" +(define_insn "_packusdw" + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + (vec_concat:VI2_AVX2 + (us_truncate: + (match_operand: 1 "register_operand" "0,v")) + (us_truncate: + (match_operand: 2 "nonimmediate_operand" "xm,vm"))))] + "TARGET_SSE4_1 && && " "@ packusdw\t{%2, %0|%0, %2} - vpackusdw\t{%2, %1, %0|%0, %1, %2}" + vpackusdw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) + (set_attr "prefix" "orig,maybe_evex") + (set_attr "mode" "")]) (define_insn "_pblendvb" [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")