From ee9dd92eeaf6cc38bcf14b75c2afeb825d5aa3a4 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sat, 28 Aug 2010 16:30:34 +0200 Subject: [PATCH] sse.md (extsuffix): New code attribute. * config/i386/sse.md (extsuffix): New code attribute. (sse4_1_<code>v8qiv8hi2): Macroize insn from sse4_1_extendv8qiv8hi2 and sse4_1_zero_extendv8qiv8hi2 using any_extend code iterator. (sse4_1_<code>v4qiv4si2): Ditto from sse4_1_extendv4qiv4si2 and sse4_1_zero_extendv4qiv4si2. (sse4_1_<code>v2qiv2di2): Ditto from sse4_1_extendv2qiv2di2 and sse4_1_zero_extendv2qiv2di2. (sse4_1_<code>v4hiv4si2): Ditto from sse4_1_extendv4hiv4si2 and sse4_1_zero_extendv4hiv4si2. (sse4_1_<code>v2hiv2di2): Ditto from sse4_1_extendv2hiv2di2 and sse4_1_zero_extendv2hiv2di2. (sse4_1_extendv2siv2di2): Ditto from sse4_1_extendv2siv2di2 and sse4_1_zero_extendv2siv2di2 (<s>mulv8hi3_highpart): Macroize expander from {u,s}mulv8hi3_highpart using any_extend code iterator. (*avx_<s>mulv8hi3_highpart): Macroize insn from *avx_{u,s}mulv8hi3_highpart using any_extend code iterator. (*<s>mulv8hi3_highpart): Ditto from *{u,s}mulv8hi3_highpart. * config/i386/i386.c (ix86_expand_sse4_unpack): Update for renamed gen_sse4_1_sign_extend* functions. (struct builtin_description bdesc_args): Ditto. From-SVN: r163616 --- gcc/ChangeLog | 30 ++++++- gcc/config/i386/i386.c | 18 ++-- gcc/config/i386/sse.md | 200 +++++++---------------------------------- 3 files changed, 69 insertions(+), 179 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bddda69c023..196b4269534 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2010-08-28 Uros Bizjak + + * config/i386/sse.md (extsuffix): New code attribute. + (sse4_1_<code>v8qiv8hi2): Macroize insn from sse4_1_extendv8qiv8hi2 + and sse4_1_zero_extendv8qiv8hi2 using any_extend code iterator. + (sse4_1_<code>v4qiv4si2): Ditto from sse4_1_extendv4qiv4si2 + and sse4_1_zero_extendv4qiv4si2. + (sse4_1_<code>v2qiv2di2): Ditto from sse4_1_extendv2qiv2di2 + and sse4_1_zero_extendv2qiv2di2.
+ (sse4_1_<code>v4hiv4si2): Ditto from sse4_1_extendv4hiv4si2 + and sse4_1_zero_extendv4hiv4si2. + (sse4_1_<code>v2hiv2di2): Ditto from sse4_1_extendv2hiv2di2 + and sse4_1_zero_extendv2hiv2di2. + (sse4_1_extendv2siv2di2): Ditto from sse4_1_extendv2siv2di2 + and sse4_1_zero_extendv2siv2di2 + + (<s>mulv8hi3_highpart): Macroize expander from {u,s}mulv8hi3_highpart + using any_extend code iterator. + (*avx_<s>mulv8hi3_highpart): Macroize insn from + *avx_{u,s}mulv8hi3_highpart using any_extend code iterator. + (*<s>mulv8hi3_highpart): Ditto from *{u,s}mulv8hi3_highpart. + + * config/i386/i386.c (ix86_expand_sse4_unpack): Update for renamed + gen_sse4_1_sign_extend* functions. + (struct builtin_description bdesc_args): Ditto. + 2010-08-27 Xinliang David Li PR/45422 @@ -245,10 +271,10 @@ 2010-08-27 Ramana Radhakrishnan * config/arm/arm.md (enabled): Test the value of arch_enabled - rather than just using it. + rather than just using it. 2010-08-27 Olivier Hainque - Eric Botcazou + Eric Botcazou * dse.c (group_info.base_mem, get_group_info): Use BLKmode to cover all the possible offsets from this base.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0bc1c664cdb..db08244b11d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17397,19 +17397,19 @@ ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) if (unsigned_p) unpack = gen_sse4_1_zero_extendv8qiv8hi2; else - unpack = gen_sse4_1_extendv8qiv8hi2; + unpack = gen_sse4_1_sign_extendv8qiv8hi2; break; case V8HImode: if (unsigned_p) unpack = gen_sse4_1_zero_extendv4hiv4si2; else - unpack = gen_sse4_1_extendv4hiv4si2; + unpack = gen_sse4_1_sign_extendv4hiv4si2; break; case V4SImode: if (unsigned_p) unpack = gen_sse4_1_zero_extendv2siv2di2; else - unpack = gen_sse4_1_extendv2siv2di2; + unpack = gen_sse4_1_sign_extendv2siv2di2; break; default: gcc_unreachable (); @@ -23075,12 +23075,12 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 
"__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ffcbdf83559..a3488cf4b74 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -19,6 +19,9 @@ ;; . +;; Instruction suffix for sign and zero extensions. 
+(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) + ;; 16 byte integral modes handled by SSE (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI]) @@ -5243,92 +5246,47 @@ (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) -(define_expand "smulv8hi3_highpart" +(define_expand "<s>mulv8hi3_highpart" [(set (match_operand:V8HI 0 "register_operand" "") (truncate:V8HI (lshiftrt:V8SI (mult:V8SI - (sign_extend:V8SI + (any_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" "")) - (sign_extend:V8SI + (any_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" ""))) (const_int 16))))] "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") -(define_insn "*avxv8hi3_highpart" +(define_insn "*avx_<s>mulv8hi3_highpart" [(set (match_operand:V8HI 0 "register_operand" "=x") (truncate:V8HI (lshiftrt:V8SI (mult:V8SI - (sign_extend:V8SI + (any_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" "%x")) - (sign_extend:V8SI + (any_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) (const_int 16))))] "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)" - "vpmulhw\t{%2, %1, %0|%0, %1, %2}" + "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseimul") (set_attr "prefix" "vex") (set_attr "mode" "TI")]) -(define_insn "*smulv8hi3_highpart" +(define_insn "*<s>mulv8hi3_highpart" [(set (match_operand:V8HI 0 "register_operand" "=x") (truncate:V8HI (lshiftrt:V8SI (mult:V8SI - (sign_extend:V8SI + (any_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" "%0")) - (sign_extend:V8SI + (any_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) (const_int 16))))] "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "prefix_data16" "1") - (set_attr "mode" "TI")]) - -(define_expand "umulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "") - (truncate:V8HI - (lshiftrt:V8SI - (mult:V8SI - (zero_extend:V8SI -
(match_operand:V8HI 1 "nonimmediate_operand" "")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" ""))) - (const_int 16))))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") - -(define_insn "*avx_umulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - (lshiftrt:V8SI - (mult:V8SI - (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%x")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_int 16))))] - "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)" - "vpmulhuw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseimul") - (set_attr "prefix" "vex") - (set_attr "mode" "TI")]) - -(define_insn "*umulv8hi3_highpart" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (truncate:V8HI - (lshiftrt:V8SI - (mult:V8SI - (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%0")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) - (const_int 16))))] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" - "pmulhuw\t{%2, %0|%0, %2}" + "pmulh<u>w\t{%2, %0|%0, %2}" [(set_attr "type" "sseimul") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -9592,9 +9550,9 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "sse4_1_extendv8qiv8hi2" +(define_insn "sse4_1_<code>v8qiv8hi2" [(set (match_operand:V8HI 0 "register_operand" "=x") - (sign_extend:V8HI + (any_extend:V8HI (vec_select:V8QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) @@ -9606,15 +9564,15 @@ (const_int 6) (const_int 7)]))))] "TARGET_SSE4_1" - "%vpmovsxbw\t{%1, %0|%0, %1}" + "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "sse4_1_extendv4qiv4si2" +(define_insn "sse4_1_<code>v4qiv4si2" [(set (match_operand:V4SI 0 "register_operand" "=x") - (sign_extend:V4SI +
(any_extend:V4SI (vec_select:V4QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) @@ -9622,29 +9580,15 @@ (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1" - "%vpmovsxbd\t{%1, %0|%0, %1}" + "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "sse4_1_extendv2qiv2di2" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (sign_extend:V2DI - (vec_select:V2QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] - "TARGET_SSE4_1" - "%vpmovsxbq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "sse4_1_extendv4hiv4si2" +(define_insn "sse4_1_<code>v4hiv4si2" [(set (match_operand:V4SI 0 "register_operand" "=x") - (sign_extend:V4SI + (any_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) @@ -9652,129 +9596,49 @@ (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1" - "%vpmovsxwd\t{%1, %0|%0, %1}" + "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "sse4_1_extendv2hiv2di2" +(define_insn "sse4_1_<code>v2qiv2di2" [(set (match_operand:V2DI 0 "register_operand" "=x") - (sign_extend:V2DI - (vec_select:V2HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] - "TARGET_SSE4_1" - "%vpmovsxwq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "sse4_1_extendv2siv2di2" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] - "TARGET_SSE4_1" - "%vpmovsxdq\t{%1,
%0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "sse4_1_zero_extendv8qiv8hi2" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (zero_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3) - (const_int 4) - (const_int 5) - (const_int 6) - (const_int 7)]))))] - "TARGET_SSE4_1" - "%vpmovzxbw\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "sse4_1_zero_extendv4qiv4si2" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (zero_extend:V4SI - (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3)]))))] - "TARGET_SSE4_1" - "%vpmovzxbd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -(define_insn "sse4_1_zero_extendv2qiv2di2" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1" - "%vpmovzxbq\t{%1, %0|%0, %1}" + "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "sse4_1_zero_extendv4hiv4si2" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (zero_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3)]))))] - "TARGET_SSE4_1" - "%vpmovzxwd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) -
-(define_insn "sse4_1_zero_extendv2hiv2di2" +(define_insn "sse4_1_<code>v2hiv2di2" [(set (match_operand:V2DI 0 "register_operand" "=x") - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1" - "%vpmovzxwq\t{%1, %0|%0, %1}" + "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -(define_insn "sse4_1_zero_extendv2siv2di2" +(define_insn "sse4_1_<code>v2siv2di2" [(set (match_operand:V2DI 0 "register_operand" "=x") - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1" - "%vpmovzxdq\t{%1, %0|%0, %1}" + "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex")