diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b9c27f59d03..b9e35dc6666 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,46 @@ +2007-04-22 Uros Bizjak + + PR tree-optimization/24659 + * optabs.h (enum optab_index) [OTI_vec_unpacks_hi, + OTI_vec_unpacks_lo]: Update comment to mention floating point operands. + (vec_pack_trunc_optab): Rename from vec_pack_mod_optab. + * genopinit.c (optabs): Rename vec_pack_mod_optab + to vec_pack_trunc_optab. + * tree-vect-transform.c (vectorizable_type_demotion): Do not fail + early for scalar floating point operands for NOP_EXPR. + (vectorizable_type_promotion): Ditto. + * optabs.c (optab_for_tree_code) [VEC_PACK_TRUNC_EXPR]: Return + vec_pack_trunc_optab. + (expand_binop): Rename vec_pack_mod_optab to vec_pack_trunc_optab. + + * tree.def (VEC_PACK_TRUNC_EXPR): Rename from VEC_PACK_MOD_EXPR. + * tree-pretty-print.c (dump_generic_node) [VEC_PACK_TRUNC_EXPR]: + Rename from VEC_PACK_MOD_EXPR. + (op_prio) [VEC_PACK_TRUNC_EXPR]: Ditto. + * expr.c (expand_expr_real_1): Ditto. + * tree-inline.c (estimate_num_insns_1): Ditto. + * tree-vect-generic.c (expand_vector_operations_1): Ditto. + + * config/i386/sse.md (vec_unpacks_hi_v4sf): New expander. + (vec_unpacks_lo_v4sf): Ditto. + (vec_pack_trunc_v2df): Ditto. + (vec_pack_trunc_v8hi): Rename from vec_pack_mod_v8hi. + (vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si. + (vec_pack_trunc_v2di): Rename from vec_pack_mod_v2di. + + * config/rs6000/altivec.md (vec_pack_trunc_v8hi): Rename from + vec_pack_mod_v8hi. + (vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si. + + * doc/c-tree.texi (Expression trees) [VEC_PACK_TRUNC_EXPR]: + Rename from VEC_PACK_MOD_EXPR. This expression also represents + packing of floating point operands. + [VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR]: These expressions also + represent unpacking of floating point operands. + * doc/md.texi (Standard Names) [vec_pack_trunc]: Update documentation. + [vec_unpacks_hi]: Ditto. + [vec_unpacks_lo]: Ditto. 
+ 2007-04-22 Jan Hubicka * final.c (rest_of_handle_final): Call diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a0047db8c29..16219e08aa2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2150,6 +2150,51 @@ (set_attr "mode" "V2DF") (set_attr "amdfam10_decode" "direct")]) +(define_expand "vec_unpacks_hi_v4sf" + [(set (match_dup 2) + (vec_select:V4SF + (vec_concat:V8SF + (match_dup 2) + (match_operand:V4SF 1 "nonimmediate_operand" "")) + (parallel [(const_int 6) + (const_int 7) + (const_int 2) + (const_int 3)]))) + (set (match_operand:V2DF 0 "register_operand" "") + (float_extend:V2DF + (vec_select:V2SF + (match_dup 2) + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE2" +{ + operands[2] = gen_reg_rtx (V4SFmode); +}) + +(define_expand "vec_unpacks_lo_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "nonimmediate_operand" "") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SSE2") + +(define_expand "vec_pack_trunc_v2df" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")] + "TARGET_SSE2" +{ + rtx r1, r2; + + r1 = gen_reg_rtx (V4SFmode); + r2 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_sse2_cvtpd2ps (r1, operands[1])); + emit_insn (gen_sse2_cvtpd2ps (r2, operands[2])); + emit_insn (gen_sse_movlhps (operands[0], r1, r2)); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel double-precision floating point element swizzling @@ -3420,7 +3465,7 @@ ;; h3 = aeimquy2bfjnrvz3 ;; l3 = cgkosw04dhlptx15 ;; result = bdfhjlnprtvxz135 -(define_expand "vec_pack_mod_v8hi" +(define_expand "vec_pack_trunc_v8hi" [(match_operand:V16QI 0 "register_operand" "") (match_operand:V8HI 1 "register_operand" "") (match_operand:V8HI 2 "register_operand" "")] @@ -3455,7 +3500,7 @@ ;; h2 = aeimbfjn ;; l2 = cgkodhlp ;; result = 
bdfhjlnp -(define_expand "vec_pack_mod_v4si" +(define_expand "vec_pack_trunc_v4si" [(match_operand:V8HI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")] @@ -3484,7 +3529,7 @@ ;; h1 = aebf ;; l1 = cgdh ;; result = bdfh -(define_expand "vec_pack_mod_v2di" +(define_expand "vec_pack_trunc_v2di" [(match_operand:V4SI 0 "register_operand" "") (match_operand:V2DI 1 "register_operand" "") (match_operand:V2DI 2 "register_operand" "")] diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1c1d6a64f17..59bb481eba5 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2603,7 +2603,7 @@ DONE; }") -(define_expand "vec_pack_mod_v8hi" +(define_expand "vec_pack_trunc_v8hi" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") (match_operand:V8HI 2 "register_operand" "v")] @@ -2615,7 +2615,7 @@ DONE; }") -(define_expand "vec_pack_mod_v4si" +(define_expand "vec_pack_trunc_v4si" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] diff --git a/gcc/doc/c-tree.texi b/gcc/doc/c-tree.texi index 4bf09057124..60482e4626c 100644 --- a/gcc/doc/c-tree.texi +++ b/gcc/doc/c-tree.texi @@ -1983,7 +1983,7 @@ This macro returns the attributes on the type @var{type}. @tindex VEC_WIDEN_MULT_LO_EXPR @tindex VEC_UNPACK_HI_EXPR @tindex VEC_UNPACK_LO_EXPR -@tindex VEC_PACK_MOD_EXPR +@tindex VEC_PACK_TRUNC_EXPR @tindex VEC_PACK_SAT_EXPR @tindex VEC_EXTRACT_EVEN_EXPR @tindex VEC_EXTRACT_ODD_EXPR @@ -2837,23 +2837,30 @@ vector of @code{N/2} products. @item VEC_UNPACK_HI_EXPR @item VEC_UNPACK_LO_EXPR -These nodes represent unpacking of the high and low parts of the input vector, +These nodes represent unpacking of the high and low parts of the input vector, respectively. 
The single operand is a vector that contains @code{N} elements -of the same integral type. The result is a vector that contains half as many -elements, of an integral type whose size is twice as wide. In the case of -@code{VEC_UNPACK_HI_EXPR} the high @code{N/2} elements of the vector are -extracted and widened (promoted). In the case of @code{VEC_UNPACK_LO_EXPR} the -low @code{N/2} elements of the vector are extracted and widened (promoted). +of the same integral or floating point type. The result is a vector +that contains half as many elements, of an integral or floating point type +whose size is twice as wide. In the case of @code{VEC_UNPACK_HI_EXPR} the +high @code{N/2} elements of the vector are extracted and widened (promoted). +In the case of @code{VEC_UNPACK_LO_EXPR} the low @code{N/2} elements of the +vector are extracted and widened (promoted). + +@item VEC_PACK_TRUNC_EXPR +This node represents packing of truncated elements of the two input vectors +into the output vector. Input operands are vectors that contain the same +number of elements of the same integral or floating point type. The result +is a vector that contains twice as many elements of an integral or floating +point type whose size is half as wide. The elements of the two vectors are +demoted and merged (concatenated) to form the output vector. -@item VEC_PACK_MOD_EXPR @item VEC_PACK_SAT_EXPR -These nodes represent packing of elements of the two input vectors into the -output vector, using modulo or saturating arithmetic, respectively. -Their operands are vectors that contain the same number of elements -of the same integral type. The result is a vector that contains twice as many -elements, of an integral type whose size is half as wide. In both cases -the elements of the two vectors are demoted and merged (concatenated) to form -the output vector. +This node represents packing of elements of the two input vectors into the +output vector using saturation. 
Input operands are vectors that contain +the same number of elements of the same integral type. The result is a +vector that contains twice as many elements of an integral type whose size +is half as wide. The elements of the two vectors are demoted and merged +(concatenated) to form the output vector. @item VEC_EXTRACT_EVEN_EXPR @item VEC_EXTRACT_ODD_EXPR diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 4d485df41b8..01705ad282d 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3591,35 +3591,48 @@ Operand 2 is an integer shift amount in bits. Operand 0 is where the resulting shifted vector is stored. The output and input vectors should have the same modes. -@cindex @code{vec_pack_mod_@var{m}} instruction pattern +@cindex @code{vec_pack_trunc_@var{m}} instruction pattern +@item @samp{vec_pack_trunc_@var{m}} +Narrow (demote) and merge the elements of two vectors. Operands 1 and 2 +are vectors of the same mode having N integral or floating point elements +of size S. Operand 0 is the resulting vector in which 2*N elements of +size N/2 are concatenated after narrowing them down using truncation. + @cindex @code{vec_pack_ssat_@var{m}} instruction pattern @cindex @code{vec_pack_usat_@var{m}} instruction pattern -@item @samp{vec_pack_mod_@var{m}}, @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}} -Narrow (demote) and merge the elements of two vectors. -Operands 1 and 2 are vectors of the same mode. +@item @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}} +Narrow (demote) and merge the elements of two vectors. Operands 1 and 2 +are vectors of the same mode having N integral elements of size S. Operand 0 is the resulting vector in which the elements of the two input -vectors are concatenated after narrowing them down using modulo arithmetic or -signed/unsigned saturating arithmetic. +vectors are concatenated after narrowing them down using signed/unsigned +saturating arithmetic. 
@cindex @code{vec_unpacks_hi_@var{m}} instruction pattern @cindex @code{vec_unpacks_lo_@var{m}} instruction pattern +@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}} +Extract and widen (promote) the high/low part of a vector of signed +integral or floating point elements. The input vector (operand 1) has N +elements of size S. Widen (promote) the high/low elements of the vector +using signed or floating point extension and place the resulting N/2 +values of size 2*S in the output vector (operand 0). + @cindex @code{vec_unpacku_hi_@var{m}} instruction pattern @cindex @code{vec_unpacku_lo_@var{m}} instruction pattern -@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}, @samp{vec_unpacku_hi_@var{m}}, @samp{vec_unpacku_lo_@var{m}} -Extract and widen (promote) the high/low part of a vector of signed/unsigned -elements. The input vector (operand 1) has N signed/unsigned elements of size S. -Using sign/zero extension widen (promote) the high/low elements of the vector, -and place the resulting N/2 values of size 2*S in the output vector (operand 0). +@item @samp{vec_unpacku_hi_@var{m}}, @samp{vec_unpacku_lo_@var{m}} +Extract and widen (promote) the high/low part of a vector of unsigned +integral elements. The input vector (operand 1) has N elements of size S. +Widen (promote) the high/low elements of the vector using zero extension and +place the resulting N/2 values of size 2*S in the output vector (operand 0). @cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern @cindex @code{vec_widen_umult_lo__@var{m}} instruction pattern @cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern @cindex @code{vec_widen_smult_lo_@var{m}} instruction pattern @item @samp{vec_widen_umult_hi_@var{m}}, @samp{vec_widen_umult_lo_@var{m}}, @samp{vec_widen_smult_hi_@var{m}}, @samp{vec_widen_smult_lo_@var{m}} -Signed/Unsigned widening multiplication. -The two inputs (operands 1 and 2) are vectors with N -signed/unsigned elements of size S. 
Multiply the high/low elements of the two -vectors, and put the N/2 products of size 2*S in the output vector (operand 0). +Signed/Unsigned widening multiplication. The two inputs (operands 1 and 2) +are vectors with N signed/unsigned elements of size S. Multiply the high/low +elements of the two vectors, and put the N/2 products of size 2*S in the +output vector (operand 0). @cindex @code{mulhisi3} instruction pattern @item @samp{mulhisi3} diff --git a/gcc/expr.c b/gcc/expr.c index 24dfbf5bd27..c644933c0ba 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8926,7 +8926,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, return target; } - case VEC_PACK_MOD_EXPR: + case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: { mode = TYPE_MODE (TREE_TYPE (TREE_OPERAND (exp, 0))); diff --git a/gcc/genopinit.c b/gcc/genopinit.c index 3c06639f075..0e6d419bcbe 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -229,8 +229,9 @@ static const char * const optabs[] = "vec_unpacks_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacks_lo_$a$)", "vec_unpacku_hi_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_hi_$a$)", "vec_unpacku_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_lo_$a$)", - "vec_pack_mod_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_mod_$a$)", - "vec_pack_ssat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_ssat_$a$)", "vec_pack_usat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_usat_$a$)" + "vec_pack_trunc_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_trunc_$a$)", + "vec_pack_ssat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_ssat_$a$)", + "vec_pack_usat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_usat_$a$)" }; static void gen_insn (rtx); diff --git a/gcc/optabs.c b/gcc/optabs.c index 94e36916969..b45a9b3abad 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -333,19 +333,19 @@ optab_for_tree_code (enum tree_code code, tree type) vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; case 
VEC_UNPACK_HI_EXPR: - return TYPE_UNSIGNED (type) ? + return TYPE_UNSIGNED (type) ? vec_unpacku_hi_optab : vec_unpacks_hi_optab; case VEC_UNPACK_LO_EXPR: return TYPE_UNSIGNED (type) ? vec_unpacku_lo_optab : vec_unpacks_lo_optab; - case VEC_PACK_MOD_EXPR: - return vec_pack_mod_optab; - + case VEC_PACK_TRUNC_EXPR: + return vec_pack_trunc_optab; + case VEC_PACK_SAT_EXPR: return TYPE_UNSIGNED (type) ? vec_pack_usat_optab : vec_pack_ssat_optab; - + default: break; } @@ -1373,7 +1373,7 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, && mode1 != VOIDmode) xop1 = copy_to_mode_reg (mode1, xop1); - if (binoptab == vec_pack_mod_optab + if (binoptab == vec_pack_trunc_optab || binoptab == vec_pack_usat_optab || binoptab == vec_pack_ssat_optab) { @@ -5560,7 +5560,7 @@ init_optabs (void) vec_unpacks_lo_optab = init_optab (UNKNOWN); vec_unpacku_hi_optab = init_optab (UNKNOWN); vec_unpacku_lo_optab = init_optab (UNKNOWN); - vec_pack_mod_optab = init_optab (UNKNOWN); + vec_pack_trunc_optab = init_optab (UNKNOWN); vec_pack_usat_optab = init_optab (UNKNOWN); vec_pack_ssat_optab = init_optab (UNKNOWN); diff --git a/gcc/optabs.h b/gcc/optabs.h index 0f8c7966809..62a69c742fb 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -278,14 +278,16 @@ enum optab_index OTI_vec_widen_umult_lo, OTI_vec_widen_smult_hi, OTI_vec_widen_smult_lo, - /* Extract and widen the high/low part of a vector of signed/unsigned - elements. */ + /* Extract and widen the high/low part of a vector of signed or + floating point elements. */ OTI_vec_unpacks_hi, OTI_vec_unpacks_lo, + /* Extract and widen the high/low part of a vector of unsigned + elements. */ OTI_vec_unpacku_hi, OTI_vec_unpacku_lo, /* Narrow (demote) and merge the elements of two vectors. 
*/ - OTI_vec_pack_mod, + OTI_vec_pack_trunc, OTI_vec_pack_usat, OTI_vec_pack_ssat, @@ -404,7 +406,7 @@ extern GTY(()) optab optab_table[OTI_MAX]; #define reduc_umin_optab (optab_table[OTI_reduc_umin]) #define reduc_splus_optab (optab_table[OTI_reduc_splus]) #define reduc_uplus_optab (optab_table[OTI_reduc_uplus]) - + #define ssum_widen_optab (optab_table[OTI_ssum_widen]) #define usum_widen_optab (optab_table[OTI_usum_widen]) #define sdot_prod_optab (optab_table[OTI_sdot_prod]) @@ -425,13 +427,13 @@ extern GTY(()) optab optab_table[OTI_MAX]; #define vec_widen_smult_hi_optab (optab_table[OTI_vec_widen_smult_hi]) #define vec_widen_smult_lo_optab (optab_table[OTI_vec_widen_smult_lo]) #define vec_unpacks_hi_optab (optab_table[OTI_vec_unpacks_hi]) -#define vec_unpacku_hi_optab (optab_table[OTI_vec_unpacku_hi]) #define vec_unpacks_lo_optab (optab_table[OTI_vec_unpacks_lo]) +#define vec_unpacku_hi_optab (optab_table[OTI_vec_unpacku_hi]) #define vec_unpacku_lo_optab (optab_table[OTI_vec_unpacku_lo]) -#define vec_pack_mod_optab (optab_table[OTI_vec_pack_mod]) +#define vec_pack_trunc_optab (optab_table[OTI_vec_pack_trunc]) #define vec_pack_ssat_optab (optab_table[OTI_vec_pack_ssat]) #define vec_pack_usat_optab (optab_table[OTI_vec_pack_usat]) - + #define powi_optab (optab_table[OTI_powi]) /* Conversion optabs have their own table and indexes. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bcabfafbbfb..cc26895a3f5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2007-04-22 Uros Bizjak + + PR tree-optimization/24659 + * gcc.dg/vect/vect-float-extend-1.c: New test. + * gcc.dg/vect/vect-float-truncate-1.c: New test. 
+ 2007-04-22 Richard Guenther PR tree-optimization/29789 diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index b75094f8f7b..ff76b7557c1 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -2149,7 +2149,7 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data) case VEC_WIDEN_MULT_LO_EXPR: case VEC_UNPACK_HI_EXPR: case VEC_UNPACK_LO_EXPR: - case VEC_PACK_MOD_EXPR: + case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: case WIDEN_MULT_EXPR: diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 789aefc8e41..ab637a8822a 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1943,8 +1943,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, pp_string (buffer, " > "); break; - case VEC_PACK_MOD_EXPR: - pp_string (buffer, " VEC_PACK_MOD_EXPR < "); + case VEC_PACK_TRUNC_EXPR: + pp_string (buffer, " VEC_PACK_TRUNC_EXPR < "); dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); pp_string (buffer, ", "); dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); @@ -2348,7 +2348,7 @@ op_prio (tree op) case VEC_RSHIFT_EXPR: case VEC_UNPACK_HI_EXPR: case VEC_UNPACK_LO_EXPR: - case VEC_PACK_MOD_EXPR: + case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: return 16; diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index db7a0ce61b1..0b9b91f6d9a 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -421,7 +421,7 @@ expand_vector_operations_1 (block_stmt_iterator *bsi) || code == VEC_WIDEN_MULT_LO_EXPR || code == VEC_UNPACK_HI_EXPR || code == VEC_UNPACK_LO_EXPR - || code == VEC_PACK_MOD_EXPR + || code == VEC_PACK_TRUNC_EXPR || code == VEC_PACK_SAT_EXPR) type = TREE_TYPE (TREE_OPERAND (rhs, 0)); diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index e5f23a50cb1..4051cc6eb3f 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -174,7 +174,7 @@ vect_create_addr_base_for_vector_ref (tree stmt, offset = fold_build2 
(MULT_EXPR, TREE_TYPE (offset), offset, step); base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset, offset); - base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp); + base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp); append_to_statement_list_force (new_stmt, new_stmt_list); } @@ -561,7 +561,8 @@ get_initial_def_for_induction (tree iv_phi) access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi)); gcc_assert (access_fn); - ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_expr, &step_expr); + ok = vect_is_simple_iv_evolution (loop->num, access_fn, + &init_expr, &step_expr); gcc_assert (ok); /* Create the vector that holds the initial_value of the induction. */ @@ -837,7 +838,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) vectorized stmt to be created (by the caller to this function) is a "copy" created in case the vectorized result cannot fit in one vector, and several copies of the vector-stmt are required. In this case the vector-def is - retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field + retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND. DT is the type of the vector def VEC_OPRND. @@ -1057,7 +1058,7 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def) loop: vec_def = phi # REDUCTION_PHI - VECT_DEF = vector_stmt # vectorized form of STMT + VECT_DEF = vector_stmt # vectorized form of STMT s_loop = scalar_stmt # (scalar) STMT loop_exit: s_out0 = phi # (scalar) EXIT_PHI @@ -1176,8 +1177,8 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, exit_bsi = bsi_start (exit_bb); /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 - (i.e. when reduc_code is not available) and in the final adjustment code - (if needed). Also get the original scalar reduction variable as + (i.e. 
when reduc_code is not available) and in the final adjustment + code (if needed). Also get the original scalar reduction variable as defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it represents a reduction pattern), the tree-code and scalar-def are taken from the original stmt that the pattern-stmt (STMT) replaces. @@ -1327,7 +1328,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, bitpos); BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); - epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs); + epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs); new_name = make_ssa_name (new_scalar_dest, epilog_stmt); GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); @@ -1422,28 +1423,28 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original sequence that had been detected and replaced by the pattern-stmt (STMT). - In some cases of reduction patterns, the type of the reduction variable X is + In some cases of reduction patterns, the type of the reduction variable X is different than the type of the other arguments of STMT. In such cases, the vectype that is used when transforming STMT into a vector - stmt is different than the vectype that is used to determine the + stmt is different than the vectype that is used to determine the vectorization factor, because it consists of a different number of elements than the actual number of elements that are being operated upon in parallel. - For example, consider an accumulation of shorts into an int accumulator. + For example, consider an accumulation of shorts into an int accumulator. 
On some targets it's possible to vectorize this pattern operating on 8 shorts at a time (hence, the vectype for purposes of determining the vectorization factor should be V8HI); on the other hand, the vectype that - is used to create the vector form is actually V4SI (the type of the result). + is used to create the vector form is actually V4SI (the type of the result). - Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that - indicates what is the actual level of parallelism (V8HI in the example), so - that the right vectorization factor would be derived. This vectype - corresponds to the type of arguments to the reduction stmt, and should *NOT* + Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that + indicates what is the actual level of parallelism (V8HI in the example), so + that the right vectorization factor would be derived. This vectype + corresponds to the type of arguments to the reduction stmt, and should *NOT* be used to create the vectorized stmt. The right vectype for the vectorized - stmt is obtained from the type of the result X: + stmt is obtained from the type of the result X: get_vectype_for_scalar_type (TREE_TYPE (X)) - This means that, contrary to "regular" reductions (or "regular" stmts in + This means that, contrary to "regular" reductions (or "regular" stmts in general), the following equation: STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) does *NOT* necessarily hold for reduction patterns. 
*/ @@ -1687,7 +1688,7 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) op = TREE_OPERAND (operation, 1); loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL); } - + /* Get the vector def for the reduction variable from the phi node */ reduc_def = PHI_RESULT (new_phi); } @@ -1697,34 +1698,33 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0); if (op_type == ternary_op) loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1); - + /* Get the vector def for the reduction variable from the vectorized reduction operation generated in the previous iteration (j-1) */ reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0); } - + /* Arguments are ready. create the new vector stmt. */ - if (op_type == binary_op) expr = build2 (code, vectype, loop_vec_def0, reduc_def); else expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1, - reduc_def); + reduc_def); new_stmt = build_gimple_modify_stmt (vec_dest, expr); new_temp = make_ssa_name (vec_dest, new_stmt); GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; vect_finish_stmt_generation (stmt, new_stmt, bsi); - + if (j == 0) STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; else STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; prev_stmt_info = vinfo_for_stmt (new_stmt); } - + /* Finalize the reduction-phi (set it's arguments) and create the epilog reduction code. 
*/ - vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi); + vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi); return true; } @@ -2329,7 +2329,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) fprintf (vect_dump, "use not simple."); return false; } - + if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); @@ -2415,12 +2415,12 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) stmts that use the defs of the current stmt. The example below illustrates the vectorization process when VF=16 and nunits=4 (i.e - we need to create 4 vectorized stmts): - + before vectorization: RELATED_STMT VEC_STMT S1: x = memref - - S2: z = x + 1 - - - + step 1: vectorize stmt S1 (done in vectorizable_load. See more details there): RELATED_STMT VEC_STMT @@ -2430,7 +2430,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) VS1_3: vx3 = memref3 - - S1: x = load - VS1_0 S2: z = x + 1 - - - + step2: vectorize stmt S2 (done here): To vectorize stmt S2 we first need to find the relevant vector def for the first operand 'x'. This is, as usual, obtained from @@ -2457,7 +2457,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) VS2_2: vz2 = vx2 + v1 VS2_3 - VS2_3: vz3 = vx3 + v1 - - S2: z = x + 1 - VS2_0 */ - + prev_stmt_info = NULL; for (j = 0; j < ncopies; j++) { @@ -2493,7 +2493,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } /* Arguments are ready. create the new vector stmt. 
*/ - + if (op_type == binary_op) new_stmt = build_gimple_modify_stmt (vec_dest, build2 (code, vectype, vec_oprnd0, vec_oprnd1)); @@ -2503,7 +2503,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) new_temp = make_ssa_name (vec_dest, new_stmt); GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; vect_finish_stmt_generation (stmt, new_stmt, bsi); - + if (j == 0) STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; else @@ -2516,13 +2516,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Function vectorizable_type_demotion - + Check if STMT performs a binary or unary operation that involves type demotion, and if it can be vectorized. If VEC_STMT is also passed, vectorize the STMT: create a vectorized stmt to replace it, put it in VEC_STMT, and insert it at BSI. Return FALSE if not a vectorizable STMT, TRUE otherwise. */ - + bool vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) @@ -2550,7 +2550,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, tree scalar_type; optab optab; enum machine_mode vec_mode; - + if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -2564,37 +2564,40 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, fprintf (vect_dump, "value used after loop."); return false; } - + /* Is STMT a vectorizable type-demotion operation? 
*/ if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) return false; - + if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME) return false; - + operation = GIMPLE_STMT_OPERAND (stmt, 1); code = TREE_CODE (operation); if (code != NOP_EXPR && code != CONVERT_EXPR) return false; - + op0 = TREE_OPERAND (operation, 0); vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0)); nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); - + scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); scalar_type = TREE_TYPE (scalar_dest); vectype_out = get_vectype_for_scalar_type (scalar_type); nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); if (nunits_in != nunits_out / 2) /* FORNOW */ return false; - + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; gcc_assert (ncopies >= 1); - if (! INTEGRAL_TYPE_P (scalar_type) - || !INTEGRAL_TYPE_P (TREE_TYPE (op0))) + if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && INTEGRAL_TYPE_P (TREE_TYPE (op0))) + || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && (code == NOP_EXPR || code == CONVERT_EXPR)))) return false; - + /* Check the operands of the operation. */ if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) { @@ -2602,31 +2605,30 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, fprintf (vect_dump, "use not simple."); return false; } - + /* Supportable by target? */ - code = VEC_PACK_MOD_EXPR; - optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in); + code = VEC_PACK_TRUNC_EXPR; + optab = optab_for_tree_code (code, vectype_in); if (!optab) return false; - + vec_mode = TYPE_MODE (vectype_in); if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) return false; - + STMT_VINFO_VECTYPE (stmt_info) = vectype_in; - + if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; return true; } - + /** Transform. 
**/ - if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform type demotion operation. ncopies = %d.", - ncopies); - + ncopies); + /* Handle def. */ vec_dest = vect_create_destination_var (scalar_dest, vectype_out); @@ -2648,22 +2650,22 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1); vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); } - + /* Arguments are ready. Create the new vector stmt. */ expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1); new_stmt = build_gimple_modify_stmt (vec_dest, expr); new_temp = make_ssa_name (vec_dest, new_stmt); GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; vect_finish_stmt_generation (stmt, new_stmt, bsi); - + if (j == 0) STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; else STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; - + prev_stmt_info = vinfo_for_stmt (new_stmt); } - + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); return true; } @@ -2798,8 +2800,11 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, if (nunits_out != nunits_in / 2) /* FORNOW */ return false; - if (! INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) - || !INTEGRAL_TYPE_P (TREE_TYPE (op0))) + if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && INTEGRAL_TYPE_P (TREE_TYPE (op0))) + || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && (code == CONVERT_EXPR || code == NOP_EXPR)))) return false; /* Check the operands of the operation. */ @@ -3268,7 +3273,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* For interleaved stores we created vectorized defs for all the defs stored in OPRNDS in the previous iteration (previous copy). DR_CHAIN is then used as an input to vect_permute_store_chain(), - and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the + and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the next copy. 
If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and OPRNDS are of size 1. */ @@ -4286,7 +4291,7 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store) done = vectorizable_type_demotion (stmt, bsi, &vec_stmt); gcc_assert (done); break; - + case type_promotion_vec_info_type: done = vectorizable_type_promotion (stmt, bsi, &vec_stmt); gcc_assert (done); diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 6b44c0b0610..3c41c5c4ee4 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1773,7 +1773,7 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype, tree wide_vectype = get_vectype_for_scalar_type (type); enum tree_code c1, c2; - /* The result of a vectorized widening operation usually requires two vectors + /* The result of a vectorized widening operation usually requires two vectors (because the widened results do not fit int one vector). The generated vector results would normally be expected to be generated in the same order as in the original scalar computation. i.e. if 8 results are diff --git a/gcc/tree.def b/gcc/tree.def index 1789de839ad..feeab3fce1f 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1093,12 +1093,12 @@ DEFTREECODE (VEC_UNPACK_HI_EXPR, "vec_unpack_hi_expr", tcc_unary, 1) DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_unpack_lo_expr", tcc_unary, 1) /* Pack (demote/narrow and merge) the elements of the two input vectors - into the output vector, using modulo/saturating arithmetic. + into the output vector using truncation/saturation. The elements of the input vectors are twice the size of the elements of the output vector. This is used to support type demotion. */ -DEFTREECODE (VEC_PACK_MOD_EXPR, "vec_pack_mod_expr", tcc_binary, 2) +DEFTREECODE (VEC_PACK_TRUNC_EXPR, "vec_pack_trunc_expr", tcc_binary, 2) DEFTREECODE (VEC_PACK_SAT_EXPR, "vec_pack_sat_expr", tcc_binary, 2) - + /* Extract even/odd fields from vectors. 
*/ DEFTREECODE (VEC_EXTRACT_EVEN_EXPR, "vec_extracteven_expr", tcc_binary, 2) DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extractodd_expr", tcc_binary, 2)