re PR tree-optimization/24659 (Conversions are not vectorized)

2007-04-22 Uros Bizjak <ubizjak@gmail.com> PR tree-optimization/24659 * optabs.h (enum optab_index) [OTI_vec_unpacks_hi, OTI_vec_unpacks_lo]: Update comment to mention floating point operands. (vec_pack_trunc_optab): Rename from vec_pack_mod_optab. * genopinit.c (optabs): Rename vec_pack_mod_optab to vec_pack_trunc_optab. * tree-vect-transform.c (vectorizable_type_demotion): Do not fail early for scalar floating point operands for NOP_EXPR. (vectorizable_type_promotion): Ditto. * optabs.c (optab_for_tree_code) [VEC_PACK_TRUNC_EXPR]: Return vec_pack_trunc_optab. (expand_binop): Rename vec_pack_mod_optab to vec_pack_trunc_optab. * tree.def (VEC_PACK_TRUNC_EXPR): Rename from VEC_PACK_MOD_EXPR. * tree-pretty-print.c (dump_generic_node) [VEC_PACK_TRUNC_EXPR]: Rename from VEC_PACK_MOD_EXPR. (op_prio) [VEC_PACK_TRUNC_EXPR]: Ditto. * expr.c (expand_expr_real_1): Ditto. * tree-inline.c (estimate_num_insns_1): Ditto. * tree-vect-generic.c (expand_vector_operations_1): Ditto. * config/i386/sse.md (vec_unpacks_hi_v4sf): New expander. (vec_unpacks_lo_v4sf): Ditto. (vec_pack_trunc_v2df): Ditto. (vec_pack_trunc_v8hi): Rename from vec_pack_mod_v8hi. (vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si. (vec_pack_trunc_v2di): Rename from vec_pack_mod_v2di. * config/rs6000/altivec.md (vec_pack_trunc_v8hi): Rename from vec_pack_mod_v8hi. (vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si. * doc/c-tree.texi (Expression trees) [VEC_PACK_TRUNC_EXPR]: Rename from VEC_PACK_MOD_EXPR. This expression also represents packing of floating point operands. [VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR]: These expressions also represent unpacking of floating point operands. * doc/md.texi (Standard Names) [vec_pack_trunc]: Update documentation. [vec_unpacks_hi]: Ditto. [vec_unpacks_lo]: Ditto. testsuite/ChangeLog: 2007-04-22 Uros Bizjak <ubizjak@gmail.com> PR tree-optimization/24659 * gcc.dg/vect/vect-float-extend-1.c: New test. * gcc.dg/vect/vect-float-truncate-1.c: New test. From-SVN: r124045
2007-04-22 20:45:06 +02:00 · 2007-04-22 20:45:06 +02:00 · 8115817bf2
commit 8115817bf2
parent 395a40e0e2
16 changed files with 245 additions and 123 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,46 @@
+2007-04-22  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR tree-optimization/24659
+	* optabs.h (enum optab_index) [OTI_vec_unpacks_hi,
+	OTI_vec_unpacks_lo]: Update comment to mention floating point operands.
+	(vec_pack_trunc_optab): Rename from vec_pack_mod_optab.
+	* genopinit.c (optabs): Rename vec_pack_mod_optab
+	to vec_pack_trunc_optab.
+	* tree-vect-transform.c (vectorizable_type_demotion): Do not fail
+	early for scalar floating point operands for NOP_EXPR.
+	(vectorizable_type_promotion): Ditto.
+	* optabs.c (optab_for_tree_code) [VEC_PACK_TRUNC_EXPR]: Return
+	vec_pack_trunc_optab.
+	(expand_binop): Rename vec_pack_mod_optab to vec_pack_trunc_optab.
+
+	* tree.def (VEC_PACK_TRUNC_EXPR): Rename from VEC_PACK_MOD_EXPR.
+	* tree-pretty-print.c (dump_generic_node) [VEC_PACK_TRUNC_EXPR]:
+	Rename from VEC_PACK_MOD_EXPR.
+	(op_prio) [VEC_PACK_TRUNC_EXPR]: Ditto.
+	* expr.c (expand_expr_real_1): Ditto.
+	* tree-inline.c (estimate_num_insns_1): Ditto.
+	* tree-vect-generic.c (expand_vector_operations_1): Ditto.
+
+	* config/i386/sse.md (vec_unpacks_hi_v4sf): New expander.
+	(vec_unpacks_lo_v4sf): Ditto.
+	(vec_pack_trunc_v2df): Ditto.
+	(vec_pack_trunc_v8hi): Rename from vec_pack_mod_v8hi.
+	(vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si.
+	(vec_pack_trunc_v2di): Rename from vec_pack_mod_v2di.
+
+	* config/rs6000/altivec.md (vec_pack_trunc_v8hi): Rename from
+	vec_pack_mod_v8hi.
+	(vec_pack_trunc_v4si): Rename from vec_pack_mod_v4si.
+
+	* doc/c-tree.texi (Expression trees) [VEC_PACK_TRUNC_EXPR]:
+	Rename from VEC_PACK_MOD_EXPR.  This expression also represents
+	packing of floating point operands.
+	[VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR]: These expressions also
+	represent unpacking of floating point operands.
+	* doc/md.texi (Standard Names) [vec_pack_trunc]: Update documentation.
+	[vec_unpacks_hi]: Ditto.
+	[vec_unpacks_lo]: Ditto.
+
 2007-04-22  Jan Hubicka  <jh@suse.cz>

 	* final.c (rest_of_handle_final): Call
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@ -2150,6 +2150,51 @@
   (set_attr "mode" "V2DF")
   (set_attr "amdfam10_decode" "direct")])

+(define_expand "vec_unpacks_hi_v4sf"
+  [(set (match_dup 2)
+   (vec_select:V4SF
+     (vec_concat:V8SF
+       (match_dup 2)
+       (match_operand:V4SF 1 "nonimmediate_operand" ""))
+     (parallel [(const_int 6)
+		(const_int 7)
+		(const_int 2)
+		(const_int 3)])))
+  (set (match_operand:V2DF 0 "register_operand" "")
+   (float_extend:V2DF
+     (vec_select:V2SF
+       (match_dup 2)
+       (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+  [(set (match_operand:V2DF 0 "register_operand" "")
+	(float_extend:V2DF
+	  (vec_select:V2SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "")
+	    (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE2")
+
+(define_expand "vec_pack_trunc_v2df"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:V2DF 1 "nonimmediate_operand" "")
+   (match_operand:V2DF 2 "nonimmediate_operand" "")]
+  "TARGET_SSE2"
+{
+  rtx r1, r2;
+
+  r1 = gen_reg_rtx (V4SFmode);
+  r2 = gen_reg_rtx (V4SFmode);
+
+  emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
+  emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
+  emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel double-precision floating point element swizzling
@ -3420,7 +3465,7 @@
 ;;       h3 = aeimquy2bfjnrvz3
 ;;       l3 = cgkosw04dhlptx15
 ;;   result = bdfhjlnprtvxz135
-(define_expand "vec_pack_mod_v8hi"
+(define_expand "vec_pack_trunc_v8hi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
@ -3455,7 +3500,7 @@
 ;;       h2 = aeimbfjn
 ;;       l2 = cgkodhlp
 ;;   result = bdfhjlnp
-(define_expand "vec_pack_mod_v4si"
+(define_expand "vec_pack_trunc_v4si"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
@ -3484,7 +3529,7 @@
 ;;      h1 = aebf
 ;;      l1 = cgdh
 ;;  result = bdfh
-(define_expand "vec_pack_mod_v2di"
+(define_expand "vec_pack_trunc_v2di"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:V2DI 2 "register_operand" "")]
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@ -2603,7 +2603,7 @@
  DONE;
 }")

-(define_expand "vec_pack_mod_v8hi"
+(define_expand "vec_pack_trunc_v8hi"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
                       (match_operand:V8HI 2 "register_operand" "v")]
@ -2615,7 +2615,7 @@
  DONE;
 }")
                                                                                
-(define_expand "vec_pack_mod_v4si"
+(define_expand "vec_pack_trunc_v4si"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
                      (match_operand:V4SI 2 "register_operand" "v")]
--- a/gcc/doc/c-tree.texi
+++ b/gcc/doc/c-tree.texi
@ -1983,7 +1983,7 @@ This macro returns the attributes on the type @var{type}.
@tindex VEC_WIDEN_MULT_LO_EXPR
@tindex VEC_UNPACK_HI_EXPR
@tindex VEC_UNPACK_LO_EXPR
-@tindex VEC_PACK_MOD_EXPR
+@tindex VEC_PACK_TRUNC_EXPR
@tindex VEC_PACK_SAT_EXPR
@tindex VEC_EXTRACT_EVEN_EXPR 
@tindex VEC_EXTRACT_ODD_EXPR
@ -2837,23 +2837,30 @@ vector of @code{N/2} products.

@item VEC_UNPACK_HI_EXPR
@item VEC_UNPACK_LO_EXPR
-These nodes represent unpacking of the high and low parts of the input vector, 
+These nodes represent unpacking of the high and low parts of the input vector,
 respectively.  The single operand is a vector that contains @code{N} elements 
-of the same integral type.  The result is a vector that contains half as many 
-elements, of an integral type whose size is twice as wide.  In the case of 
-@code{VEC_UNPACK_HI_EXPR} the high @code{N/2} elements of the vector are 
-extracted and widened (promoted).  In the case of @code{VEC_UNPACK_LO_EXPR} the 
-low @code{N/2} elements of the vector are extracted and widened (promoted).
+of the same integral or floating point type.  The result is a vector
+that contains half as many elements, of an integral or floating point type
+whose size is twice as wide.  In the case of @code{VEC_UNPACK_HI_EXPR} the
+high @code{N/2} elements of the vector are extracted and widened (promoted).
+In the case of @code{VEC_UNPACK_LO_EXPR} the low @code{N/2} elements of the
+vector are extracted and widened (promoted).
+
+@item VEC_PACK_TRUNC_EXPR
+This node represents packing of truncated elements of the two input vectors
+into the output vector.  Input operands are vectors that contain the same
+number of elements of the same integral or floating point type.  The result
+is a vector that contains twice as many elements of an integral or floating
+point type whose size is half as wide.  The elements of the two vectors are
+demoted and merged (concatenated) to form the output vector.

-@item VEC_PACK_MOD_EXPR
@item VEC_PACK_SAT_EXPR
-These nodes represent packing of elements of the two input vectors into the
-output vector, using modulo or saturating arithmetic, respectively.
-Their operands are vectors that contain the same number of elements 
-of the same integral type.  The result is a vector that contains twice as many 
-elements, of an integral type whose size is half as wide.  In both cases
-the elements of the two vectors are demoted and merged (concatenated) to form
-the output vector.
+This node represents packing of elements of the two input vectors into the
+output vector using saturation.  Input operands are vectors that contain
+the same number of elements of the same integral type.  The result is a
+vector that contains twice as many elements of an integral type whose size
+is half as wide.  The elements of the two vectors are demoted and merged
+(concatenated) to form the output vector.

@item VEC_EXTRACT_EVEN_EXPR
@item VEC_EXTRACT_ODD_EXPR
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@ -3591,35 +3591,48 @@ Operand 2 is an integer shift amount in bits.
 Operand 0 is where the resulting shifted vector is stored.
 The output and input vectors should have the same modes.

-@cindex @code{vec_pack_mod_@var{m}} instruction pattern
+@cindex @code{vec_pack_trunc_@var{m}} instruction pattern
+@item @samp{vec_pack_trunc_@var{m}}
+Narrow (demote) and merge the elements of two vectors.  Operands 1 and 2
+are vectors of the same mode having N integral or floating point elements
+of size S.  Operand 0 is the resulting vector in which 2*N elements of
+size S/2 are concatenated after narrowing them down using truncation.
+
@cindex @code{vec_pack_ssat_@var{m}} instruction pattern
@cindex @code{vec_pack_usat_@var{m}} instruction pattern
-@item @samp{vec_pack_mod_@var{m}}, @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}}
-Narrow (demote) and merge the elements of two vectors.
-Operands 1 and 2 are vectors of the same mode.
+@item @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}}
+Narrow (demote) and merge the elements of two vectors.  Operands 1 and 2
+are vectors of the same mode having N integral elements of size S.
 Operand 0 is the resulting vector in which the elements of the two input
-vectors are concatenated after narrowing them down using modulo arithmetic or
-signed/unsigned saturating arithmetic.
+vectors are concatenated after narrowing them down using signed/unsigned
+saturating arithmetic.

@cindex @code{vec_unpacks_hi_@var{m}} instruction pattern
@cindex @code{vec_unpacks_lo_@var{m}} instruction pattern
+@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}
+Extract and widen (promote) the high/low part of a vector of signed
+integral or floating point elements.  The input vector (operand 1) has N
+elements of size S.  Widen (promote) the high/low elements of the vector
+using signed or floating point extension and place the resulting N/2
+values of size 2*S in the output vector (operand 0).
+
@cindex @code{vec_unpacku_hi_@var{m}} instruction pattern
@cindex @code{vec_unpacku_lo_@var{m}} instruction pattern
-@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}, @samp{vec_unpacku_hi_@var{m}}, @samp{vec_unpacku_lo_@var{m}}
-Extract and widen (promote) the high/low part of a vector of signed/unsigned
-elements. The input vector (operand 1) has N signed/unsigned elements of size S. 
-Using sign/zero extension widen (promote) the high/low elements of the vector,
-and place the resulting N/2 values of size 2*S in the output vector (operand 0).
+@item @samp{vec_unpacku_hi_@var{m}}, @samp{vec_unpacku_lo_@var{m}}
+Extract and widen (promote) the high/low part of a vector of unsigned
+integral elements.  The input vector (operand 1) has N elements of size S.
+Widen (promote) the high/low elements of the vector using zero extension and
+place the resulting N/2 values of size 2*S in the output vector (operand 0).

@cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern
@cindex @code{vec_widen_umult_lo__@var{m}} instruction pattern
@cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern
@cindex @code{vec_widen_smult_lo_@var{m}} instruction pattern
@item @samp{vec_widen_umult_hi_@var{m}}, @samp{vec_widen_umult_lo_@var{m}}, @samp{vec_widen_smult_hi_@var{m}}, @samp{vec_widen_smult_lo_@var{m}}
-Signed/Unsigned widening multiplication. 
-The two inputs (operands 1 and 2) are vectors with N 
-signed/unsigned elements of size S. Multiply the high/low elements of the two 
-vectors, and put the N/2 products of size 2*S in the output vector (operand 0). 
+Signed/Unsigned widening multiplication.  The two inputs (operands 1 and 2)
+are vectors with N signed/unsigned elements of size S.  Multiply the high/low
+elements of the two vectors, and put the N/2 products of size 2*S in the
+output vector (operand 0).

@cindex @code{mulhisi3} instruction pattern
@item @samp{mulhisi3}
--- a/gcc/expr.c
+++ b/gcc/expr.c
@ -8926,7 +8926,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 	return target;
      }

-    case VEC_PACK_MOD_EXPR:
+    case VEC_PACK_TRUNC_EXPR:
    case VEC_PACK_SAT_EXPR:
      {
 	mode = TYPE_MODE (TREE_TYPE (TREE_OPERAND (exp, 0)));
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@ -229,8 +229,9 @@ static const char * const optabs[] =
  "vec_unpacks_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacks_lo_$a$)",
  "vec_unpacku_hi_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_hi_$a$)",
  "vec_unpacku_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_lo_$a$)",
-  "vec_pack_mod_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_mod_$a$)",
-  "vec_pack_ssat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_ssat_$a$)",  "vec_pack_usat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_usat_$a$)"
+  "vec_pack_trunc_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_trunc_$a$)",
+  "vec_pack_ssat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_ssat_$a$)",
+  "vec_pack_usat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_usat_$a$)"
 };

 static void gen_insn (rtx);
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@ -333,19 +333,19 @@ optab_for_tree_code (enum tree_code code, tree type)
 	vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;

    case VEC_UNPACK_HI_EXPR:
-      return TYPE_UNSIGNED (type) ? 
+      return TYPE_UNSIGNED (type) ?
 	vec_unpacku_hi_optab : vec_unpacks_hi_optab;

    case VEC_UNPACK_LO_EXPR:
      return TYPE_UNSIGNED (type) ? 
 	vec_unpacku_lo_optab : vec_unpacks_lo_optab;

-    case VEC_PACK_MOD_EXPR:
-      return vec_pack_mod_optab;
-                                                                                
+    case VEC_PACK_TRUNC_EXPR:
+      return vec_pack_trunc_optab;
+
    case VEC_PACK_SAT_EXPR:
      return TYPE_UNSIGNED (type) ? vec_pack_usat_optab : vec_pack_ssat_optab;
-                                                                                
+
    default:
      break;
    }
@ -1373,7 +1373,7 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 	  && mode1 != VOIDmode)
 	xop1 = copy_to_mode_reg (mode1, xop1);

-      if (binoptab == vec_pack_mod_optab 
+      if (binoptab == vec_pack_trunc_optab 
 	  || binoptab == vec_pack_usat_optab
          || binoptab == vec_pack_ssat_optab)
 	{
@ -5560,7 +5560,7 @@ init_optabs (void)
  vec_unpacks_lo_optab = init_optab (UNKNOWN);
  vec_unpacku_hi_optab = init_optab (UNKNOWN);
  vec_unpacku_lo_optab = init_optab (UNKNOWN);
-  vec_pack_mod_optab = init_optab (UNKNOWN);
+  vec_pack_trunc_optab = init_optab (UNKNOWN);
  vec_pack_usat_optab = init_optab (UNKNOWN);
  vec_pack_ssat_optab = init_optab (UNKNOWN);

--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@ -278,14 +278,16 @@ enum optab_index
  OTI_vec_widen_umult_lo,
  OTI_vec_widen_smult_hi,
  OTI_vec_widen_smult_lo,
-  /* Extract and widen the high/low part of a vector of signed/unsigned 
-     elements.  */
+  /* Extract and widen the high/low part of a vector of signed or
+     floating point elements.  */
  OTI_vec_unpacks_hi,
  OTI_vec_unpacks_lo,
+  /* Extract and widen the high/low part of a vector of unsigned
+     elements.  */
  OTI_vec_unpacku_hi,
  OTI_vec_unpacku_lo,
  /* Narrow (demote) and merge the elements of two vectors.  */
-  OTI_vec_pack_mod,
+  OTI_vec_pack_trunc,
  OTI_vec_pack_usat,
  OTI_vec_pack_ssat,

@ -404,7 +406,7 @@ extern GTY(()) optab optab_table[OTI_MAX];
 #define reduc_umin_optab (optab_table[OTI_reduc_umin])
 #define reduc_splus_optab (optab_table[OTI_reduc_splus])
 #define reduc_uplus_optab (optab_table[OTI_reduc_uplus])
-                                                                                
+
 #define ssum_widen_optab (optab_table[OTI_ssum_widen])
 #define usum_widen_optab (optab_table[OTI_usum_widen])
 #define sdot_prod_optab (optab_table[OTI_sdot_prod])
@ -425,13 +427,13 @@ extern GTY(()) optab optab_table[OTI_MAX];
 #define vec_widen_smult_hi_optab (optab_table[OTI_vec_widen_smult_hi])
 #define vec_widen_smult_lo_optab (optab_table[OTI_vec_widen_smult_lo])
 #define vec_unpacks_hi_optab (optab_table[OTI_vec_unpacks_hi])
-#define vec_unpacku_hi_optab (optab_table[OTI_vec_unpacku_hi])
 #define vec_unpacks_lo_optab (optab_table[OTI_vec_unpacks_lo])
+#define vec_unpacku_hi_optab (optab_table[OTI_vec_unpacku_hi])
 #define vec_unpacku_lo_optab (optab_table[OTI_vec_unpacku_lo])
-#define vec_pack_mod_optab (optab_table[OTI_vec_pack_mod])
+#define vec_pack_trunc_optab (optab_table[OTI_vec_pack_trunc])
 #define vec_pack_ssat_optab (optab_table[OTI_vec_pack_ssat])
 #define vec_pack_usat_optab (optab_table[OTI_vec_pack_usat])
-                                                                                
+
 #define powi_optab (optab_table[OTI_powi])

 /* Conversion optabs have their own table and indexes.  */
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,9 @@
+2007-04-22  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR tree-optimization/24659
+	* gcc.dg/vect/vect-float-extend-1.c: New test.
+	* gcc.dg/vect/vect-float-truncate-1.c: New test.
+
 2007-04-22  Richard Guenther  <rguenther@suse.de>

 	PR tree-optimization/29789
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@ -2149,7 +2149,7 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
    case VEC_WIDEN_MULT_LO_EXPR:
    case VEC_UNPACK_HI_EXPR:
    case VEC_UNPACK_LO_EXPR:
-    case VEC_PACK_MOD_EXPR:
+    case VEC_PACK_TRUNC_EXPR:
    case VEC_PACK_SAT_EXPR:

    case WIDEN_MULT_EXPR:
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@ -1943,8 +1943,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
      pp_string (buffer, " > ");
      break;

-    case VEC_PACK_MOD_EXPR:
-      pp_string (buffer, " VEC_PACK_MOD_EXPR < ");
+    case VEC_PACK_TRUNC_EXPR:
+      pp_string (buffer, " VEC_PACK_TRUNC_EXPR < ");
      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
      pp_string (buffer, ", ");
      dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
@ -2348,7 +2348,7 @@ op_prio (tree op)
    case VEC_RSHIFT_EXPR:
    case VEC_UNPACK_HI_EXPR:
    case VEC_UNPACK_LO_EXPR:
-    case VEC_PACK_MOD_EXPR:
+    case VEC_PACK_TRUNC_EXPR:
    case VEC_PACK_SAT_EXPR:
      return 16;

--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@ -421,7 +421,7 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
-      || code == VEC_PACK_MOD_EXPR
+      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR)
    type = TREE_TYPE (TREE_OPERAND (rhs, 0));

--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@ -174,7 +174,7 @@ vect_create_addr_base_for_vector_ref (tree stmt,
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
 				 base_offset, offset);
-      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);  
+      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }
  
@ -561,7 +561,8 @@ get_initial_def_for_induction (tree iv_phi)

  access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
  gcc_assert (access_fn);
-  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_expr, &step_expr);
+  ok = vect_is_simple_iv_evolution (loop->num, access_fn,
+				    &init_expr, &step_expr);
  gcc_assert (ok);

  /* Create the vector that holds the initial_value of the induction.  */
@ -837,7 +838,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
   vectorized stmt to be created (by the caller to this function) is a "copy" 
   created in case the vectorized result cannot fit in one vector, and several 
   copies of the vector-stmt are required. In this case the vector-def is 
-   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field 
+   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND. 
   DT is the type of the vector def VEC_OPRND.

@ -1057,7 +1058,7 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
    
        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
-          VECT_DEF = vector_stmt                # vectorized form of STMT       
+          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
@ -1176,8 +1177,8 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
  exit_bsi = bsi_start (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 
-         (i.e. when reduc_code is not available) and in the final adjustment code
-         (if needed).  Also get the original scalar reduction variable as
+         (i.e. when reduc_code is not available) and in the final adjustment
+	 code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it 
         represents a reduction pattern), the tree-code and scalar-def are 
         taken from the original stmt that the pattern-stmt (STMT) replaces.  
@ -1327,7 +1328,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
 				 bitpos);
 		
 	      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
-	      epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);	
+	      epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
 	      new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
 	      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
 	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
@ -1422,28 +1423,28 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
   and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).
  
-   In some cases of reduction patterns, the type of the reduction variable X is 
+   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
-   stmt is different than the vectype that is used to determine the 
+   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements 
   than the actual number of elements that are being operated upon in parallel.

-   For example, consider an accumulation of shorts into an int accumulator. 
+   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
-   is used to create the vector form is actually V4SI (the type of the result). 
+   is used to create the vector form is actually V4SI (the type of the result).

-   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that 
-   indicates what is the actual level of parallelism (V8HI in the example), so 
-   that the right vectorization factor would be derived. This vectype 
-   corresponds to the type of arguments to the reduction stmt, and should *NOT* 
+   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
+   indicates what is the actual level of parallelism (V8HI in the example), so
+   that the right vectorization factor would be derived. This vectype
+   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt. The right vectype for the vectorized
-   stmt is obtained from the type of the result X: 
+   stmt is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

-   This means that, contrary to "regular" reductions (or "regular" stmts in 
+   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
@ -1687,7 +1688,7 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
              op = TREE_OPERAND (operation, 1);
              loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
            }
-                                                                                
+
          /* Get the vector def for the reduction variable from the phi node */
          reduc_def = PHI_RESULT (new_phi);
        }
@ -1697,34 +1698,33 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
-                                                                                
+
          /* Get the vector def for the reduction variable from the vectorized
             reduction operation generated in the previous iteration (j-1)  */
          reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
        }
-                                                                                
+
      /* Arguments are ready. create the new vector stmt.  */
-                                                                                
      if (op_type == binary_op)
        expr = build2 (code, vectype, loop_vec_def0, reduc_def);
      else
        expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1, 
-								reduc_def);
+		       reduc_def);
      new_stmt = build_gimple_modify_stmt (vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
-                                                                                
+
      if (j == 0)
 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
-                                                                                
+
  /* Finalize the reduction-phi (set it's arguments) and create the
     epilog reduction code.  */
-  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);                                                                                
+  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
  return true;
 }

@ -2329,7 +2329,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
        fprintf (vect_dump, "use not simple.");
      return false;
    }
-                                                                                
+
  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
@ -2415,12 +2415,12 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
     stmts that use the defs of the current stmt. The example below illustrates
     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
     4 vectorized stmts):
-                                                                                
+
     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -
-                                                                                
+
     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
@ -2430,7 +2430,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -
-                                                                                
+
     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'. This is, as usual, obtained from
@ -2457,7 +2457,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
-                                                                                
+
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
@ -2493,7 +2493,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 	}

      /* Arguments are ready. create the new vector stmt.  */
-                                                                                
+
      if (op_type == binary_op)
        new_stmt = build_gimple_modify_stmt (vec_dest,
                    build2 (code, vectype, vec_oprnd0, vec_oprnd1));
@@ -2503,7 +2503,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
-                                                                                
+
      if (j == 0)
 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
@@ -2516,13 +2516,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)


 /* Function vectorizable_type_demotion
-                                                                                
+
   Check if STMT performs a binary or unary operation that involves
   type demotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
-                                                                                
+
 bool
 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
                             tree *vec_stmt)
@@ -2550,7 +2550,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
  tree scalar_type;
  optab optab;
  enum machine_mode vec_mode;
-                                                                                
+
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

@@ -2564,37 +2564,40 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
        fprintf (vect_dump, "value used after loop.");
      return false;
    }
-                                                                                
+
  /* Is STMT a vectorizable type-demotion operation?  */
  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;
-                                                                                
+
  if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
    return false;
-                                                                                
+
  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  if (code != NOP_EXPR && code != CONVERT_EXPR)
    return false;
-                                                                                
+
  op0 = TREE_OPERAND (operation, 0);
  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
-                                                                                
+
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  vectype_out = get_vectype_for_scalar_type (scalar_type);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in != nunits_out / 2) /* FORNOW */
    return false;
-                                                                                
+
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  gcc_assert (ncopies >= 1);

-  if (! INTEGRAL_TYPE_P (scalar_type)
-      || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
+	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
+	     && (code == NOP_EXPR || code == CONVERT_EXPR))))
    return false;
-                                                                                
+
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
    {
@@ -2602,31 +2605,30 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
        fprintf (vect_dump, "use not simple.");
      return false;
    }
-                                                                                
+
  /* Supportable by target?  */
-  code = VEC_PACK_MOD_EXPR;
-  optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
+  code = VEC_PACK_TRUNC_EXPR;
+  optab = optab_for_tree_code (code, vectype_in);
  if (!optab)
    return false;
-                                                                                
+
  vec_mode = TYPE_MODE (vectype_in);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    return false;
-                                                                                
+
  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
-                                                                                
+
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
      return true;
    }
-                                                                                
+
  /** Transform.  **/
-                                                                                
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
-                        ncopies);
-                                                                                
+	     ncopies);
+
  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  
@@ -2648,22 +2650,22 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
 	  vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
 	  vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
 	}
-                                                                                
+
      /* Arguments are ready. Create the new vector stmt.  */
      expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
      new_stmt = build_gimple_modify_stmt (vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
-                                                                                
+
      if (j == 0)
 	STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
      else
 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-                                                                                
+
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
-                                                                                
+
  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
 }
@@ -2798,8 +2800,11 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
  if (nunits_out != nunits_in / 2) /* FORNOW */
    return false;

-  if (! INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
-      || !INTEGRAL_TYPE_P (TREE_TYPE (op0))) 
+  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
+	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
+	     && (code == CONVERT_EXPR || code == NOP_EXPR))))
    return false;

  /* Check the operands of the operation.  */
@@ -3268,7 +3273,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 	  /* For interleaved stores we created vectorized defs for all the 
 	     defs stored in OPRNDS in the previous iteration (previous copy). 
 	     DR_CHAIN is then used as an input to vect_permute_store_chain(), 
-	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the 
+	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
 	     next copy.
 	     If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
 	     OPRNDS are of size 1.  */
@@ -4286,7 +4291,7 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
      done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
      gcc_assert (done);
      break;
-                                                                                
+
    case type_promotion_vec_info_type:
      done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
      gcc_assert (done);
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1773,7 +1773,7 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
  tree wide_vectype = get_vectype_for_scalar_type (type);
  enum tree_code c1, c2;

-  /* The result of a vectorized widening operation usually requires two vectors 
+  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit int one vector). The generated 
     vector results would normally be expected to be generated in the same 
     order as in the original scalar computation. i.e. if 8 results are 
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1093,12 +1093,12 @@ DEFTREECODE (VEC_UNPACK_HI_EXPR, "vec_unpack_hi_expr", tcc_unary, 1)
 DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_unpack_lo_expr", tcc_unary, 1)

 /* Pack (demote/narrow and merge) the elements of the two input vectors
-   into the output vector, using modulo/saturating arithmetic.
+   into the output vector using truncation/saturation.
   The elements of the input vectors are twice the size of the elements of the
   output vector.  This is used to support type demotion.  */
-DEFTREECODE (VEC_PACK_MOD_EXPR, "vec_pack_mod_expr", tcc_binary, 2)
+DEFTREECODE (VEC_PACK_TRUNC_EXPR, "vec_pack_trunc_expr", tcc_binary, 2)
 DEFTREECODE (VEC_PACK_SAT_EXPR, "vec_pack_sat_expr", tcc_binary, 2)
-                                                                                
+
 /* Extract even/odd fields from vectors.  */
 DEFTREECODE (VEC_EXTRACT_EVEN_EXPR, "vec_extracteven_expr", tcc_binary, 2)
 DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extractodd_expr", tcc_binary, 2)