From b95becfc098b8853f95c5302657c0b46ff575cb3 Mon Sep 17 00:00:00 2001 From: Richard Guenther Date: Mon, 5 Feb 2007 21:38:53 +0000 Subject: [PATCH] tree-vectorizer.h (vectorizable_function): Add argument type argument, change return type. 2007-02-05 Richard Guenther * tree-vectorizer.h (vectorizable_function): Add argument type argument, change return type. * tree-vect-patterns.c (vect_recog_pow_pattern): Adjust caller. * tree-vect-transform.c (vectorizable_function): Handle extra argument, return vectorized function decl. (build_vectorized_function_call): Remove. (vectorizable_call): Handle calls with result and argument types differing. Handle loop vectorization factor correctly. * targhooks.c (default_builtin_vectorized_function): Adjust for extra argument. * targhooks.h (default_builtin_vectorized_function): Likewise. * target.h (builtin_vectorized_function): Add argument type argument. * config/i386/i386.c (ix86_builtin_vectorized_function): Handle extra argument, allow vectorizing of lrintf. * doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Adjust documentation of target hook. * gcc.target/i386/vectorize3.c: New testcase. From-SVN: r121617 --- gcc/ChangeLog | 20 +++ gcc/config/i386/i386.c | 30 ++-- gcc/doc/tm.texi | 5 +- gcc/target.h | 2 +- gcc/targhooks.c | 3 +- gcc/targhooks.h | 2 +- gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.target/i386/vectorize3.c | 30 ++++ gcc/tree-vect-patterns.c | 2 +- gcc/tree-vect-transform.c | 175 +++++++++++++-------- gcc/tree-vectorizer.h | 2 +- 11 files changed, 191 insertions(+), 84 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vectorize3.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e2b0f58fff6..00483ff65c3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2007-02-05 Richard Guenther + + * tree-vectorizer.h (vectorizable_function): Add argument type + argument, change return type. + * tree-vect-patterns.c (vect_recog_pow_pattern): Adjust caller. + * tree-vect-transform.c (vectorizable_function): Handle extra + argument, return vectorized function decl. + (build_vectorized_function_call): Remove. + (vectorizable_call): Handle calls with result and argument types + differing. Handle loop vectorization factor correctly. + * targhooks.c (default_builtin_vectorized_function): Adjust for + extra argument. + * targhooks.h (default_builtin_vectorized_function): Likewise. + * target.h (builtin_vectorized_function): Add argument type + argument. + * config/i386/i386.c (ix86_builtin_vectorized_function): Handle + extra argument, allow vectorizing of lrintf. + * doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Adjust + documentation of target hook. + 2007-02-05 Hans-Peter Nilsson PR target/30665 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e652d7ef341..ea1284960ad 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1355,7 +1355,7 @@ static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, tree, bool); static void ix86_init_builtins (void); static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); -static tree ix86_builtin_vectorized_function (enum built_in_function, tree); +static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree); static const char *ix86_mangle_fundamental_type (tree); static tree ix86_stack_protect_fail (void); static rtx ix86_internal_arg_pointer (void); @@ -17661,29 +17661,41 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if it is not available. */ static tree -ix86_builtin_vectorized_function (enum built_in_function fn, tree type) +ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out, + tree type_in) { - enum machine_mode el_mode; - int n; + enum machine_mode in_mode, out_mode; + int in_n, out_n; - if (TREE_CODE (type) != VECTOR_TYPE) + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; - el_mode = TYPE_MODE (TREE_TYPE (type)); - n = TYPE_VECTOR_SUBPARTS (type); + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); switch (fn) { case BUILT_IN_SQRT: - if (el_mode == DFmode && n == 2) + if (out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) return ix86_builtins[IX86_BUILTIN_SQRTPD]; return NULL_TREE; case BUILT_IN_SQRTF: - if (el_mode == SFmode && n == 4) + if (out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) return ix86_builtins[IX86_BUILTIN_SQRTPS]; return NULL_TREE; + case BUILT_IN_LRINTF: + if (out_mode == SImode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return ix86_builtins[IX86_BUILTIN_CVTPS2DQ]; + return NULL_TREE; + default: ; } diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c9f6cb89d47..ebb3f2c871b 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5384,11 +5384,12 @@ preserved (e.g. used only by a reduction computation). Otherwise, the @code{widen_mult_hi/lo} idioms will be used. @end deftypefn -@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type}) +@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in}) This hook should return the decl of a function that implements the vectorized variant of the builtin function with builtin function code @var{code} or @code{NULL_TREE} if such a function is not available. The return type of -the vectorized function shall be of vector type @var{vec_type}. +the vectorized function shall be of vector type @var{vec_type_out} and the +argument types should be @var{vec_type_in}. @end deftypefn @node Anchored Addresses diff --git a/gcc/target.h b/gcc/target.h index f4678e44bc4..2d8d69bed64 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -395,7 +395,7 @@ struct gcc_target /* Returns a code for builtin that realizes vectorized version of function, or NULL_TREE if not available. */ - tree (* builtin_vectorized_function) (unsigned, tree); + tree (* builtin_vectorized_function) (unsigned, tree, tree); /* Target builtin that implements vector widening multiplication. builtin_mul_widen_eve computes the element-by-element products diff --git a/gcc/targhooks.c b/gcc/targhooks.c index e7bdf0b3cbc..ed4d890d2cf 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -323,7 +323,8 @@ default_invalid_within_doloop (rtx insn) tree default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED, - tree type ATTRIBUTE_UNUSED) + tree type_out ATTRIBUTE_UNUSED, + tree type_in ATTRIBUTE_UNUSED) { return NULL_TREE; } diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 5f63dd77f6f..0b4ded93c45 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -57,7 +57,7 @@ extern const char * default_invalid_within_doloop (rtx); extern bool default_narrow_bitfield (void); -extern tree default_builtin_vectorized_function (enum built_in_function, tree); +extern tree default_builtin_vectorized_function (enum built_in_function, tree, tree); /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d6f14ca147c..2b88029caf4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-02-05 Richard Guenther + + * gcc.target/i386/vectorize3.c: New testcase. + 2007-02-05 Hans-Peter Nilsson PR target/30665 diff --git a/gcc/testsuite/gcc.target/i386/vectorize3.c b/gcc/testsuite/gcc.target/i386/vectorize3.c new file mode 100644 index 00000000000..2947acbafda --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vectorize3.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mfpmath=sse" } */ + +float a[256]; +int b[256]; +unsigned short c[256]; + +extern long lrintf (float); + +void foo(void) +{ + int i; + + for (i=0; i<256; ++i) + b[i] = lrintf (a[i]); +} + +void bar(void) +{ + int i; + + for (i=0; i<256; ++i) + { + b[i] = lrintf (a[i]); + c[i] += c[i]; + } +} + +/* { dg-final { scan-assembler "cvtps2dq" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index f9d3de6d4eb..c9e34e3e419 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -488,7 +488,7 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out) if (*type_in) { newfn = build_function_call_expr (newfn, newarglist); - if (vectorizable_function (newfn, *type_in)) + if (vectorizable_function (newfn, *type_in, *type_in) != NULL_TREE) return newfn; } } diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 846d52bf90c..fc95e6090aa 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -1579,47 +1579,28 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } /* Checks if CALL can be vectorized in type VECTYPE. Returns - true if the target has a vectorized version of the function, - or false if the function cannot be vectorized. */ + a function declaration if the target has a vectorized version + of the function, or NULL_TREE if the function cannot be vectorized. */ -bool -vectorizable_function (tree call, tree vectype) +tree +vectorizable_function (tree call, tree vectype_out, tree vectype_in) { tree fndecl = get_callee_fndecl (call); + enum built_in_function code; /* We only handle functions that do not read or clobber memory -- i.e. const or novops ones. */ if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS))) - return false; + return NULL_TREE; if (!fndecl || TREE_CODE (fndecl) != FUNCTION_DECL || !DECL_BUILT_IN (fndecl)) - return false; + return NULL_TREE; - if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype)) - return true; - - return false; -} - -/* Returns an expression that performs a call to vectorized version - of FNDECL in type VECTYPE, with the arguments given by ARGS. - If extra statements need to be generated, they are inserted - before BSI. */ - -static tree -build_vectorized_function_call (tree fndecl, - tree vectype, tree args) -{ - tree vfndecl; - enum built_in_function code = DECL_FUNCTION_CODE (fndecl); - - /* The target specific builtin should be available. */ - vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype); - gcc_assert (vfndecl != NULL_TREE); - - return build_function_call_expr (vfndecl, args); + code = DECL_FUNCTION_CODE (fndecl); + return targetm.vectorize.builtin_vectorized_function (code, vectype_out, + vectype_in); } /* Function vectorizable_call. @@ -1635,13 +1616,13 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) tree vec_dest; tree scalar_dest; tree operation; - tree op, args, type; - tree vec_oprnd, vargs, *pvargs_end; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree args, type; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info; + tree vectype_out, vectype_in; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - tree fndecl, rhs, new_temp, def, def_stmt; - enum vect_def_type dt; + tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type; + enum vect_def_type dt[2]; + int ncopies, j, nargs; /* Is STMT a vectorizable call? */ if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) @@ -1653,25 +1634,31 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) operation = GIMPLE_STMT_OPERAND (stmt, 1); if (TREE_CODE (operation) != CALL_EXPR) return false; - - /* For now, we only vectorize functions if a target specific builtin - is available. TODO -- in some cases, it might be profitable to - insert the calls for pieces of the vector, in order to be able - to vectorize other operations in the loop. */ - if (!vectorizable_function (operation, vectype)) + + /* Process function arguments. */ + rhs_type = NULL_TREE; + for (args = TREE_OPERAND (operation, 1), nargs = 0; + args; args = TREE_CHAIN (args), ++nargs) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "function is not vectorizable."); + tree op = TREE_VALUE (args); - return false; - } - gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)); + /* Bail out if the function has more than two arguments, we + do not have interesting builtin functions to vectorize with + more than two arguments. */ + if (nargs >= 2) + return false; - for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args)) - { - op = TREE_VALUE (args); + /* We can only handle calls with arguments of the same type. */ + if (rhs_type + && rhs_type != TREE_TYPE (op)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "argument types differ."); + return false; + } + rhs_type = TREE_TYPE (op); - if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) + if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -1679,6 +1666,37 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } } + /* No arguments is also not good. */ + if (nargs == 0) + return false; + + vectype_in = get_vectype_for_scalar_type (rhs_type); + + lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0)); + vectype_out = get_vectype_for_scalar_type (lhs_type); + + /* Only handle the case of vectors with the same number of elements. + FIXME: We need a way to handle for example the SSE2 cvtpd2dq + instruction which converts V2DFmode to V4SImode but only + using the lower half of the V4SImode result. */ + if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out)) + return false; + + /* For now, we only vectorize functions if a target specific builtin + is available. TODO -- in some cases, it might be profitable to + insert the calls for pieces of the vector, in order to be able + to vectorize other operations in the loop. */ + fndecl = vectorizable_function (operation, vectype_out, vectype_in); + if (fndecl == NULL_TREE) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "function is not vectorizable."); + + return false; + } + + gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)); + if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; @@ -1690,30 +1708,51 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform operation."); + ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo) + / TYPE_VECTOR_SUBPARTS (vectype_out)); + gcc_assert (ncopies >= 1); + /* Handle def. */ scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); - vec_dest = vect_create_destination_var (scalar_dest, vectype); + vec_dest = vect_create_destination_var (scalar_dest, vectype_out); - /* Handle uses. */ - vargs = NULL_TREE; - pvargs_end = &vargs; - for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args)) + prev_stmt_info = NULL; + for (j = 0; j < ncopies; ++j) { - op = TREE_VALUE (args); - vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); - - *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE); - pvargs_end = &TREE_CHAIN (*pvargs_end); + tree new_stmt, vargs; + tree vec_oprnd[2]; + int n; + + /* Build argument list for the vectorized call. */ + vargs = NULL_TREE; + for (args = TREE_OPERAND (operation, 1), n = 0; + args; args = TREE_CHAIN (args), ++n) + { + tree op = TREE_VALUE (args); + + if (j == 0) + vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL); + else + vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]); + + vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs); + } + vargs = nreverse (vargs); + + rhs = build_function_call_expr (fndecl, vargs); + new_stmt = build2 (GIMPLE_MODIFY_STMT, NULL_TREE, vec_dest, rhs); + new_temp = make_ssa_name (vec_dest, new_stmt); + GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; + + vect_finish_stmt_generation (stmt, new_stmt, bsi); + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); } - fndecl = get_callee_fndecl (operation); - rhs = build_vectorized_function_call (fndecl, vectype, vargs); - *vec_stmt = build2 (GIMPLE_MODIFY_STMT, vectype, vec_dest, rhs); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp; - - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); - /* The call in STMT might prevent it from being removed in dce. We however cannot remove it here, due to the way the ssa name it defines is mapped to the new definition. So just replace rhs of the statement with something diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index a13ee1e5c69..b523d880bfd 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -412,7 +412,7 @@ extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *); extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *); extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *); extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); -extern bool vectorizable_function (tree, tree); +extern tree vectorizable_function (tree, tree, tree); extern bool vectorizable_call (tree, block_stmt_iterator *, tree *); extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *); extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);