tree-vectorizer.h (vectorizable_function): Add argument type argument, change return type.

2007-02-05  Richard Guenther  <rguenther@suse.de>

	* tree-vectorizer.h (vectorizable_function): Add argument type
	argument, change return type.
	* tree-vect-patterns.c (vect_recog_pow_pattern): Adjust caller.
	* tree-vect-transform.c (vectorizable_function): Handle extra
	argument, return vectorized function decl.
	(build_vectorized_function_call): Remove.
	(vectorizable_call): Handle calls with result and argument types
	differing.  Handle loop vectorization factor correctly.
	* targhooks.c (default_builtin_vectorized_function): Adjust for
	extra argument.
	* targhooks.h (default_builtin_vectorized_function): Likewise.
	* target.h (builtin_vectorized_function): Add argument type
        argument.
	* config/i386/i386.c (ix86_builtin_vectorized_function): Handle
	extra argument, allow vectorizing of lrintf.
	* doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Adjust
	documentation of target hook.

	* gcc.target/i386/vectorize3.c: New testcase.

From-SVN: r121617
This commit is contained in:
Richard Guenther 2007-02-05 21:38:53 +00:00 committed by Richard Biener
parent d6df67efcd
commit b95becfc09
11 changed files with 191 additions and 84 deletions

View File

@ -1,3 +1,23 @@
2007-02-05 Richard Guenther <rguenther@suse.de>
* tree-vectorizer.h (vectorizable_function): Add argument type
argument, change return type.
* tree-vect-patterns.c (vect_recog_pow_pattern): Adjust caller.
* tree-vect-transform.c (vectorizable_function): Handle extra
argument, return vectorized function decl.
(build_vectorized_function_call): Remove.
(vectorizable_call): Handle calls with result and argument types
differing. Handle loop vectorization factor correctly.
* targhooks.c (default_builtin_vectorized_function): Adjust for
extra argument.
* targhooks.h (default_builtin_vectorized_function): Likewise.
* target.h (builtin_vectorized_function): Add argument type
argument.
* config/i386/i386.c (ix86_builtin_vectorized_function): Handle
extra argument, allow vectorizing of lrintf.
* doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Adjust
documentation of target hook.
2007-02-05 Hans-Peter Nilsson <hp@axis.com>
PR target/30665

View File

@ -1355,7 +1355,7 @@ static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
@ -17661,29 +17661,41 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if it is not available. */
static tree
ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
tree type_in)
{
enum machine_mode el_mode;
int n;
enum machine_mode in_mode, out_mode;
int in_n, out_n;
if (TREE_CODE (type) != VECTOR_TYPE)
if (TREE_CODE (type_out) != VECTOR_TYPE
|| TREE_CODE (type_in) != VECTOR_TYPE)
return NULL_TREE;
el_mode = TYPE_MODE (TREE_TYPE (type));
n = TYPE_VECTOR_SUBPARTS (type);
out_mode = TYPE_MODE (TREE_TYPE (type_out));
out_n = TYPE_VECTOR_SUBPARTS (type_out);
in_mode = TYPE_MODE (TREE_TYPE (type_in));
in_n = TYPE_VECTOR_SUBPARTS (type_in);
switch (fn)
{
case BUILT_IN_SQRT:
if (el_mode == DFmode && n == 2)
if (out_mode == DFmode && out_n == 2
&& in_mode == DFmode && in_n == 2)
return ix86_builtins[IX86_BUILTIN_SQRTPD];
return NULL_TREE;
case BUILT_IN_SQRTF:
if (el_mode == SFmode && n == 4)
if (out_mode == SFmode && out_n == 4
&& in_mode == SFmode && in_n == 4)
return ix86_builtins[IX86_BUILTIN_SQRTPS];
return NULL_TREE;
case BUILT_IN_LRINTF:
if (out_mode == SImode && out_n == 4
&& in_mode == SFmode && in_n == 4)
return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
return NULL_TREE;
default:
;
}

View File

@ -5384,11 +5384,12 @@ preserved (e.g. used only by a reduction computation). Otherwise, the
@code{widen_mult_hi/lo} idioms will be used.
@end deftypefn
@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type})
@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in})
This hook should return the decl of a function that implements the vectorized
variant of the builtin function with builtin function code @var{code} or
@code{NULL_TREE} if such a function is not available. The return type of
the vectorized function shall be of vector type @var{vec_type}.
the vectorized function shall be of vector type @var{vec_type_out} and the
argument types should be @var{vec_type_in}.
@end deftypefn
@node Anchored Addresses

View File

@ -395,7 +395,7 @@ struct gcc_target
/* Returns a code for builtin that realizes vectorized version of
function, or NULL_TREE if not available. */
tree (* builtin_vectorized_function) (unsigned, tree);
tree (* builtin_vectorized_function) (unsigned, tree, tree);
/* Target builtin that implements vector widening multiplication.
builtin_mul_widen_even computes the element-by-element products

View File

@ -323,7 +323,8 @@ default_invalid_within_doloop (rtx insn)
tree
default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED,
tree type ATTRIBUTE_UNUSED)
tree type_out ATTRIBUTE_UNUSED,
tree type_in ATTRIBUTE_UNUSED)
{
return NULL_TREE;
}

View File

@ -57,7 +57,7 @@ extern const char * default_invalid_within_doloop (rtx);
extern bool default_narrow_bitfield (void);
extern tree default_builtin_vectorized_function (enum built_in_function, tree);
extern tree default_builtin_vectorized_function (enum built_in_function, tree, tree);
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */

View File

@ -1,3 +1,7 @@
2007-02-05 Richard Guenther <rguenther@suse.de>
* gcc.target/i386/vectorize3.c: New testcase.
2007-02-05 Hans-Peter Nilsson <hp@axis.com>
PR target/30665

View File

@ -0,0 +1,30 @@
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mfpmath=sse" } */
float a[256];
int b[256];
unsigned short c[256];
extern long lrintf (float);
/* Convert each of the 256 floats in a[] to an integer with lrintf and
   store the result in the matching slot of b[].  The dg-final directive
   of this testcase scans the assembly for "cvtps2dq".
   NOTE(review): presumably this is the loop expected to be vectorized
   into that instruction -- confirm against the vectorizer dump.  */
void foo(void)
{
int i;
for (i=0; i<256; ++i)
b[i] = lrintf (a[i]);
}
/* Same lrintf conversion from a[] to b[] as in foo, but the loop body
   additionally doubles every element of the unsigned short array c[].
   NOTE(review): the narrower element type of c[] presumably raises the
   loop vectorization factor, so the vectorized call has to be emitted
   more than once per vector iteration -- confirm.  */
void bar(void)
{
int i;
for (i=0; i<256; ++i)
{
b[i] = lrintf (a[i]);
/* Second, independent statement operating on a different element type.  */
c[i] += c[i];
}
}
/* { dg-final { scan-assembler "cvtps2dq" } } */

View File

@ -488,7 +488,7 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out)
if (*type_in)
{
newfn = build_function_call_expr (newfn, newarglist);
if (vectorizable_function (newfn, *type_in))
if (vectorizable_function (newfn, *type_in, *type_in) != NULL_TREE)
return newfn;
}
}

View File

@ -1579,47 +1579,28 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
}
/* Checks if CALL can be vectorized with result type VECTYPE_OUT and
argument type VECTYPE_IN. Returns
true if the target has a vectorized version of the function,
or false if the function cannot be vectorized. */
a function declaration if the target has a vectorized version
of the function, or NULL_TREE if the function cannot be vectorized. */
bool
vectorizable_function (tree call, tree vectype)
tree
vectorizable_function (tree call, tree vectype_out, tree vectype_in)
{
tree fndecl = get_callee_fndecl (call);
enum built_in_function code;
/* We only handle functions that do not read or clobber memory -- i.e.
const or novops ones. */
if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
return false;
return NULL_TREE;
if (!fndecl
|| TREE_CODE (fndecl) != FUNCTION_DECL
|| !DECL_BUILT_IN (fndecl))
return false;
return NULL_TREE;
if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
return true;
return false;
}
/* Returns an expression that performs a call to vectorized version
of FNDECL in type VECTYPE, with the arguments given by ARGS.
If extra statements need to be generated, they are inserted
before BSI. */
static tree
build_vectorized_function_call (tree fndecl,
tree vectype, tree args)
{
tree vfndecl;
enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
/* The target specific builtin should be available. */
vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
gcc_assert (vfndecl != NULL_TREE);
return build_function_call_expr (vfndecl, args);
code = DECL_FUNCTION_CODE (fndecl);
return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
vectype_in);
}
/* Function vectorizable_call.
@ -1635,13 +1616,13 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
tree vec_dest;
tree scalar_dest;
tree operation;
tree op, args, type;
tree vec_oprnd, vargs, *pvargs_end;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree args, type;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
tree vectype_out, vectype_in;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
tree fndecl, rhs, new_temp, def, def_stmt;
enum vect_def_type dt;
tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
enum vect_def_type dt[2];
int ncopies, j, nargs;
/* Is STMT a vectorizable call? */
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
@ -1653,25 +1634,31 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
operation = GIMPLE_STMT_OPERAND (stmt, 1);
if (TREE_CODE (operation) != CALL_EXPR)
return false;
/* For now, we only vectorize functions if a target specific builtin
is available. TODO -- in some cases, it might be profitable to
insert the calls for pieces of the vector, in order to be able
to vectorize other operations in the loop. */
if (!vectorizable_function (operation, vectype))
/* Process function arguments. */
rhs_type = NULL_TREE;
for (args = TREE_OPERAND (operation, 1), nargs = 0;
args; args = TREE_CHAIN (args), ++nargs)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "function is not vectorizable.");
tree op = TREE_VALUE (args);
return false;
}
gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
/* Bail out if the function has more than two arguments; we
do not have interesting builtin functions to vectorize with
more than two arguments. */
if (nargs >= 2)
return false;
for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
{
op = TREE_VALUE (args);
/* We can only handle calls with arguments of the same type. */
if (rhs_type
&& rhs_type != TREE_TYPE (op))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "argument types differ.");
return false;
}
rhs_type = TREE_TYPE (op);
if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
@ -1679,6 +1666,37 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
}
}
/* No arguments is also not good. */
if (nargs == 0)
return false;
vectype_in = get_vectype_for_scalar_type (rhs_type);
lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
vectype_out = get_vectype_for_scalar_type (lhs_type);
/* Only handle the case of vectors with the same number of elements.
FIXME: We need a way to handle for example the SSE2 cvtpd2dq
instruction which converts V2DFmode to V4SImode but only
using the lower half of the V4SImode result. */
if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out))
return false;
/* For now, we only vectorize functions if a target specific builtin
is available. TODO -- in some cases, it might be profitable to
insert the calls for pieces of the vector, in order to be able
to vectorize other operations in the loop. */
fndecl = vectorizable_function (operation, vectype_out, vectype_in);
if (fndecl == NULL_TREE)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "function is not vectorizable.");
return false;
}
gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
@ -1690,30 +1708,51 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation.");
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
/ TYPE_VECTOR_SUBPARTS (vectype_out));
gcc_assert (ncopies >= 1);
/* Handle def. */
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vec_dest = vect_create_destination_var (scalar_dest, vectype);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
/* Handle uses. */
vargs = NULL_TREE;
pvargs_end = &vargs;
for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
prev_stmt_info = NULL;
for (j = 0; j < ncopies; ++j)
{
op = TREE_VALUE (args);
vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
*pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
pvargs_end = &TREE_CHAIN (*pvargs_end);
tree new_stmt, vargs;
tree vec_oprnd[2];
int n;
/* Build argument list for the vectorized call. */
vargs = NULL_TREE;
for (args = TREE_OPERAND (operation, 1), n = 0;
args; args = TREE_CHAIN (args), ++n)
{
tree op = TREE_VALUE (args);
if (j == 0)
vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL);
else
vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]);
vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs);
}
vargs = nreverse (vargs);
rhs = build_function_call_expr (fndecl, vargs);
new_stmt = build2 (GIMPLE_MODIFY_STMT, NULL_TREE, vec_dest, rhs);
new_temp = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
fndecl = get_callee_fndecl (operation);
rhs = build_vectorized_function_call (fndecl, vectype, vargs);
*vec_stmt = build2 (GIMPLE_MODIFY_STMT, vectype, vec_dest, rhs);
new_temp = make_ssa_name (vec_dest, *vec_stmt);
GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
/* The call in STMT might prevent it from being removed in dce. We however
cannot remove it here, due to the way the ssa name it defines is mapped
to the new definition. So just replace rhs of the statement with something

View File

@ -412,7 +412,7 @@ extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_function (tree, tree);
extern tree vectorizable_function (tree, tree, tree);
extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);