[Vectorizer] Make REDUC_xxx_EXPR tree codes produce a scalar result

PR tree-optimization/61114
	* expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add
	extract_bit_field around optab result.

	* fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR, produce
	scalar not vector.

	* tree-cfg.c (verify_gimple_assign_unary): Check result vs operand type
	for REDUC_{MIN,MAX,PLUS}_EXPR.

	* tree-vect-loop.c (vect_analyze_loop): Update comment.
	(vect_create_epilog_for_reduction): For direct vector reduction, use
	result of tree code directly without extract_bit_field.

	* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update
	comment.

From-SVN: r216736
This commit is contained in:
Alan Lawrence 2014-10-27 14:04:43 +00:00 committed by Alan Lawrence
parent 60393bbc61
commit 99f76d9bac
6 changed files with 70 additions and 20 deletions

View File

@@ -1,3 +1,22 @@
+2014-10-27 Alan Lawrence <alan.lawrence@arm.com>
+PR tree-optimization/61114
+* expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add
+extract_bit_field around optab result.
+* fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR, produce
+scalar not vector.
+* tree-cfg.c (verify_gimple_assign_unary): Check result vs operand type
+for REDUC_{MIN,MAX,PLUS}_EXPR.
+* tree-vect-loop.c (vect_analyze_loop): Update comment.
+(vect_create_epilog_for_reduction): For direct vector reduction, use
+result of tree code directly without extract_bit_field.
+* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update
+comment.
 2014-10-27 Andrew MacLeod <amacleod@redhat.com>
 * basic-block.h: Remove all includes.

View File

@@ -9051,7 +9051,17 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 {
 op0 = expand_normal (treeop0);
 this_optab = optab_for_tree_code (code, type, optab_default);
-temp = expand_unop (mode, this_optab, op0, target, unsignedp);
+enum machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0));
+temp = expand_unop (vec_mode, this_optab, op0, NULL_RTX, unsignedp);
+gcc_assert (temp);
+/* The tree code produces a scalar result, but (somewhat by convention)
+the optab produces a vector with the result in element 0 if
+little-endian, or element N-1 if big-endian. So pull the scalar
+result out of that element. */
+int index = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (vec_mode) - 1 : 0;
+int bitsize = GET_MODE_BITSIZE (GET_MODE_INNER (vec_mode));
+temp = extract_bit_field (temp, bitsize, bitsize * index, unsignedp,
+target, mode, mode);
 gcc_assert (temp);
 return temp;
 }

View File

@@ -8259,12 +8259,13 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0)
 case REDUC_MAX_EXPR:
 case REDUC_PLUS_EXPR:
 {
-unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+unsigned int nelts, i;
 tree *elts;
 enum tree_code subcode;
 if (TREE_CODE (op0) != VECTOR_CST)
 return NULL_TREE;
+nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0));
 elts = XALLOCAVEC (tree, nelts);
 if (!vec_cst_ctor_to_array (op0, elts))
@@ -8283,10 +8284,9 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0)
 elts[0] = const_binop (subcode, elts[0], elts[i]);
 if (elts[0] == NULL_TREE || !CONSTANT_CLASS_P (elts[0]))
 return NULL_TREE;
-elts[i] = build_zero_cst (TREE_TYPE (type));
 }
-return build_vector (type, elts);
+return elts[0];
 }
 default:

View File

@@ -3553,12 +3553,21 @@ verify_gimple_assign_unary (gimple stmt)
 return false;
 }
-case VEC_UNPACK_HI_EXPR:
-case VEC_UNPACK_LO_EXPR:
 case REDUC_MAX_EXPR:
 case REDUC_MIN_EXPR:
 case REDUC_PLUS_EXPR:
+if (!VECTOR_TYPE_P (rhs1_type)
+|| !useless_type_conversion_p (lhs_type, TREE_TYPE (rhs1_type)))
+{
+error ("reduction should convert from vector to element type");
+debug_generic_expr (lhs_type);
+debug_generic_expr (rhs1_type);
+return true;
+}
+return false;
+case VEC_UNPACK_HI_EXPR:
+case VEC_UNPACK_LO_EXPR:
 case VEC_UNPACK_FLOAT_HI_EXPR:
 case VEC_UNPACK_FLOAT_LO_EXPR:
 /* FIXME. */

View File

@@ -1903,9 +1903,9 @@ vect_analyze_loop (struct loop *loop)
 Output:
 REDUC_CODE - the corresponding tree-code to be used to reduce the
-vector of partial results into a single scalar result (which
-will also reside in a vector) or ERROR_MARK if the operation is
-a supported reduction operation, but does not have such tree-code.
+vector of partial results into a single scalar result, or ERROR_MARK
+if the operation is a supported reduction operation, but does not have
+such a tree-code.
 Return FALSE if CODE currently cannot be vectorized as reduction. */
@@ -4179,6 +4179,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
 if (reduc_code != ERROR_MARK && !slp_reduc)
 {
 tree tmp;
+tree vec_elem_type;
 /*** Case 1: Create:
 v_out2 = reduc_expr <v_out1> */
@@ -4187,14 +4188,26 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
 dump_printf_loc (MSG_NOTE, vect_location,
 "Reduce using direct vector reduction.\n");
-vec_dest = vect_create_destination_var (scalar_dest, vectype);
-tmp = build1 (reduc_code, vectype, new_phi_result);
-epilog_stmt = gimple_build_assign (vec_dest, tmp);
-new_temp = make_ssa_name (vec_dest, epilog_stmt);
+vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
+if (!useless_type_conversion_p (scalar_type, vec_elem_type))
+{
+tree tmp_dest =
+vect_create_destination_var (scalar_dest, vec_elem_type);
+tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
+epilog_stmt = gimple_build_assign (tmp_dest, tmp);
+new_temp = make_ssa_name (tmp_dest, epilog_stmt);
+gimple_assign_set_lhs (epilog_stmt, new_temp);
+gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+tmp = build1 (NOP_EXPR, scalar_type, new_temp);
+}
+else
+tmp = build1 (reduc_code, scalar_type, new_phi_result);
+epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
+new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
 gimple_assign_set_lhs (epilog_stmt, new_temp);
 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-extract_scalar_result = true;
+scalar_results.safe_push (new_temp);
 }
 else
 {

View File

@@ -1161,10 +1161,9 @@ DEFTREECODE (TRANSACTION_EXPR, "transaction_expr", tcc_expression, 1)
 result (e.g. summing the elements of the vector, finding the minimum over
 the vector elements, etc).
 Operand 0 is a vector.
-The expression returns a vector of the same type, with the first
-element in the vector holding the result of the reduction of all elements
-of the operand. The content of the other elements in the returned vector
-is undefined. */
+The expression returns a scalar, with type the same as the elements of the
+vector, holding the result of the reduction of all elements of the operand.
+*/
 DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
 DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
 DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)