[Vectorizer] Make REDUC_xxx_EXPR tree codes produce a scalar result
PR tree-optimization/61114

* expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add extract_bit_field around optab result.
* fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR, produce scalar not vector.
* tree-cfg.c (verify_gimple_assign_unary): Check result vs operand type for REDUC_{MIN,MAX,PLUS}_EXPR.
* tree-vect-loop.c (vect_analyze_loop): Update comment.
(vect_create_epilog_for_reduction): For direct vector reduction, use result of tree code directly without extract_bit_field.
* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update comment.

From-SVN: r216736
This commit is contained in:
parent
60393bbc61
commit
99f76d9bac
|
@ -1,3 +1,22 @@
|
|||
2014-10-27 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
PR tree-optimization/61114
|
||||
* expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add
|
||||
extract_bit_field around optab result.
|
||||
|
||||
* fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR, produce
|
||||
scalar not vector.
|
||||
|
||||
* tree-cfg.c (verify_gimple_assign_unary): Check result vs operand type
|
||||
for REDUC_{MIN,MAX,PLUS}_EXPR.
|
||||
|
||||
* tree-vect-loop.c (vect_analyze_loop): Update comment.
|
||||
(vect_create_epilog_for_reduction): For direct vector reduction, use
|
||||
result of tree code directly without extract_bit_field.
|
||||
|
||||
* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update
|
||||
comment.
|
||||
|
||||
2014-10-27 Andrew MacLeod <amacleod@redhat.com>
|
||||
|
||||
* basic-block.h: Remove all includes.
|
||||
|
|
12
gcc/expr.c
12
gcc/expr.c
|
@ -9051,7 +9051,17 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
|
|||
{
|
||||
op0 = expand_normal (treeop0);
|
||||
this_optab = optab_for_tree_code (code, type, optab_default);
|
||||
temp = expand_unop (mode, this_optab, op0, target, unsignedp);
|
||||
enum machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0));
|
||||
temp = expand_unop (vec_mode, this_optab, op0, NULL_RTX, unsignedp);
|
||||
gcc_assert (temp);
|
||||
/* The tree code produces a scalar result, but (somewhat by convention)
|
||||
the optab produces a vector with the result in element 0 if
|
||||
little-endian, or element N-1 if big-endian. So pull the scalar
|
||||
result out of that element. */
|
||||
int index = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (vec_mode) - 1 : 0;
|
||||
int bitsize = GET_MODE_BITSIZE (GET_MODE_INNER (vec_mode));
|
||||
temp = extract_bit_field (temp, bitsize, bitsize * index, unsignedp,
|
||||
target, mode, mode);
|
||||
gcc_assert (temp);
|
||||
return temp;
|
||||
}
|
||||
|
|
|
@ -8259,12 +8259,13 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0)
|
|||
case REDUC_MAX_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
{
|
||||
unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
|
||||
unsigned int nelts, i;
|
||||
tree *elts;
|
||||
enum tree_code subcode;
|
||||
|
||||
if (TREE_CODE (op0) != VECTOR_CST)
|
||||
return NULL_TREE;
|
||||
nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0));
|
||||
|
||||
elts = XALLOCAVEC (tree, nelts);
|
||||
if (!vec_cst_ctor_to_array (op0, elts))
|
||||
|
@ -8283,10 +8284,9 @@ fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0)
|
|||
elts[0] = const_binop (subcode, elts[0], elts[i]);
|
||||
if (elts[0] == NULL_TREE || !CONSTANT_CLASS_P (elts[0]))
|
||||
return NULL_TREE;
|
||||
elts[i] = build_zero_cst (TREE_TYPE (type));
|
||||
}
|
||||
|
||||
return build_vector (type, elts);
|
||||
return elts[0];
|
||||
}
|
||||
|
||||
default:
|
||||
|
|
|
@ -3553,12 +3553,21 @@ verify_gimple_assign_unary (gimple stmt)
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
if (!VECTOR_TYPE_P (rhs1_type)
|
||||
|| !useless_type_conversion_p (lhs_type, TREE_TYPE (rhs1_type)))
|
||||
{
|
||||
error ("reduction should convert from vector to element type");
|
||||
debug_generic_expr (lhs_type);
|
||||
debug_generic_expr (rhs1_type);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
case VEC_UNPACK_FLOAT_HI_EXPR:
|
||||
case VEC_UNPACK_FLOAT_LO_EXPR:
|
||||
/* FIXME. */
|
||||
|
|
|
@ -1903,9 +1903,9 @@ vect_analyze_loop (struct loop *loop)
|
|||
|
||||
Output:
|
||||
REDUC_CODE - the corresponding tree-code to be used to reduce the
|
||||
vector of partial results into a single scalar result (which
|
||||
will also reside in a vector) or ERROR_MARK if the operation is
|
||||
a supported reduction operation, but does not have such tree-code.
|
||||
vector of partial results into a single scalar result, or ERROR_MARK
|
||||
if the operation is a supported reduction operation, but does not have
|
||||
such a tree-code.
|
||||
|
||||
Return FALSE if CODE currently cannot be vectorized as reduction. */
|
||||
|
||||
|
@ -4179,6 +4179,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
|
|||
if (reduc_code != ERROR_MARK && !slp_reduc)
|
||||
{
|
||||
tree tmp;
|
||||
tree vec_elem_type;
|
||||
|
||||
/*** Case 1: Create:
|
||||
v_out2 = reduc_expr <v_out1> */
|
||||
|
@ -4187,14 +4188,26 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
|
|||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Reduce using direct vector reduction.\n");
|
||||
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
tmp = build1 (reduc_code, vectype, new_phi_result);
|
||||
epilog_stmt = gimple_build_assign (vec_dest, tmp);
|
||||
new_temp = make_ssa_name (vec_dest, epilog_stmt);
|
||||
vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
|
||||
if (!useless_type_conversion_p (scalar_type, vec_elem_type))
|
||||
{
|
||||
tree tmp_dest =
|
||||
vect_create_destination_var (scalar_dest, vec_elem_type);
|
||||
tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
|
||||
epilog_stmt = gimple_build_assign (tmp_dest, tmp);
|
||||
new_temp = make_ssa_name (tmp_dest, epilog_stmt);
|
||||
gimple_assign_set_lhs (epilog_stmt, new_temp);
|
||||
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
||||
|
||||
tmp = build1 (NOP_EXPR, scalar_type, new_temp);
|
||||
}
|
||||
else
|
||||
tmp = build1 (reduc_code, scalar_type, new_phi_result);
|
||||
epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
gimple_assign_set_lhs (epilog_stmt, new_temp);
|
||||
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
||||
|
||||
extract_scalar_result = true;
|
||||
scalar_results.safe_push (new_temp);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -1161,10 +1161,9 @@ DEFTREECODE (TRANSACTION_EXPR, "transaction_expr", tcc_expression, 1)
|
|||
result (e.g. summing the elements of the vector, finding the minimum over
|
||||
the vector elements, etc).
|
||||
Operand 0 is a vector.
|
||||
The expression returns a vector of the same type, with the first
|
||||
element in the vector holding the result of the reduction of all elements
|
||||
of the operand. The content of the other elements in the returned vector
|
||||
is undefined. */
|
||||
The expression returns a scalar, with type the same as the elements of the
|
||||
vector, holding the result of the reduction of all elements of the operand.
|
||||
*/
|
||||
DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
|
||||
DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
|
||||
DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)
|
||||
|
|
Loading…
Reference in New Issue