Add new optabs for reducing vectors to scalars

PR tree-optimization/61114
	* doc/md.texi (Standard Names): Add reduc_(plus|[us](min|max))_scal
	optabs, and note in reduc_[us](plus|min|max) to prefer the former.

	* expr.c (expand_expr_real_2): Use reduc_..._scal if available, fall
	back to old reduc_... + BIT_FIELD_REF only if not.

	* optabs.c (optab_for_tree_code): For REDUC_(MAX,MIN,PLUS)_EXPR,
	return the reduce-to-scalar (reduc_..._scal) optab.
	(scalar_reduc_to_vector): New.

	* optabs.def (reduc_smax_scal_optab, reduc_smin_scal_optab,
	reduc_plus_scal_optab, reduc_umax_scal_optab, reduc_umin_scal_optab):
	New.

	* optabs.h (scalar_reduc_to_vector): Declare.

	* tree-vect-loop.c (vectorizable_reduction): Look for optabs reducing
	to either scalar or vector.

From-SVN: r216737
This commit is contained in:
Alan Lawrence 2014-10-27 14:20:52 +00:00 committed by Alan Lawrence
parent 99f76d9bac
commit d43a252e2f
7 changed files with 118 additions and 23 deletions

View File

@ -1,3 +1,25 @@
2014-10-27 Alan Lawrence <alan.lawrence@arm.com>
PR tree-optimization/61114
* doc/md.texi (Standard Names): Add reduc_(plus|[us](min|max))_scal
optabs, and note in reduc_[us](plus|min|max) to prefer the former.
* expr.c (expand_expr_real_2): Use reduc_..._scal if available, fall
back to old reduc_... + BIT_FIELD_REF only if not.
* optabs.c (optab_for_tree_code): For REDUC_(MAX,MIN,PLUS)_EXPR,
return the reduce-to-scalar (reduc_..._scal) optab.
(scalar_reduc_to_vector): New.
* optabs.def (reduc_smax_scal_optab, reduc_smin_scal_optab,
reduc_plus_scal_optab, reduc_umax_scal_optab, reduc_umin_scal_optab):
New.
* optabs.h (scalar_reduc_to_vector): Declare.
* tree-vect-loop.c (vectorizable_reduction): Look for optabs reducing
to either scalar or vector.
2014-10-27 Alan Lawrence <alan.lawrence@arm.com>
PR tree-optimization/61114

View File

@ -4724,29 +4724,48 @@ it is unspecified which of the two operands is returned as the result.
@cindex @code{reduc_smax_@var{m}} instruction pattern
@item @samp{reduc_smin_@var{m}}, @samp{reduc_smax_@var{m}}
Find the signed minimum/maximum of the elements of a vector. The vector is
operand 1, and the scalar result is stored in the least significant bits of
operand 1, and the result is stored in the least significant bits of
operand 0 (also a vector). The output and input vector should have the same
modes.
modes. These are legacy optabs, and platforms should prefer to implement
@samp{reduc_smin_scal_@var{m}} and @samp{reduc_smax_scal_@var{m}}.
@cindex @code{reduc_umin_@var{m}} instruction pattern
@cindex @code{reduc_umax_@var{m}} instruction pattern
@item @samp{reduc_umin_@var{m}}, @samp{reduc_umax_@var{m}}
Find the unsigned minimum/maximum of the elements of a vector. The vector is
operand 1, and the scalar result is stored in the least significant bits of
operand 1, and the result is stored in the least significant bits of
operand 0 (also a vector). The output and input vector should have the same
modes.
modes. These are legacy optabs, and platforms should prefer to implement
@samp{reduc_umin_scal_@var{m}} and @samp{reduc_umax_scal_@var{m}}.
@cindex @code{reduc_splus_@var{m}} instruction pattern
@item @samp{reduc_splus_@var{m}}
Compute the sum of the signed elements of a vector. The vector is operand 1,
and the scalar result is stored in the least significant bits of operand 0
(also a vector). The output and input vector should have the same modes.
@cindex @code{reduc_uplus_@var{m}} instruction pattern
@item @samp{reduc_uplus_@var{m}}
Compute the sum of the unsigned elements of a vector. The vector is operand 1,
and the scalar result is stored in the least significant bits of operand 0
@item @samp{reduc_splus_@var{m}}, @samp{reduc_uplus_@var{m}}
Compute the sum of the signed/unsigned elements of a vector. The vector is
operand 1, and the result is stored in the least significant bits of operand 0
(also a vector). The output and input vector should have the same modes.
These are legacy optabs, and platforms should prefer to implement
@samp{reduc_plus_scal_@var{m}}.
@cindex @code{reduc_smin_scal_@var{m}} instruction pattern
@cindex @code{reduc_smax_scal_@var{m}} instruction pattern
@item @samp{reduc_smin_scal_@var{m}}, @samp{reduc_smax_scal_@var{m}}
Find the signed minimum/maximum of the elements of a vector. The vector is
operand 1, and operand 0 is the scalar result, with mode equal to the mode of
the elements of the input vector.
@cindex @code{reduc_umin_scal_@var{m}} instruction pattern
@cindex @code{reduc_umax_scal_@var{m}} instruction pattern
@item @samp{reduc_umin_scal_@var{m}}, @samp{reduc_umax_scal_@var{m}}
Find the unsigned minimum/maximum of the elements of a vector. The vector is
operand 1, and operand 0 is the scalar result, with mode equal to the mode of
the elements of the input vector.
@cindex @code{reduc_plus_scal_@var{m}} instruction pattern
@item @samp{reduc_plus_scal_@var{m}}
Compute the sum of the elements of a vector. The vector is operand 1, and
operand 0 is the scalar result, with mode equal to the mode of the elements of
the input vector.
@cindex @code{sdot_prod@var{m}} instruction pattern
@item @samp{sdot_prod@var{m}}

View File

@ -9052,6 +9052,24 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
op0 = expand_normal (treeop0);
this_optab = optab_for_tree_code (code, type, optab_default);
enum machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0));
if (optab_handler (this_optab, vec_mode) != CODE_FOR_nothing)
{
struct expand_operand ops[2];
enum insn_code icode = optab_handler (this_optab, vec_mode);
create_output_operand (&ops[0], target, mode);
create_input_operand (&ops[1], op0, vec_mode);
if (maybe_expand_insn (icode, 2, ops))
{
target = ops[0].value;
if (GET_MODE (target) != mode)
return gen_lowpart (tmode, target);
return target;
}
}
/* Fall back to optab with vector result, and then extract scalar. */
this_optab = scalar_reduc_to_vector (this_optab, type);
temp = expand_unop (vec_mode, this_optab, op0, NULL_RTX, unsignedp);
gcc_assert (temp);
/* The tree code produces a scalar result, but (somewhat by convention)

View File

@ -509,13 +509,15 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return fma_optab;
case REDUC_MAX_EXPR:
return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
return TYPE_UNSIGNED (type)
? reduc_umax_scal_optab : reduc_smax_scal_optab;
case REDUC_MIN_EXPR:
return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
return TYPE_UNSIGNED (type)
? reduc_umin_scal_optab : reduc_smin_scal_optab;
case REDUC_PLUS_EXPR:
return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
return reduc_plus_scal_optab;
case VEC_LSHIFT_EXPR:
return vec_shl_optab;
@ -611,7 +613,26 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return unknown_optab;
}
}
/* Map UNOPTAB, one of the reduc_..._scal optabs that reduce a vector to a
   scalar, onto the corresponding legacy optab whose result is a vector
   holding the reduction result in one element.  TYPE is the type of the
   tree being expanded; it is consulted only for the plus reduction, where
   its signedness chooses between reduc_uplus_optab and reduc_splus_optab.
   Any other UNOPTAB yields unknown_optab.  */
optab
scalar_reduc_to_vector (optab unoptab, const_tree type)
{
  /* The scalar plus optab is sign-agnostic, but the legacy ones are not.  */
  if (unoptab == reduc_plus_scal_optab)
    {
      if (TYPE_UNSIGNED (type))
	return reduc_uplus_optab;
      return reduc_splus_optab;
    }

  /* Min/max optabs already encode signedness, so they map one-to-one.  */
  if (unoptab == reduc_smin_scal_optab)
    return reduc_smin_optab;
  if (unoptab == reduc_umin_scal_optab)
    return reduc_umin_optab;
  if (unoptab == reduc_smax_scal_optab)
    return reduc_smax_optab;
  if (unoptab == reduc_umax_scal_optab)
    return reduc_umax_optab;

  return unknown_optab;
}
/* Expand vector widening operations.

View File

@ -243,12 +243,20 @@ OPTAB_D (sin_optab, "sin$a2")
OPTAB_D (sincos_optab, "sincos$a3")
OPTAB_D (tan_optab, "tan$a2")
/* Vector reduction to a scalar. */
OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
OPTAB_D (reduc_umax_scal_optab, "reduc_umax_scal_$a")
OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
/* (Old) Vector reduction, returning a vector with the result in one lane. */
OPTAB_D (reduc_smax_optab, "reduc_smax_$a")
OPTAB_D (reduc_smin_optab, "reduc_smin_$a")
OPTAB_D (reduc_splus_optab, "reduc_splus_$a")
OPTAB_D (reduc_umax_optab, "reduc_umax_$a")
OPTAB_D (reduc_umin_optab, "reduc_umin_$a")
OPTAB_D (reduc_uplus_optab, "reduc_uplus_$a")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")

View File

@ -162,6 +162,11 @@ enum optab_subtype
vector shifts and rotates */
extern optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype);
/* Given an optab that reduces a vector to a scalar, find instead the old
optab that produces a vector with the reduction result in one element,
for a tree with the specified type. */
extern optab scalar_reduc_to_vector (optab, const_tree type);
/* The various uses that a comparison can have; used by can_compare_p:
jumps, conditional moves, store flag operations. */
enum can_compare_purpose

View File

@ -5113,15 +5113,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
epilog_reduc_code = ERROR_MARK;
}
if (reduc_optab
&& optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"reduc op not supported by target.\n");
optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"reduc op not supported by target.\n");
epilog_reduc_code = ERROR_MARK;
epilog_reduc_code = ERROR_MARK;
}
}
}
else