Move vector highpart emulation to the optabs layer

* expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
        (expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
        * optabs.c (can_mult_highpart_p): New.
        (expand_mult_highpart): New.
        * expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
        * tree-vect-generic.c (expand_vector_operations_1): Don't expand
        by pieces if can_mult_highpart_p.
        (expand_vector_divmod): Use can_mult_highpart_p and always
        generate MULT_HIGHPART_EXPR.
        * tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
        * tree-vect-stmts.c (vectorizable_operation): Likewise.

From-SVN: r189407
This commit is contained in:
Richard Henderson 2012-07-10 01:25:20 -07:00 committed by Richard Henderson
parent 9283726f76
commit 00f07b86e7
8 changed files with 204 additions and 286 deletions

View File

@ -1,5 +1,17 @@
2012-07-10 Richard Henderson <rth@redhat.com>
* expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
(expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
* optabs.c (can_mult_highpart_p): New.
(expand_mult_highpart): New.
* expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
* tree-vect-generic.c (expand_vector_operations_1): Don't expand
by pieces if can_mult_highpart_p.
(expand_vector_divmod): Use can_mult_highpart_p and always
generate MULT_HIGHPART_EXPR.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
* tree-vect-stmts.c (vectorizable_operation): Likewise.
* config/spu/spu-builtins.md (spu_mpy): Move to spu.md.
(spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise.
* config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy.

View File

@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
int, int);
/* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit
@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
return target;
}
/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
static rtx
extract_high_half (enum machine_mode mode, rtx op)
@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op)
return convert_modes (mode, wider_mode, op, 0);
}
/* Like expand_mult_highpart, but only consider using a multiplication
/* Like expmed_mult_highpart, but only consider using a multiplication
optab. OP1 is an rtx for the constant operand. */
static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost)
{
rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
MAX_COST is the total allowed cost for the expanded RTL. */
static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost)
{
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
mode == word_mode, however all the cost calculations in
synth_mult etc. assume single-word operations. */
if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
return expand_mult_highpart_optab (mode, op0, op1, target,
return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
{
/* See whether the specialized multiplication optabs are
cheaper than the shift/add version. */
tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
alg.cost.cost + extra_cost);
if (tem)
return tem;
@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
return tem;
}
return expand_mult_highpart_optab (mode, op0, op1, target,
return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
}
@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
half of the product. Different strategies for generating the product are
implemented in expand_mult_highpart.
implemented in expmed_mult_highpart.
If what we actually want is the remainder, we generate that by another
by-constant multiplication and a subtraction. */
@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
mode for which we can do the operation with a library call. */
/* We might want to refine this now that we have division-by-constant
optimization. Since expand_mult_highpart tries so many variants, it is
optimization. Since expmed_mult_highpart tries so many variants, it is
not straightforward to generalize this. Maybe we should make an array
of possible modes in init_expmed? Save this for GCC 2.7. */
@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= (shift_cost[speed][compute_mode][post_shift - 1]
+ shift_cost[speed][compute_mode][1]
+ 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0,
t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);
@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost
= (shift_cost[speed][compute_mode][pre_shift]
+ shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1,
t2 = expmed_mult_highpart (compute_mode, t1,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);
@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0,
t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml), NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0)
@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
t1 = expmed_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0)
@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2,
t3 = expmed_mult_highpart (compute_mode, t2,
GEN_INT (ml), NULL_RTX, 1,
max_cost - extra_cost);
if (t3 != 0)

View File

@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
return expand_divmod (0, code, mode, op0, op1, target, unsignedp);
case RDIV_EXPR:
case MULT_HIGHPART_EXPR:
goto binop;
case MULT_HIGHPART_EXPR:
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
temp = expand_mult_highpart (mode, op0, op1, target, unsignedp);
gcc_assert (temp);
return temp;
case TRUNC_MOD_EXPR:
case FLOOR_MOD_EXPR:
case CEIL_MOD_EXPR:

View File

@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
return ops[0].value;
}
/* Return non-zero if a highpart multiply in MODE is supported or can be
   synthesized; UNS_P selects the unsigned rather than the signed optabs.
   For the benefit of expand_mult_highpart, the non-zero value names the
   strategy that was found: 1 for a direct highpart pattern, 2 for
   even/odd widening multiplies, and 3 for hi/lo widening multiplies.
   Strategies 2 and 3 additionally require that the target can perform
   the vector permutation that recombines the two widened products.  */

int
can_mult_highpart_p (enum machine_mode mode, bool uns_p)
{
  optab direct_tab, first_tab, second_tab;
  unsigned char *perm_sel;
  unsigned elt, count;

  /* Strategy 1: the target has a highpart multiply for MODE itself.  */
  direct_tab = uns_p ? umul_highpart_optab : smul_highpart_optab;
  if (optab_handler (direct_tab, mode) != CODE_FOR_nothing)
    return 1;

  /* Synthesis from widening operations is only attempted for integral
     vector modes.  */
  if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
    return 0;

  count = GET_MODE_NUNITS (mode);
  perm_sel = XALLOCAVEC (unsigned char, count);

  /* Strategy 2: even/odd widening multiplies plus a permutation.  */
  first_tab = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
  second_tab = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
  if (optab_handler (first_tab, mode) != CODE_FOR_nothing
      && optab_handler (second_tab, mode) != CODE_FOR_nothing)
    {
      for (elt = 0; elt < count; elt++)
        {
          unsigned lane = !BYTES_BIG_ENDIAN + (elt & ~1);

          /* Odd result elements are taken from the second input vector.  */
          if (elt & 1)
            lane += count;
          perm_sel[elt] = lane;
        }
      if (can_vec_perm_p (mode, false, perm_sel))
        return 2;
    }

  /* Strategy 3: hi/lo widening multiplies plus a permutation.  */
  first_tab = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
  second_tab = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
  if (optab_handler (first_tab, mode) != CODE_FOR_nothing
      && optab_handler (second_tab, mode) != CODE_FOR_nothing)
    {
      for (elt = 0; elt < count; elt++)
        perm_sel[elt] = 2 * elt + (BYTES_BIG_ENDIAN ? 0 : 1);
      if (can_vec_perm_p (mode, false, perm_sel))
        return 3;
    }

  return 0;
}
/* Expand a highpart multiply of OP0 and OP1 in MODE, using the unsigned
   optabs when UNS_P is true.  TARGET is passed on as the suggested
   destination of the final operation.  The strategy is whatever
   can_mult_highpart_p reports for MODE; if it reports none, NULL_RTX is
   returned.  Otherwise the result is either that of a direct highpart
   binop, or that of a vector permutation combining two widening
   multiplies of the operands.  */

rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
                      rtx target, bool uns_p)
{
  struct expand_operand ops[3];
  enum insn_code first_icode;
  enum machine_mode wide_mode;
  optab first_tab, second_tab;
  rtx half1, half2, sel;
  rtvec elts;
  int strategy, idx, count;

  strategy = can_mult_highpart_p (mode, uns_p);
  if (strategy == 0)
    return NULL_RTX;

  if (strategy == 1)
    {
      /* Direct highpart pattern; nothing to synthesize.  */
      first_tab = uns_p ? umul_highpart_optab : smul_highpart_optab;
      return expand_binop (mode, first_tab, op0, op1, target, uns_p,
                           OPTAB_LIB_WIDEN);
    }

  if (strategy == 2)
    {
      first_tab = (uns_p
                   ? vec_widen_umult_even_optab : vec_widen_smult_even_optab);
      second_tab = (uns_p
                    ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab);
    }
  else if (strategy == 3)
    {
      /* The lo multiply comes first, except that the two are exchanged
         when BYTES_BIG_ENDIAN.  */
      optab lo_tab = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
      optab hi_tab = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;

      if (BYTES_BIG_ENDIAN)
        {
          first_tab = hi_tab;
          second_tab = lo_tab;
        }
      else
        {
          first_tab = lo_tab;
          second_tab = hi_tab;
        }
    }
  else
    gcc_unreachable ();

  first_icode = optab_handler (first_tab, mode);
  count = GET_MODE_NUNITS (mode);

  /* The widened mode is read off the first pattern's output operand; it
     must have half as many elements as MODE and the same total size.  */
  wide_mode = insn_data[first_icode].operand[0].mode;
  gcc_checking_assert (2 * GET_MODE_NUNITS (wide_mode) == count);
  gcc_checking_assert (GET_MODE_SIZE (wide_mode) == GET_MODE_SIZE (mode));

  /* First widening multiply, viewed back in MODE.  */
  create_output_operand (&ops[0], gen_reg_rtx (wide_mode), wide_mode);
  create_input_operand (&ops[1], op0, mode);
  create_input_operand (&ops[2], op1, mode);
  expand_insn (first_icode, 3, ops);
  half1 = gen_lowpart (mode, ops[0].value);

  /* Second widening multiply, likewise.  */
  create_output_operand (&ops[0], gen_reg_rtx (wide_mode), wide_mode);
  create_input_operand (&ops[1], op0, mode);
  create_input_operand (&ops[2], op1, mode);
  expand_insn (optab_handler (second_tab, mode), 3, ops);
  half2 = gen_lowpart (mode, ops[0].value);

  /* Build the permutation selector; it uses the same index formulas as
     the selectors tested in can_mult_highpart_p.  */
  elts = rtvec_alloc (count);
  for (idx = 0; idx < count; idx++)
    {
      int lane;

      if (strategy == 2)
        lane = !BYTES_BIG_ENDIAN + (idx & ~1) + ((idx & 1) ? count : 0);
      else
        lane = 2 * idx + (BYTES_BIG_ENDIAN ? 0 : 1);
      RTVEC_ELT (elts, idx) = GEN_INT (lane);
    }
  sel = gen_rtx_CONST_VECTOR (mode, elts);

  return expand_vec_perm (mode, half1, half2, sel, target);
}
/* Return true if there is a compare_and_swap pattern. */

View File

@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *);
/* Generate code for VEC_PERM_EXPR. */
extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx);
/* Return non-zero if target supports a given highpart multiplication. */
extern int can_mult_highpart_p (enum machine_mode, bool);
/* Generate code for MULT_HIGHPART_EXPR. */
extern rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, bool);
/* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
if the target does not have such an insn. */

View File

@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
int dummy_int;
unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
optab op;
tree *vec;
unsigned char *sel = NULL;
tree cur_op, m1, m2, mulcst, perm_mask, wider_type, tem, decl_e, decl_o;
tree cur_op, mulcst, tem;
optab op;
if (prec > HOST_BITS_PER_WIDE_INT)
return NULL_TREE;
@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
return NULL_TREE;
op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default);
if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
wider_type = decl_e = decl_o = NULL_TREE;
else
{
wider_type = build_nonstandard_integer_type (prec * 2, unsignedp),
wider_type = build_vector_type (wider_type, nunits / 2);
if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
|| GET_MODE_BITSIZE (TYPE_MODE (wider_type))
!= GET_MODE_BITSIZE (TYPE_MODE (type)))
return NULL_TREE;
sel = XALLOCAVEC (unsigned char, nunits);
if (targetm.vectorize.builtin_mul_widen_even
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl_e = targetm.vectorize.builtin_mul_widen_even (type))
&& (decl_o = targetm.vectorize.builtin_mul_widen_odd (type))
&& (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e)))
== TYPE_MODE (wider_type)))
{
for (i = 0; i < nunits; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
decl_e = decl_o = NULL_TREE;
}
else
decl_e = decl_o = NULL_TREE;
if (decl_e == NULL_TREE)
{
op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
for (i = 0; i < nunits; i++)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
return NULL_TREE;
}
}
if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
return NULL_TREE;
cur_op = op0;
@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
mulcst = build_vector (type, vec);
if (wider_type == NULL_TREE)
cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
else
{
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
perm_mask = build_vector (type, vec);
if (decl_e != NULL_TREE)
{
gimple call;
call = gimple_build_call (decl_e, 2, cur_op, mulcst);
m1 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m1);
m1 = make_ssa_name (m1, call);
gimple_call_set_lhs (call, m1);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
call = gimple_build_call (decl_o, 2, cur_op, mulcst);
m2 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m2);
m2 = make_ssa_name (m2, call);
gimple_call_set_lhs (call, m2);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
}
else
{
m1 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
wider_type, cur_op, mulcst);
m2 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
wider_type, cur_op, mulcst);
}
m1 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m1);
m2 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m2);
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, m1, m2, perm_mask);
}
cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
switch (mode)
{
@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
if (compute_type == type)
{
compute_mode = TYPE_MODE (compute_type);
if (VECTOR_MODE_P (compute_mode)
&& op != NULL
&& optab_handler (op, compute_mode) != CODE_FOR_nothing)
return;
else
/* There is no operation in hardware, so fall back to scalars. */
compute_type = TREE_TYPE (type);
if (VECTOR_MODE_P (compute_mode))
{
if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
return;
if (code == MULT_HIGHPART_EXPR
&& can_mult_highpart_p (compute_mode,
TYPE_UNSIGNED (compute_type)))
return;
}
/* There is no operation in hardware, so fall back to scalars. */
compute_type = TREE_TYPE (type);
}
gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);

View File

@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
optab optab;
tree dummy, q;
enum tree_code dummy_code;
tree q;
int dummy_int, prec;
VEC (tree, heap) *dummy_vec;
stmt_vec_info def_stmt_vinfo;
if (!is_gimple_assign (last_stmt))
@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
|| prec > HOST_BITS_PER_WIDE_INT)
return NULL;
optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default);
if (optab == NULL
|| optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
{
tree witype = build_nonstandard_integer_type (prec * 2,
TYPE_UNSIGNED (itype));
tree vecwtype = get_vectype_for_scalar_type (witype);
if (vecwtype == NULL_TREE)
return NULL;
if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
vecwtype, vectype,
&dummy, &dummy, &dummy_code,
&dummy_code, &dummy_int,
&dummy_vec))
return NULL;
}
if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
return NULL;
STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

View File

@ -3304,18 +3304,17 @@ static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
gimple *vec_stmt, slp_tree slp_node)
{
tree vec_dest, vec_dest2 = NULL_TREE;
tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
tree vec_dest;
tree scalar_dest;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype, wide_vectype = NULL_TREE;
tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum tree_code code;
enum machine_mode vec_mode;
tree new_temp;
int op_type;
optab optab, optab2 = NULL;
optab optab;
int icode;
tree def;
gimple def_stmt;
@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf;
unsigned char *sel = NULL;
tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
|| code == RROTATE_EXPR)
return false;
optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */
if (!optab && code != MULT_HIGHPART_EXPR)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab.");
return false;
}
vec_mode = TYPE_MODE (vectype);
icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing;
if (icode == CODE_FOR_nothing
&& code == MULT_HIGHPART_EXPR
&& VECTOR_MODE_P (vec_mode)
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
if (code == MULT_HIGHPART_EXPR)
{
/* If MULT_HIGHPART_EXPR isn't supported by the backend, see
if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */
unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
tree wide_type
= build_nonstandard_integer_type (prec * 2, unsignedp);
wide_vectype
= get_same_sized_vectype (wide_type, vectype);
sel = XALLOCAVEC (unsigned char, nunits_in);
if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
&& GET_MODE_SIZE (TYPE_MODE (wide_vectype))
== GET_MODE_SIZE (vec_mode))
if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
icode = 0;
else
icode = CODE_FOR_nothing;
}
else
{
optab = optab_for_tree_code (code, vectype, optab_default);
if (!optab)
{
if (targetm.vectorize.builtin_mul_widen_even
&& (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
&& TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits_in : 0);
if (can_vec_perm_p (vec_mode, false, sel))
icode = 0;
}
if (icode == CODE_FOR_nothing)
{
decl1 = NULL_TREE;
decl2 = NULL_TREE;
optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
vectype, optab_default);
optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
vectype, optab_default);
if (optab != NULL
&& optab2 != NULL
&& optab_handler (optab, vec_mode) != CODE_FOR_nothing
&& optab_handler (optab2, vec_mode) != CODE_FOR_nothing
&& insn_data[optab_handler (optab, vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype)
&& insn_data[optab_handler (optab2,
vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
if (can_vec_perm_p (vec_mode, false, sel))
icode = optab_handler (optab, vec_mode);
}
}
}
if (icode == CODE_FOR_nothing)
{
if (optab_for_tree_code (code, vectype, optab_default) == NULL)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab.");
return false;
}
wide_vectype = NULL_TREE;
optab2 = NULL;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab.");
return false;
}
icode = (int) optab_handler (optab, vec_mode);
}
if (icode == CODE_FOR_nothing)
@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "transform binary/unary operation.");
/* Handle def. */
if (wide_vectype)
{
vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
perm_mask = vect_gen_perm_mask (vectype, sel);
}
else
vec_dest = vect_create_destination_var (scalar_dest, vectype);
vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Allocate VECs for vector operands. In case of SLP, vector operands are
created in the previous stages of the recursion, so no allocation is
@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
vop2 = ((op_type == ternary_op)
? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
if (wide_vectype)
{
tree new_temp2, vce;
gcc_assert (code == MULT_HIGHPART_EXPR);
if (decl1 != NULL_TREE)
{
new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
new_temp2 = make_ssa_name (vec_dest2, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
else
{
new_temp = make_ssa_name (vec_dest, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
new_temp, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp2 = make_ssa_name (vec_dest2, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
new_temp2, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest3, vce,
NULL_TREE);
new_temp = make_ssa_name (vec_dest3, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest4, vce,
NULL_TREE);
new_temp2 = make_ssa_name (vec_dest4, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp = permute_vec_elements (new_temp, new_temp2,
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (slp_node)
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
new_stmt);
continue;
}
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt);