expr.c (do_store_flag): Use expand_vec_cmp_expr for mask results.
gcc/ * expr.c (do_store_flag): Use expand_vec_cmp_expr for mask results. (const_vector_mask_from_tree): New. (const_vector_from_tree): Use const_vector_mask_from_tree for boolean vectors. * optabs-query.h (get_vec_cmp_icode): New. * optabs-tree.c (expand_vec_cmp_expr_p): New. * optabs-tree.h (expand_vec_cmp_expr_p): New. * optabs.c (vector_compare_rtx): Add OPNO arg. (expand_vec_cond_expr): Adjust to vector_compare_rtx change. (expand_vec_cmp_expr): New. * optabs.def (vec_cmp_optab): New. (vec_cmpu_optab): New. * optabs.h (expand_vec_cmp_expr): New. * tree-vect-generic.c (expand_vector_comparison): Add vector comparison optabs check. * tree-vect-loop.c (vect_determine_vectorization_factor): Ignore mask operations for VF. Add mask type computation. * tree-vect-stmts.c (get_mask_type_for_scalar_type): New. (vectorizable_comparison): New. (vect_analyze_stmt): Add vectorizable_comparison. (vect_transform_stmt): Likewise. (vect_init_vector): Support boolean vector invariants. (vect_get_vec_def_for_operand): Add VECTYPE arg. (vectorizable_condition): Directly provide vectype for invariants used in comparison. * tree-vectorizer.h (get_mask_type_for_scalar_type): New. (enum vect_var_kind): Add vect_mask_var. (enum stmt_vec_info_type): Add comparison_vec_info_type. (vectorizable_comparison): New. (vect_get_vec_def_for_operand): Add VECTYPE arg. * tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var. (vect_create_destination_var): Likewise. * tree-vect-patterns.c (check_bool_pattern): Check fails if we can vectorize comparison directly. (search_type_for_mask): New. (vect_recog_bool_pattern): Support cases when bool pattern check fails. * tree-vect-slp.c (vect_build_slp_tree_1): Allow comparison statements. (vect_get_constant_vectors): Support boolean vector constants. * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): New. (ix86_expand_int_vec_cmp): New. (ix86_expand_fp_vec_cmp): New. 
* config/i386/i386.c (ix86_expand_sse_cmp): Allow NULL for op_true and op_false. (ix86_int_cmp_code_to_pcmp_immediate): New. (ix86_fp_cmp_code_to_pcmp_immediate): New. (ix86_cmp_code_to_pcmp_immediate): New. (ix86_expand_mask_vec_cmp): New. (ix86_expand_fp_vec_cmp): New. (ix86_expand_int_sse_cmp): New. (ix86_expand_int_vcond): Use ix86_expand_int_sse_cmp. (ix86_expand_int_vec_cmp): New. (ix86_get_mask_mode): New. (TARGET_VECTORIZE_GET_MASK_MODE): New. * config/i386/sse.md (avx512fmaskmodelower): New. (vec_cmp<mode><avx512fmaskmodelower>): New. (vec_cmp<mode><sseintvecmodelower>): New. (vec_cmpv2div2di): New. (vec_cmpu<mode><avx512fmaskmodelower>): New. (vec_cmpu<mode><sseintvecmodelower>): New. (vec_cmpuv2div2di): New. gcc/testsuite/ * gcc.dg/vect/slp-cond-5.c: New test. From-SVN: r230098
This commit is contained in:
parent
fb9333352b
commit
42fd8198b4
|
@ -1,3 +1,69 @@
|
|||
2015-11-10 Ilya Enkovich <enkovich.gnu@gmail.com>
|
||||
|
||||
* expr.c (do_store_flag): Use expand_vec_cmp_expr for mask results.
|
||||
(const_vector_mask_from_tree): New.
|
||||
(const_vector_from_tree): Use const_vector_mask_from_tree
|
||||
for boolean vectors.
|
||||
* optabs-query.h (get_vec_cmp_icode): New.
|
||||
* optabs-tree.c (expand_vec_cmp_expr_p): New.
|
||||
* optabs-tree.h (expand_vec_cmp_expr_p): New.
|
||||
* optabs.c (vector_compare_rtx): Add OPNO arg.
|
||||
(expand_vec_cond_expr): Adjust to vector_compare_rtx change.
|
||||
(expand_vec_cmp_expr): New.
|
||||
* optabs.def (vec_cmp_optab): New.
|
||||
(vec_cmpu_optab): New.
|
||||
* optabs.h (expand_vec_cmp_expr): New.
|
||||
* tree-vect-generic.c (expand_vector_comparison): Add vector
|
||||
comparison optabs check.
|
||||
* tree-vect-loop.c (vect_determine_vectorization_factor): Ignore mask
|
||||
operations for VF. Add mask type computation.
|
||||
* tree-vect-stmts.c (get_mask_type_for_scalar_type): New.
|
||||
(vectorizable_comparison): New.
|
||||
(vect_analyze_stmt): Add vectorizable_comparison.
|
||||
(vect_transform_stmt): Likewise.
|
||||
(vect_init_vector): Support boolean vector invariants.
|
||||
(vect_get_vec_def_for_operand): Add VECTYPE arg.
|
||||
(vectorizable_condition): Directly provide vectype for invariants
|
||||
used in comparison.
|
||||
* tree-vectorizer.h (get_mask_type_for_scalar_type): New.
|
||||
(enum vect_var_kind): Add vect_mask_var.
|
||||
(enum stmt_vec_info_type): Add comparison_vec_info_type.
|
||||
(vectorizable_comparison): New.
|
||||
(vect_get_vec_def_for_operand): Add VECTYPE arg.
|
||||
* tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var.
|
||||
(vect_create_destination_var): Likewise.
|
||||
* tree-vect-patterns.c (check_bool_pattern): Check fails
|
||||
if we can vectorize comparison directly.
|
||||
(search_type_for_mask): New.
|
||||
(vect_recog_bool_pattern): Support cases when bool pattern
|
||||
check fails.
|
||||
* tree-vect-slp.c (vect_build_slp_tree_1): Allow
|
||||
comparison statements.
|
||||
(vect_get_constant_vectors): Support boolean vector
|
||||
constants.
|
||||
* config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): New.
|
||||
(ix86_expand_int_vec_cmp): New.
|
||||
(ix86_expand_fp_vec_cmp): New.
|
||||
* config/i386/i386.c (ix86_expand_sse_cmp): Allow NULL for
|
||||
op_true and op_false.
|
||||
(ix86_int_cmp_code_to_pcmp_immediate): New.
|
||||
(ix86_fp_cmp_code_to_pcmp_immediate): New.
|
||||
(ix86_cmp_code_to_pcmp_immediate): New.
|
||||
(ix86_expand_mask_vec_cmp): New.
|
||||
(ix86_expand_fp_vec_cmp): New.
|
||||
(ix86_expand_int_sse_cmp): New.
|
||||
(ix86_expand_int_vcond): Use ix86_expand_int_sse_cmp.
|
||||
(ix86_expand_int_vec_cmp): New.
|
||||
(ix86_get_mask_mode): New.
|
||||
(TARGET_VECTORIZE_GET_MASK_MODE): New.
|
||||
* config/i386/sse.md (avx512fmaskmodelower): New.
|
||||
(vec_cmp<mode><avx512fmaskmodelower>): New.
|
||||
(vec_cmp<mode><sseintvecmodelower>): New.
|
||||
(vec_cmpv2div2di): New.
|
||||
(vec_cmpu<mode><avx512fmaskmodelower>): New.
|
||||
(vec_cmpu<mode><sseintvecmodelower>): New.
|
||||
(vec_cmpuv2div2di): New.
|
||||
|
||||
2015-11-10 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/68240
|
||||
|
|
|
@ -129,6 +129,9 @@ extern bool ix86_expand_fp_vcond (rtx[]);
|
|||
extern bool ix86_expand_int_vcond (rtx[]);
|
||||
extern void ix86_expand_vec_perm (rtx[]);
|
||||
extern bool ix86_expand_vec_perm_const (rtx[]);
|
||||
extern bool ix86_expand_mask_vec_cmp (rtx[]);
|
||||
extern bool ix86_expand_int_vec_cmp (rtx[]);
|
||||
extern bool ix86_expand_fp_vec_cmp (rtx[]);
|
||||
extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
|
||||
extern bool ix86_expand_int_addcc (rtx[]);
|
||||
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
|
||||
|
|
|
@ -22582,8 +22582,8 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
|
|||
cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
|
||||
|
||||
if (optimize
|
||||
|| reg_overlap_mentioned_p (dest, op_true)
|
||||
|| reg_overlap_mentioned_p (dest, op_false))
|
||||
|| (op_true && reg_overlap_mentioned_p (dest, op_true))
|
||||
|| (op_false && reg_overlap_mentioned_p (dest, op_false)))
|
||||
dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
|
||||
|
||||
/* Compare patterns for int modes are unspec in AVX512F only. */
|
||||
|
@ -22644,6 +22644,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
|||
|
||||
rtx t2, t3, x;
|
||||
|
||||
/* If we have an integer mask and FP value then we need
|
||||
to cast mask to FP mode. */
|
||||
if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
|
||||
{
|
||||
cmp = force_reg (cmpmode, cmp);
|
||||
cmp = gen_rtx_SUBREG (mode, cmp, 0);
|
||||
}
|
||||
|
||||
if (vector_all_ones_operand (op_true, mode)
|
||||
&& rtx_equal_p (op_false, CONST0_RTX (mode))
|
||||
&& !maskcmp)
|
||||
|
@ -22855,6 +22863,332 @@ ix86_expand_fp_movcc (rtx operands[])
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
|
||||
|
||||
static int
|
||||
ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case EQ:
|
||||
return 0;
|
||||
case LT:
|
||||
case LTU:
|
||||
return 1;
|
||||
case LE:
|
||||
case LEU:
|
||||
return 2;
|
||||
case NE:
|
||||
return 4;
|
||||
case GE:
|
||||
case GEU:
|
||||
return 5;
|
||||
case GT:
|
||||
case GTU:
|
||||
return 6;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
|
||||
|
||||
static int
|
||||
ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case EQ:
|
||||
return 0x08;
|
||||
case NE:
|
||||
return 0x04;
|
||||
case GT:
|
||||
return 0x16;
|
||||
case LE:
|
||||
return 0x1a;
|
||||
case GE:
|
||||
return 0x15;
|
||||
case LT:
|
||||
return 0x19;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Return immediate value to be used in UNSPEC_PCMP
|
||||
for comparison CODE in MODE. */
|
||||
|
||||
static int
|
||||
ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
|
||||
{
|
||||
if (FLOAT_MODE_P (mode))
|
||||
return ix86_fp_cmp_code_to_pcmp_immediate (code);
|
||||
return ix86_int_cmp_code_to_pcmp_immediate (code);
|
||||
}
|
||||
|
||||
/* Expand AVX-512 vector comparison. */
|
||||
|
||||
bool
|
||||
ix86_expand_mask_vec_cmp (rtx operands[])
|
||||
{
|
||||
machine_mode mask_mode = GET_MODE (operands[0]);
|
||||
machine_mode cmp_mode = GET_MODE (operands[2]);
|
||||
enum rtx_code code = GET_CODE (operands[1]);
|
||||
rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
|
||||
int unspec_code;
|
||||
rtx unspec;
|
||||
|
||||
switch (code)
|
||||
{
|
||||
case LEU:
|
||||
case GTU:
|
||||
case GEU:
|
||||
case LTU:
|
||||
unspec_code = UNSPEC_UNSIGNED_PCMP;
|
||||
default:
|
||||
unspec_code = UNSPEC_PCMP;
|
||||
}
|
||||
|
||||
unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
|
||||
operands[3], imm),
|
||||
unspec_code);
|
||||
emit_insn (gen_rtx_SET (operands[0], unspec));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Expand fp vector comparison. */
|
||||
|
||||
bool
|
||||
ix86_expand_fp_vec_cmp (rtx operands[])
|
||||
{
|
||||
enum rtx_code code = GET_CODE (operands[1]);
|
||||
rtx cmp;
|
||||
|
||||
code = ix86_prepare_sse_fp_compare_args (operands[0], code,
|
||||
&operands[2], &operands[3]);
|
||||
if (code == UNKNOWN)
|
||||
{
|
||||
rtx temp;
|
||||
switch (GET_CODE (operands[1]))
|
||||
{
|
||||
case LTGT:
|
||||
temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
|
||||
operands[3], NULL, NULL);
|
||||
cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
|
||||
operands[3], NULL, NULL);
|
||||
code = AND;
|
||||
break;
|
||||
case UNEQ:
|
||||
temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
|
||||
operands[3], NULL, NULL);
|
||||
cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
|
||||
operands[3], NULL, NULL);
|
||||
code = IOR;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
|
||||
OPTAB_DIRECT);
|
||||
}
|
||||
else
|
||||
cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
|
||||
operands[1], operands[2]);
|
||||
|
||||
if (operands[0] != cmp)
|
||||
emit_move_insn (operands[0], cmp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Emit an integer vector comparison CODE of COP0 and COP1 whose result
   is meant for DEST, and return the rtx holding the result.  Return
   NULL if the comparison is a V2DImode one that needs SSE4.1 (EQ) or
   SSE4.2 (GT/GTU) and that ISA is not enabled.

   Unless XOP handles the mode directly, the comparison is first
   canonicalized to EQ, GT or GTU, and GTU is then rewritten either as
   a signed GT on operands with the sign-bit mask subtracted (SI/DI
   elements) or as an EQ-with-zero test of an unsigned saturating
   subtraction (QI/HI elements).  *NEGATE is set when the value
   computed is the inverse of the comparison that was asked for.

   OP_TRUE and OP_FALSE may be NULL; they are swapped when *NEGATE is
   set and are passed on to ix86_expand_sse_cmp.  */

static rtx
ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
                         rtx op_true, rtx op_false, bool *negate)
{
  machine_mode data_mode = GET_MODE (dest);
  machine_mode mode = GET_MODE (cop0);
  rtx x;

  *negate = false;

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  bool xop_handles_mode
    = (TARGET_XOP
       && (mode == V16QImode || mode == V8HImode
           || mode == V4SImode || mode == V2DImode));

  if (!xop_handles_mode)
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          *negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          *negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          std::swap (cop0, cop1);
          code = swap_condition (code);
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return NULL;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return NULL;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          /* Subtraction generator for the SI/DI element modes; stays
             NULL for the QI/HI element modes.  */
          rtx (*gen_sub3) (rtx, rtx, rtx) = NULL;

          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V16SImode: gen_sub3 = gen_subv16si3; break;
            case V8DImode: gen_sub3 = gen_subv8di3; break;
            case V8SImode: gen_sub3 = gen_subv8si3; break;
            case V4DImode: gen_sub3 = gen_subv4di3; break;
            case V4SImode: gen_sub3 = gen_subv4si3; break;
            case V2DImode: gen_sub3 = gen_subv2di3; break;

            case V64QImode:
            case V32HImode:
            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              break;

            default:
              gcc_unreachable ();
            }

          if (gen_sub3)
            {
              /* Subtract (-(INT MAX) - 1) from both operands to make
                 them signed.  */
              rtx mask = ix86_build_signbit_mask (mode, true, false);

              rtx t1 = gen_reg_rtx (mode);
              emit_insn (gen_sub3 (t1, cop0, mask));

              rtx t2 = gen_reg_rtx (mode);
              emit_insn (gen_sub3 (t2, cop1, mask));

              cop0 = t1;
              cop1 = t2;
              code = GT;
            }
          else
            {
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
                                                           cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              *negate = !*negate;
            }
        }
    }

  if (*negate)
    std::swap (op_true, op_false);

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    return ix86_expand_sse_cmp (dest, code, cop0, cop1, op_true, op_false);

  gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
  x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
                           op_true, op_false);
  if (GET_MODE (x) == mode)
    x = gen_lowpart (data_mode, x);

  return x;
}
|
||||
|
||||
/* Expand integer vector comparison. */
|
||||
|
||||
bool
|
||||
ix86_expand_int_vec_cmp (rtx operands[])
|
||||
{
|
||||
rtx_code code = GET_CODE (operands[1]);
|
||||
bool negate = false;
|
||||
rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
|
||||
operands[3], NULL, NULL, &negate);
|
||||
|
||||
if (!cmp)
|
||||
return false;
|
||||
|
||||
if (negate)
|
||||
cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
|
||||
CONST0_RTX (GET_MODE (cmp)),
|
||||
NULL, NULL, &negate);
|
||||
|
||||
gcc_assert (!negate);
|
||||
|
||||
if (operands[0] != cmp)
|
||||
emit_move_insn (operands[0], cmp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Expand a floating-point vector conditional move; a vcond operation
|
||||
rather than a movcc operation. */
|
||||
|
||||
|
@ -22957,149 +23291,11 @@ ix86_expand_int_vcond (rtx operands[])
|
|||
if (!general_operand (operands[2], data_mode))
|
||||
operands[2] = force_reg (data_mode, operands[2]);
|
||||
|
||||
/* XOP supports all of the comparisons on all 128-bit vector int types. */
|
||||
if (TARGET_XOP
|
||||
&& (mode == V16QImode || mode == V8HImode
|
||||
|| mode == V4SImode || mode == V2DImode))
|
||||
;
|
||||
else
|
||||
{
|
||||
/* Canonicalize the comparison to EQ, GT, GTU. */
|
||||
switch (code)
|
||||
{
|
||||
case EQ:
|
||||
case GT:
|
||||
case GTU:
|
||||
break;
|
||||
x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
|
||||
operands[1], operands[2], &negate);
|
||||
|
||||
case NE:
|
||||
case LE:
|
||||
case LEU:
|
||||
code = reverse_condition (code);
|
||||
negate = true;
|
||||
break;
|
||||
|
||||
case GE:
|
||||
case GEU:
|
||||
code = reverse_condition (code);
|
||||
negate = true;
|
||||
/* FALLTHRU */
|
||||
|
||||
case LT:
|
||||
case LTU:
|
||||
std::swap (cop0, cop1);
|
||||
code = swap_condition (code);
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Only SSE4.1/SSE4.2 supports V2DImode. */
|
||||
if (mode == V2DImode)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case EQ:
|
||||
/* SSE4.1 supports EQ. */
|
||||
if (!TARGET_SSE4_1)
|
||||
return false;
|
||||
break;
|
||||
|
||||
case GT:
|
||||
case GTU:
|
||||
/* SSE4.2 supports GT/GTU. */
|
||||
if (!TARGET_SSE4_2)
|
||||
return false;
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Unsigned parallel compare is not supported by the hardware.
|
||||
Play some tricks to turn this into a signed comparison
|
||||
against 0. */
|
||||
if (code == GTU)
|
||||
{
|
||||
cop0 = force_reg (mode, cop0);
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V16SImode:
|
||||
case V8DImode:
|
||||
case V8SImode:
|
||||
case V4DImode:
|
||||
case V4SImode:
|
||||
case V2DImode:
|
||||
{
|
||||
rtx t1, t2, mask;
|
||||
rtx (*gen_sub3) (rtx, rtx, rtx);
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V16SImode: gen_sub3 = gen_subv16si3; break;
|
||||
case V8DImode: gen_sub3 = gen_subv8di3; break;
|
||||
case V8SImode: gen_sub3 = gen_subv8si3; break;
|
||||
case V4DImode: gen_sub3 = gen_subv4di3; break;
|
||||
case V4SImode: gen_sub3 = gen_subv4si3; break;
|
||||
case V2DImode: gen_sub3 = gen_subv2di3; break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
/* Subtract (-(INT MAX) - 1) from both operands to make
|
||||
them signed. */
|
||||
mask = ix86_build_signbit_mask (mode, true, false);
|
||||
t1 = gen_reg_rtx (mode);
|
||||
emit_insn (gen_sub3 (t1, cop0, mask));
|
||||
|
||||
t2 = gen_reg_rtx (mode);
|
||||
emit_insn (gen_sub3 (t2, cop1, mask));
|
||||
|
||||
cop0 = t1;
|
||||
cop1 = t2;
|
||||
code = GT;
|
||||
}
|
||||
break;
|
||||
|
||||
case V64QImode:
|
||||
case V32HImode:
|
||||
case V32QImode:
|
||||
case V16HImode:
|
||||
case V16QImode:
|
||||
case V8HImode:
|
||||
/* Perform a parallel unsigned saturating subtraction. */
|
||||
x = gen_reg_rtx (mode);
|
||||
emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
|
||||
|
||||
cop0 = x;
|
||||
cop1 = CONST0_RTX (mode);
|
||||
code = EQ;
|
||||
negate = !negate;
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Allow the comparison to be done in one mode, but the movcc to
|
||||
happen in another mode. */
|
||||
if (data_mode == mode)
|
||||
{
|
||||
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
|
||||
operands[1+negate], operands[2-negate]);
|
||||
}
|
||||
else
|
||||
{
|
||||
gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
|
||||
x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
|
||||
operands[1+negate], operands[2-negate]);
|
||||
if (GET_MODE (x) == mode)
|
||||
x = gen_lowpart (data_mode, x);
|
||||
}
|
||||
if (!x)
|
||||
return false;
|
||||
|
||||
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
|
||||
operands[2-negate]);
|
||||
|
@ -53085,6 +53281,28 @@ ix86_autovectorize_vector_sizes (void)
|
|||
(TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
|
||||
}
|
||||
|
||||
/* Implemenation of targetm.vectorize.get_mask_mode. */
|
||||
|
||||
static machine_mode
|
||||
ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
|
||||
{
|
||||
unsigned elem_size = vector_size / nunits;
|
||||
|
||||
/* Scalar mask case. */
|
||||
if (TARGET_AVX512F && vector_size == 64)
|
||||
{
|
||||
if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
|
||||
return smallest_mode_for_size (nunits, MODE_INT);
|
||||
}
|
||||
|
||||
machine_mode elem_mode
|
||||
= smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
|
||||
|
||||
gcc_assert (elem_size * nunits == vector_size);
|
||||
|
||||
return mode_for_vector (elem_mode, nunits);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Return class of registers which could be used for pseudo of MODE
|
||||
|
@ -54096,6 +54314,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
|
|||
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
||||
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
|
||||
ix86_autovectorize_vector_sizes
|
||||
#undef TARGET_VECTORIZE_GET_MASK_MODE
|
||||
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
|
||||
#undef TARGET_VECTORIZE_INIT_COST
|
||||
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
|
||||
#undef TARGET_VECTORIZE_ADD_STMT_COST
|
||||
|
|
|
@ -613,6 +613,15 @@
|
|||
(V16SF "HI") (V8SF "QI") (V4SF "QI")
|
||||
(V8DF "QI") (V4DF "QI") (V2DF "QI")])
|
||||
|
||||
;; Mapping of vector modes to the lower-case name of the corresponding
;; mask mode ("di", "si", "hi" or "qi"), for use in pattern names such
;; as vec_cmp<mode><avx512fmaskmodelower>.
|
||||
(define_mode_attr avx512fmaskmodelower
|
||||
[(V64QI "di") (V32QI "si") (V16QI "hi")
|
||||
(V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
|
||||
(V16SI "hi") (V8SI "qi") (V4SI "qi")
|
||||
(V8DI "qi") (V4DI "qi") (V2DI "qi")
|
||||
(V16SF "hi") (V8SF "qi") (V4SF "qi")
|
||||
(V8DF "qi") (V4DF "qi") (V2DF "qi")])
|
||||
|
||||
;; Mapping of vector float modes to an integer mode of the same size
|
||||
(define_mode_attr sseintvecmode
|
||||
[(V16SF "V16SI") (V8DF "V8DI")
|
||||
|
@ -2811,6 +2820,150 @@
|
|||
(const_string "0")))
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Vector comparison of V48_AVX512VL operands 2 and 3 with the operator
;; in operand 1, producing an <avx512fmaskmode> mask in operand 0.
;; Enabled for TARGET_AVX512F; expansion is delegated to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
|
||||
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(match_operator:<avx512fmaskmode> 1 ""
|
||||
[(match_operand:V48_AVX512VL 2 "register_operand")
|
||||
(match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
|
||||
"TARGET_AVX512F"
|
||||
{
|
||||
bool ok = ix86_expand_mask_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Vector comparison of VI12_AVX512VL operands 2 and 3 with the operator
;; in operand 1, producing an <avx512fmaskmode> mask in operand 0.
;; Enabled for TARGET_AVX512BW; expansion is delegated to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
|
||||
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(match_operator:<avx512fmaskmode> 1 ""
|
||||
[(match_operand:VI12_AVX512VL 2 "register_operand")
|
||||
(match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
|
||||
"TARGET_AVX512BW"
|
||||
{
|
||||
bool ok = ix86_expand_mask_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Vector comparison of VI_256 operands 2 and 3 with the operator in
;; operand 1, producing a <sseintvecmode> vector mask in operand 0.
;; Enabled for TARGET_AVX2; expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
|
||||
[(set (match_operand:<sseintvecmode> 0 "register_operand")
|
||||
(match_operator:<sseintvecmode> 1 ""
|
||||
[(match_operand:VI_256 2 "register_operand")
|
||||
(match_operand:VI_256 3 "nonimmediate_operand")]))]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
bool ok = ix86_expand_int_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Vector comparison of VI124_128 operands 2 and 3 with the operator in
;; operand 1, producing a <sseintvecmode> vector mask in operand 0.
;; Enabled for TARGET_SSE2; expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
|
||||
[(set (match_operand:<sseintvecmode> 0 "register_operand")
|
||||
(match_operator:<sseintvecmode> 1 ""
|
||||
[(match_operand:VI124_128 2 "register_operand")
|
||||
(match_operand:VI124_128 3 "nonimmediate_operand")]))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
bool ok = ix86_expand_int_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Vector comparison of V2DI operands 2 and 3 with the operator in
;; operand 1, producing a V2DI vector mask in operand 0.  Enabled for
;; TARGET_SSE4_2; expansion is delegated to ix86_expand_int_vec_cmp,
;; which is asserted to succeed.
(define_expand "vec_cmpv2div2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand")
|
||||
(match_operator:V2DI 1 ""
|
||||
[(match_operand:V2DI 2 "register_operand")
|
||||
(match_operand:V2DI 3 "nonimmediate_operand")]))]
|
||||
"TARGET_SSE4_2"
|
||||
{
|
||||
bool ok = ix86_expand_int_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Vector comparison of VF_256 operands 2 and 3 with the operator in
;; operand 1, producing a <sseintvecmode> vector mask in operand 0.
;; Enabled for TARGET_AVX; expansion is delegated to
;; ix86_expand_fp_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
|
||||
[(set (match_operand:<sseintvecmode> 0 "register_operand")
|
||||
(match_operator:<sseintvecmode> 1 ""
|
||||
[(match_operand:VF_256 2 "register_operand")
|
||||
(match_operand:VF_256 3 "nonimmediate_operand")]))]
|
||||
"TARGET_AVX"
|
||||
{
|
||||
bool ok = ix86_expand_fp_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Vector comparison of VF_128 operands 2 and 3 with the operator in
;; operand 1, producing a <sseintvecmode> vector mask in operand 0.
;; Enabled for TARGET_SSE; expansion is delegated to
;; ix86_expand_fp_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
|
||||
[(set (match_operand:<sseintvecmode> 0 "register_operand")
|
||||
(match_operator:<sseintvecmode> 1 ""
|
||||
[(match_operand:VF_128 2 "register_operand")
|
||||
(match_operand:VF_128 3 "nonimmediate_operand")]))]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
bool ok = ix86_expand_fp_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; vec_cmpu variant (presumably the unsigned comparison, per the
;; pattern name) for VI48_AVX512VL operands 2 and 3 with the operator
;; in operand 1, producing an <avx512fmaskmode> mask in operand 0.
;; Enabled for TARGET_AVX512F; expansion is delegated to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
|
||||
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(match_operator:<avx512fmaskmode> 1 ""
|
||||
[(match_operand:VI48_AVX512VL 2 "register_operand")
|
||||
(match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
|
||||
"TARGET_AVX512F"
|
||||
{
|
||||
bool ok = ix86_expand_mask_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; vec_cmpu variant (presumably the unsigned comparison, per the
;; pattern name) for VI12_AVX512VL operands 2 and 3 with the operator
;; in operand 1, producing an <avx512fmaskmode> mask in operand 0.
;; Enabled for TARGET_AVX512BW; expansion is delegated to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
|
||||
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
|
||||
(match_operator:<avx512fmaskmode> 1 ""
|
||||
[(match_operand:VI12_AVX512VL 2 "register_operand")
|
||||
(match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
|
||||
"TARGET_AVX512BW"
|
||||
{
|
||||
bool ok = ix86_expand_mask_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; vec_cmpu variant (presumably the unsigned comparison, per the
;; pattern name) for VI_256 operands 2 and 3 with the operator in
;; operand 1, producing a <sseintvecmode> vector mask in operand 0.
;; Enabled for TARGET_AVX2; expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmpu<mode><sseintvecmodelower>"
|
||||
[(set (match_operand:<sseintvecmode> 0 "register_operand")
|
||||
(match_operator:<sseintvecmode> 1 ""
|
||||
[(match_operand:VI_256 2 "register_operand")
|
||||
(match_operand:VI_256 3 "nonimmediate_operand")]))]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
bool ok = ix86_expand_int_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; vec_cmpu variant (presumably the unsigned comparison, per the
;; pattern name) for VI124_128 operands 2 and 3 with the operator in
;; operand 1, producing a <sseintvecmode> vector mask in operand 0.
;; Enabled for TARGET_SSE2; expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
(define_expand "vec_cmpu<mode><sseintvecmodelower>"
|
||||
[(set (match_operand:<sseintvecmode> 0 "register_operand")
|
||||
(match_operator:<sseintvecmode> 1 ""
|
||||
[(match_operand:VI124_128 2 "register_operand")
|
||||
(match_operand:VI124_128 3 "nonimmediate_operand")]))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
bool ok = ix86_expand_int_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; vec_cmpu variant (presumably the unsigned comparison, per the
;; pattern name) for V2DI operands 2 and 3 with the operator in
;; operand 1, producing a V2DI vector mask in operand 0.  Enabled for
;; TARGET_SSE4_2; expansion is delegated to ix86_expand_int_vec_cmp,
;; which is asserted to succeed.
(define_expand "vec_cmpuv2div2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand")
|
||||
(match_operator:V2DI 1 ""
|
||||
[(match_operand:V2DI 2 "register_operand")
|
||||
(match_operand:V2DI 3 "nonimmediate_operand")]))]
|
||||
"TARGET_SSE4_2"
|
||||
{
|
||||
bool ok = ix86_expand_int_vec_cmp (operands);
|
||||
gcc_assert (ok);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vcond<V_512:mode><VF_512:mode>"
|
||||
[(set (match_operand:V_512 0 "register_operand")
|
||||
(if_then_else:V_512
|
||||
|
|
49
gcc/expr.c
49
gcc/expr.c
|
@ -11128,9 +11128,15 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
|
|||
if (TREE_CODE (ops->type) == VECTOR_TYPE)
|
||||
{
|
||||
tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
|
||||
tree if_true = constant_boolean_node (true, ops->type);
|
||||
tree if_false = constant_boolean_node (false, ops->type);
|
||||
return expand_vec_cond_expr (ops->type, ifexp, if_true, if_false, target);
|
||||
if (VECTOR_BOOLEAN_TYPE_P (ops->type))
|
||||
return expand_vec_cmp_expr (ops->type, ifexp, target);
|
||||
else
|
||||
{
|
||||
tree if_true = constant_boolean_node (true, ops->type);
|
||||
tree if_false = constant_boolean_node (false, ops->type);
|
||||
return expand_vec_cond_expr (ops->type, ifexp, if_true,
|
||||
if_false, target);
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the rtx comparison code to use. We know that EXP is a comparison
|
||||
|
@ -11417,6 +11423,40 @@ try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Return a CONST_VECTOR rtx representing vector mask for
|
||||
a VECTOR_CST of booleans. */
|
||||
static rtx
|
||||
const_vector_mask_from_tree (tree exp)
|
||||
{
|
||||
rtvec v;
|
||||
unsigned i;
|
||||
int units;
|
||||
tree elt;
|
||||
machine_mode inner, mode;
|
||||
|
||||
mode = TYPE_MODE (TREE_TYPE (exp));
|
||||
units = GET_MODE_NUNITS (mode);
|
||||
inner = GET_MODE_INNER (mode);
|
||||
|
||||
v = rtvec_alloc (units);
|
||||
|
||||
for (i = 0; i < VECTOR_CST_NELTS (exp); ++i)
|
||||
{
|
||||
elt = VECTOR_CST_ELT (exp, i);
|
||||
|
||||
gcc_assert (TREE_CODE (elt) == INTEGER_CST);
|
||||
if (integer_zerop (elt))
|
||||
RTVEC_ELT (v, i) = CONST0_RTX (inner);
|
||||
else if (integer_onep (elt)
|
||||
|| integer_minus_onep (elt))
|
||||
RTVEC_ELT (v, i) = CONSTM1_RTX (inner);
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
return gen_rtx_CONST_VECTOR (mode, v);
|
||||
}
|
||||
|
||||
/* Return a CONST_VECTOR rtx for a VECTOR_CST tree. */
|
||||
static rtx
|
||||
const_vector_from_tree (tree exp)
|
||||
|
@ -11432,6 +11472,9 @@ const_vector_from_tree (tree exp)
|
|||
if (initializer_zerop (exp))
|
||||
return CONST0_RTX (mode);
|
||||
|
||||
if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp)))
|
||||
return const_vector_mask_from_tree (exp);
|
||||
|
||||
units = GET_MODE_NUNITS (mode);
|
||||
inner = GET_MODE_INNER (mode);
|
||||
|
||||
|
|
|
@ -74,6 +74,16 @@ trapv_binoptab_p (optab binoptab)
|
|||
|| binoptab == smulv_optab);
|
||||
}
|
||||
|
||||
/* Return insn code for a comparison operator with VMODE
|
||||
resultin MASK_MODE, unsigned if UNS is true. */
|
||||
|
||||
static inline enum insn_code
|
||||
get_vec_cmp_icode (machine_mode vmode, machine_mode mask_mode, bool uns)
|
||||
{
|
||||
optab tab = uns ? vec_cmpu_optab : vec_cmp_optab;
|
||||
return convert_optab_handler (tab, vmode, mask_mode);
|
||||
}
|
||||
|
||||
/* Return insn code for a conditional operator with a comparison in
|
||||
mode CMODE, unsigned if UNS is true, resulting in a value of mode VMODE. */
|
||||
|
||||
|
|
|
@ -320,6 +320,19 @@ supportable_convert_operation (enum tree_code code,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Return TRUE if appropriate vector insn is available
|
||||
for vector comparison expr with vector type VALUE_TYPE
|
||||
and resulting mask with MASK_TYPE. */
|
||||
|
||||
bool
|
||||
expand_vec_cmp_expr_p (tree value_type, tree mask_type)
|
||||
{
|
||||
enum insn_code icode = get_vec_cmp_icode (TYPE_MODE (value_type),
|
||||
TYPE_MODE (mask_type),
|
||||
TYPE_UNSIGNED (value_type));
|
||||
return (icode != CODE_FOR_nothing);
|
||||
}
|
||||
|
||||
/* Return TRUE iff, appropriate vector insns are available
|
||||
for vector cond expr with vector type VALUE_TYPE and a comparison
|
||||
with operand vector types in CMP_OP_TYPE. */
|
||||
|
|
|
@ -39,6 +39,7 @@ optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype);
|
|||
optab scalar_reduc_to_vector (optab, const_tree);
|
||||
bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
|
||||
enum tree_code *);
|
||||
bool expand_vec_cmp_expr_p (tree, tree);
|
||||
bool expand_vec_cond_expr_p (tree, tree);
|
||||
void init_tree_optimization_optabs (tree);
|
||||
|
||||
|
|
44
gcc/optabs.c
44
gcc/optabs.c
|
@ -5180,11 +5180,13 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
|
|||
}
|
||||
|
||||
/* Return comparison rtx for COND. Use UNSIGNEDP to select signed or
|
||||
unsigned operators. Do not generate compare instruction. */
|
||||
unsigned operators. OPNO holds an index of the first comparison
|
||||
operand in insn with code ICODE. Do not generate compare instruction. */
|
||||
|
||||
static rtx
|
||||
vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1,
|
||||
bool unsignedp, enum insn_code icode)
|
||||
bool unsignedp, enum insn_code icode,
|
||||
unsigned int opno)
|
||||
{
|
||||
struct expand_operand ops[2];
|
||||
rtx rtx_op0, rtx_op1;
|
||||
|
@ -5210,7 +5212,7 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1,
|
|||
|
||||
create_input_operand (&ops[0], rtx_op0, m0);
|
||||
create_input_operand (&ops[1], rtx_op1, m1);
|
||||
if (!maybe_legitimize_operands (icode, 4, 2, ops))
|
||||
if (!maybe_legitimize_operands (icode, opno, 2, ops))
|
||||
gcc_unreachable ();
|
||||
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
|
||||
}
|
||||
|
@ -5465,7 +5467,7 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
|
|||
if (icode == CODE_FOR_nothing)
|
||||
return 0;
|
||||
|
||||
comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode);
|
||||
comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, 4);
|
||||
rtx_op1 = expand_normal (op1);
|
||||
rtx_op2 = expand_normal (op2);
|
||||
|
||||
|
@ -5479,6 +5481,40 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
|
|||
return ops[0].value;
|
||||
}
|
||||
|
||||
/* Generate insns for a vector comparison into a mask. */
|
||||
|
||||
rtx
|
||||
expand_vec_cmp_expr (tree type, tree exp, rtx target)
|
||||
{
|
||||
struct expand_operand ops[4];
|
||||
enum insn_code icode;
|
||||
rtx comparison;
|
||||
machine_mode mask_mode = TYPE_MODE (type);
|
||||
machine_mode vmode;
|
||||
bool unsignedp;
|
||||
tree op0a, op0b;
|
||||
enum tree_code tcode;
|
||||
|
||||
op0a = TREE_OPERAND (exp, 0);
|
||||
op0b = TREE_OPERAND (exp, 1);
|
||||
tcode = TREE_CODE (exp);
|
||||
|
||||
unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
|
||||
vmode = TYPE_MODE (TREE_TYPE (op0a));
|
||||
|
||||
icode = get_vec_cmp_icode (vmode, mask_mode, unsignedp);
|
||||
if (icode == CODE_FOR_nothing)
|
||||
return 0;
|
||||
|
||||
comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, 2);
|
||||
create_output_operand (&ops[0], target, mask_mode);
|
||||
create_fixed_operand (&ops[1], comparison);
|
||||
create_fixed_operand (&ops[2], XEXP (comparison, 0));
|
||||
create_fixed_operand (&ops[3], XEXP (comparison, 1));
|
||||
expand_insn (icode, 4, ops);
|
||||
return ops[0].value;
|
||||
}
|
||||
|
||||
/* Expand a highpart multiply. */
|
||||
|
||||
rtx
|
||||
|
|
|
@ -61,6 +61,8 @@ OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b")
|
|||
OPTAB_CD(vec_store_lanes_optab, "vec_store_lanes$a$b")
|
||||
OPTAB_CD(vcond_optab, "vcond$a$b")
|
||||
OPTAB_CD(vcondu_optab, "vcondu$a$b")
|
||||
OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b")
|
||||
OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
|
||||
|
||||
OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
|
||||
OPTAB_NX(add_optab, "add$F$a3")
|
||||
|
|
|
@ -304,6 +304,9 @@ extern rtx_insn *gen_cond_trap (enum rtx_code, rtx, rtx, rtx);
|
|||
/* Generate code for VEC_PERM_EXPR. */
|
||||
extern rtx expand_vec_perm (machine_mode, rtx, rtx, rtx, rtx);
|
||||
|
||||
/* Generate code for vector comparison. */
|
||||
extern rtx expand_vec_cmp_expr (tree, tree, rtx);
|
||||
|
||||
/* Generate code for VEC_COND_EXPR. */
|
||||
extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
|
||||
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2015-11-10 Ilya Enkovich <enkovich.gnu@gmail.com>
|
||||
|
||||
* gcc.dg/vect/slp-cond-5.c: New test.
|
||||
|
||||
2015-11-10 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/68240
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
/* { dg-require-effective-target vect_condition } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
static inline int
|
||||
foo (int x, int y, int a, int b)
|
||||
{
|
||||
if (x >= y && a > b)
|
||||
return a;
|
||||
else
|
||||
return b;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar (int * __restrict__ a, int * __restrict__ b,
|
||||
int * __restrict__ c, int * __restrict__ d,
|
||||
int * __restrict__ e, int w)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N/16; i++, a += 16, b += 16, c += 16, d += 16, e += 16)
|
||||
{
|
||||
e[0] = foo (c[0], d[0], a[0] * w, b[0] * w);
|
||||
e[1] = foo (c[1], d[1], a[1] * w, b[1] * w);
|
||||
e[2] = foo (c[2], d[2], a[2] * w, b[2] * w);
|
||||
e[3] = foo (c[3], d[3], a[3] * w, b[3] * w);
|
||||
e[4] = foo (c[4], d[4], a[4] * w, b[4] * w);
|
||||
e[5] = foo (c[5], d[5], a[5] * w, b[5] * w);
|
||||
e[6] = foo (c[6], d[6], a[6] * w, b[6] * w);
|
||||
e[7] = foo (c[7], d[7], a[7] * w, b[7] * w);
|
||||
e[8] = foo (c[8], d[8], a[8] * w, b[8] * w);
|
||||
e[9] = foo (c[9], d[9], a[9] * w, b[9] * w);
|
||||
e[10] = foo (c[10], d[10], a[10] * w, b[10] * w);
|
||||
e[11] = foo (c[11], d[11], a[11] * w, b[11] * w);
|
||||
e[12] = foo (c[12], d[12], a[12] * w, b[12] * w);
|
||||
e[13] = foo (c[13], d[13], a[13] * w, b[13] * w);
|
||||
e[14] = foo (c[14], d[14], a[14] * w, b[14] * w);
|
||||
e[15] = foo (c[15], d[15], a[15] * w, b[15] * w);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int a[N], b[N], c[N], d[N], e[N];
|
||||
|
||||
int main ()
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = i;
|
||||
b[i] = 5;
|
||||
e[i] = 0;
|
||||
|
||||
switch (i % 9)
|
||||
{
|
||||
case 0: asm (""); c[i] = i; d[i] = i + 1; break;
|
||||
case 1: c[i] = 0; d[i] = 0; break;
|
||||
case 2: c[i] = i + 1; d[i] = i - 1; break;
|
||||
case 3: c[i] = i; d[i] = i + 7; break;
|
||||
case 4: c[i] = i; d[i] = i; break;
|
||||
case 5: c[i] = i + 16; d[i] = i + 3; break;
|
||||
case 6: c[i] = i - 5; d[i] = i; break;
|
||||
case 7: c[i] = i; d[i] = i; break;
|
||||
case 8: c[i] = i; d[i] = i - 7; break;
|
||||
}
|
||||
}
|
||||
|
||||
bar (a, b, c, d, e, 2);
|
||||
for (i = 0; i < N; i++)
|
||||
if (e[i] != ((i % 3) == 0 || i <= 5 ? 10 : 2 * i))
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
|
||||
|
|
@ -3863,6 +3863,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
|
|||
case vect_scalar_var:
|
||||
prefix = "stmp";
|
||||
break;
|
||||
case vect_mask_var:
|
||||
prefix = "mask";
|
||||
break;
|
||||
case vect_pointer_var:
|
||||
prefix = "vectp";
|
||||
break;
|
||||
|
@ -4452,7 +4455,11 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
|
|||
tree type;
|
||||
enum vect_var_kind kind;
|
||||
|
||||
kind = vectype ? vect_simple_var : vect_scalar_var;
|
||||
kind = vectype
|
||||
? VECTOR_BOOLEAN_TYPE_P (vectype)
|
||||
? vect_mask_var
|
||||
: vect_simple_var
|
||||
: vect_scalar_var;
|
||||
type = vectype ? vectype : TREE_TYPE (scalar_dest);
|
||||
|
||||
gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
|
||||
|
|
|
@ -346,7 +346,8 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
|
|||
tree op1, enum tree_code code)
|
||||
{
|
||||
tree t;
|
||||
if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
|
||||
if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type)
|
||||
&& !expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
|
||||
t = expand_vector_piecewise (gsi, do_compare, type,
|
||||
TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
|
||||
else
|
||||
|
|
|
@ -178,19 +178,21 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|||
{
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
|
||||
int nbbs = loop->num_nodes;
|
||||
unsigned nbbs = loop->num_nodes;
|
||||
unsigned int vectorization_factor = 0;
|
||||
tree scalar_type;
|
||||
gphi *phi;
|
||||
tree vectype;
|
||||
unsigned int nunits;
|
||||
stmt_vec_info stmt_info;
|
||||
int i;
|
||||
unsigned i;
|
||||
HOST_WIDE_INT dummy;
|
||||
gimple *stmt, *pattern_stmt = NULL;
|
||||
gimple_seq pattern_def_seq = NULL;
|
||||
gimple_stmt_iterator pattern_def_si = gsi_none ();
|
||||
bool analyze_pattern_stmt = false;
|
||||
bool bool_result;
|
||||
auto_vec<stmt_vec_info> mask_producers;
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
|
@ -409,6 +411,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool_result = false;
|
||||
|
||||
if (STMT_VINFO_VECTYPE (stmt_info))
|
||||
{
|
||||
/* The only case when a vectype had been already set is for stmts
|
||||
|
@ -429,6 +433,32 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|||
scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
|
||||
else
|
||||
scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
|
||||
|
||||
/* Bool ops don't participate in vectorization factor
|
||||
computation. For comparison use compared types to
|
||||
compute a factor. */
|
||||
if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
|
||||
{
|
||||
mask_producers.safe_push (stmt_info);
|
||||
bool_result = true;
|
||||
|
||||
if (gimple_code (stmt) == GIMPLE_ASSIGN
|
||||
&& TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
|
||||
== tcc_comparison
|
||||
&& TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
|
||||
!= BOOLEAN_TYPE)
|
||||
scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
|
||||
else
|
||||
{
|
||||
if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
|
||||
{
|
||||
pattern_def_seq = NULL;
|
||||
gsi_next (&si);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
|
@ -451,7 +481,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|||
return false;
|
||||
}
|
||||
|
||||
STMT_VINFO_VECTYPE (stmt_info) = vectype;
|
||||
if (!bool_result)
|
||||
STMT_VINFO_VECTYPE (stmt_info) = vectype;
|
||||
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
|
@ -464,8 +495,9 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|||
/* The vectorization factor is according to the smallest
|
||||
scalar type (or the largest vector size, but we only
|
||||
support one vector size per loop). */
|
||||
scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
|
||||
&dummy);
|
||||
if (!bool_result)
|
||||
scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
|
||||
&dummy);
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
|
@ -540,6 +572,99 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|||
}
|
||||
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
|
||||
|
||||
for (i = 0; i < mask_producers.length (); i++)
|
||||
{
|
||||
tree mask_type = NULL;
|
||||
|
||||
stmt = STMT_VINFO_STMT (mask_producers[i]);
|
||||
|
||||
if (gimple_code (stmt) == GIMPLE_ASSIGN
|
||||
&& TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
|
||||
&& TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
|
||||
{
|
||||
scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
|
||||
mask_type = get_mask_type_for_scalar_type (scalar_type);
|
||||
|
||||
if (!mask_type)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"not vectorized: unsupported mask\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
tree rhs;
|
||||
ssa_op_iter iter;
|
||||
gimple *def_stmt;
|
||||
enum vect_def_type dt;
|
||||
|
||||
FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
|
||||
{
|
||||
if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
|
||||
&def_stmt, &dt, &vectype))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"not vectorized: can't compute mask type "
|
||||
"for statement, ");
|
||||
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
|
||||
0);
|
||||
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* No vectype probably means external definition.
|
||||
Allow it in case there is another operand which
|
||||
allows the mask type to be determined. */
|
||||
if (!vectype)
|
||||
continue;
|
||||
|
||||
if (!mask_type)
|
||||
mask_type = vectype;
|
||||
else if (TYPE_VECTOR_SUBPARTS (mask_type)
|
||||
!= TYPE_VECTOR_SUBPARTS (vectype))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"not vectorized: different sized masks "
|
||||
"types in statement, ");
|
||||
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
|
||||
mask_type);
|
||||
dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
|
||||
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
|
||||
vectype);
|
||||
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* No mask_type should mean loop invariant predicate.
|
||||
This is probably a subject for optimization in
|
||||
if-conversion. */
|
||||
if (!mask_type)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"not vectorized: can't compute mask type "
|
||||
"for statement, ");
|
||||
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
|
||||
0);
|
||||
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -2849,7 +2849,9 @@ vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
|
|||
|
||||
|
||||
/* Helper function of vect_recog_bool_pattern. Called recursively, return
|
||||
true if bool VAR can be optimized that way. */
|
||||
true if bool VAR can and should be optimized that way. Assume it shouldn't
|
||||
in case it's a result of a comparison which can be directly vectorized into
|
||||
a vector comparison. */
|
||||
|
||||
static bool
|
||||
check_bool_pattern (tree var, vec_info *vinfo)
|
||||
|
@ -2898,7 +2900,7 @@ check_bool_pattern (tree var, vec_info *vinfo)
|
|||
default:
|
||||
if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
|
||||
{
|
||||
tree vecitype, comp_vectype;
|
||||
tree vecitype, comp_vectype, mask_type;
|
||||
|
||||
/* If the comparison can throw, then is_gimple_condexpr will be
|
||||
false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
|
||||
|
@ -2909,6 +2911,11 @@ check_bool_pattern (tree var, vec_info *vinfo)
|
|||
if (comp_vectype == NULL_TREE)
|
||||
return false;
|
||||
|
||||
mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
|
||||
if (mask_type
|
||||
&& expand_vec_cmp_expr_p (comp_vectype, mask_type))
|
||||
return false;
|
||||
|
||||
if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
|
||||
{
|
||||
machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
|
||||
|
@ -3133,6 +3140,73 @@ adjust_bool_pattern (tree var, tree out_type, tree trueval,
|
|||
}
|
||||
|
||||
|
||||
/* Return the proper type for converting bool VAR into
|
||||
an integer value or NULL_TREE if no such type exists.
|
||||
The type is chosen so that converted value has the
|
||||
same number of elements as VAR's vector type. */
|
||||
|
||||
static tree
|
||||
search_type_for_mask (tree var, vec_info *vinfo)
|
||||
{
|
||||
gimple *def_stmt;
|
||||
enum vect_def_type dt;
|
||||
tree rhs1;
|
||||
enum tree_code rhs_code;
|
||||
tree res = NULL_TREE;
|
||||
|
||||
if (TREE_CODE (var) != SSA_NAME)
|
||||
return NULL_TREE;
|
||||
|
||||
if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
|
||||
|| !TYPE_UNSIGNED (TREE_TYPE (var)))
|
||||
&& TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
|
||||
return NULL_TREE;
|
||||
|
||||
if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
|
||||
return NULL_TREE;
|
||||
|
||||
if (dt != vect_internal_def)
|
||||
return NULL_TREE;
|
||||
|
||||
if (!is_gimple_assign (def_stmt))
|
||||
return NULL_TREE;
|
||||
|
||||
rhs_code = gimple_assign_rhs_code (def_stmt);
|
||||
rhs1 = gimple_assign_rhs1 (def_stmt);
|
||||
|
||||
switch (rhs_code)
|
||||
{
|
||||
case SSA_NAME:
|
||||
case BIT_NOT_EXPR:
|
||||
CASE_CONVERT:
|
||||
res = search_type_for_mask (rhs1, vinfo);
|
||||
break;
|
||||
|
||||
case BIT_AND_EXPR:
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
if (!(res = search_type_for_mask (rhs1, vinfo)))
|
||||
res = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
|
||||
{
|
||||
if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
|
||||
|| !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
|
||||
{
|
||||
machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
|
||||
res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
|
||||
}
|
||||
else
|
||||
res = TREE_TYPE (rhs1);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_recog_bool_pattern
|
||||
|
||||
Try to find pattern like following:
|
||||
|
@ -3190,6 +3264,7 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
|
|||
enum tree_code rhs_code;
|
||||
tree var, lhs, rhs, vectype;
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
|
||||
stmt_vec_info new_stmt_info;
|
||||
vec_info *vinfo = stmt_vinfo->vinfo;
|
||||
gimple *pattern_stmt;
|
||||
|
||||
|
@ -3214,16 +3289,52 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
|
|||
if (vectype == NULL_TREE)
|
||||
return NULL;
|
||||
|
||||
if (!check_bool_pattern (var, vinfo))
|
||||
return NULL;
|
||||
|
||||
rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
|
||||
lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
|
||||
if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
|
||||
pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
|
||||
if (check_bool_pattern (var, vinfo))
|
||||
{
|
||||
rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
|
||||
lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
|
||||
if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
|
||||
pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
|
||||
else
|
||||
pattern_stmt
|
||||
= gimple_build_assign (lhs, NOP_EXPR, rhs);
|
||||
}
|
||||
else
|
||||
pattern_stmt
|
||||
= gimple_build_assign (lhs, NOP_EXPR, rhs);
|
||||
{
|
||||
tree type = search_type_for_mask (var, vinfo);
|
||||
tree cst0, cst1, cmp, tmp;
|
||||
|
||||
if (!type)
|
||||
return NULL;
|
||||
|
||||
/* We may directly use cond with narrowed type to avoid
|
||||
multiple cond exprs with following result packing and
|
||||
perform single cond with packed mask instead. In case
|
||||
of widening it is better to make cond first and then extract
|
||||
results. */
|
||||
if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
|
||||
type = TREE_TYPE (lhs);
|
||||
|
||||
cst0 = build_int_cst (type, 0);
|
||||
cst1 = build_int_cst (type, 1);
|
||||
tmp = vect_recog_temp_ssa_var (type, NULL);
|
||||
cmp = build2 (NE_EXPR, boolean_type_node,
|
||||
var, build_int_cst (TREE_TYPE (var), 0));
|
||||
pattern_stmt = gimple_build_assign (tmp, COND_EXPR, cmp, cst1, cst0);
|
||||
|
||||
if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
|
||||
{
|
||||
tree new_vectype = get_vectype_for_scalar_type (type);
|
||||
new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
|
||||
set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
|
||||
STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
|
||||
new_pattern_def_seq (stmt_vinfo, pattern_stmt);
|
||||
|
||||
lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
|
||||
pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
*type_out = vectype;
|
||||
*type_in = vectype;
|
||||
stmts->safe_push (last_stmt);
|
||||
|
@ -3252,15 +3363,19 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
|
|||
if (get_vectype_for_scalar_type (type) == NULL_TREE)
|
||||
return NULL;
|
||||
|
||||
if (!check_bool_pattern (var, vinfo))
|
||||
return NULL;
|
||||
if (check_bool_pattern (var, vinfo))
|
||||
{
|
||||
rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
|
||||
rhs = build2 (NE_EXPR, boolean_type_node,
|
||||
rhs, build_int_cst (type, 0));
|
||||
}
|
||||
else
|
||||
rhs = build2 (NE_EXPR, boolean_type_node,
|
||||
var, build_int_cst (TREE_TYPE (var), 0)),
|
||||
|
||||
rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
|
||||
lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
|
||||
pattern_stmt
|
||||
= gimple_build_assign (lhs, COND_EXPR,
|
||||
build2 (NE_EXPR, boolean_type_node,
|
||||
rhs, build_int_cst (type, 0)),
|
||||
= gimple_build_assign (lhs, COND_EXPR, rhs,
|
||||
gimple_assign_rhs2 (last_stmt),
|
||||
gimple_assign_rhs3 (last_stmt));
|
||||
*type_out = vectype;
|
||||
|
@ -3280,16 +3395,43 @@ vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
|
|||
gcc_assert (vectype != NULL_TREE);
|
||||
if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
|
||||
return NULL;
|
||||
if (!check_bool_pattern (var, vinfo))
|
||||
return NULL;
|
||||
|
||||
rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
|
||||
if (check_bool_pattern (var, vinfo))
|
||||
rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
|
||||
NULL_TREE, stmts);
|
||||
else
|
||||
{
|
||||
tree type = search_type_for_mask (var, vinfo);
|
||||
tree cst0, cst1, cmp, new_vectype;
|
||||
|
||||
if (!type)
|
||||
return NULL;
|
||||
|
||||
if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
|
||||
type = TREE_TYPE (vectype);
|
||||
|
||||
cst0 = build_int_cst (type, 0);
|
||||
cst1 = build_int_cst (type, 1);
|
||||
new_vectype = get_vectype_for_scalar_type (type);
|
||||
|
||||
rhs = vect_recog_temp_ssa_var (type, NULL);
|
||||
cmp = build2 (NE_EXPR, boolean_type_node,
|
||||
var, build_int_cst (TREE_TYPE (var), 0));
|
||||
pattern_stmt = gimple_build_assign (rhs, COND_EXPR,
|
||||
cmp, cst1, cst0);
|
||||
|
||||
pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
|
||||
set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
|
||||
STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
|
||||
append_pattern_def_seq (stmt_vinfo, pattern_stmt);
|
||||
}
|
||||
|
||||
lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
|
||||
if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
|
||||
{
|
||||
tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
|
||||
gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
|
||||
new_pattern_def_seq (stmt_vinfo, cast_stmt);
|
||||
append_pattern_def_seq (stmt_vinfo, cast_stmt);
|
||||
rhs = rhs2;
|
||||
}
|
||||
pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
|
||||
|
|
|
@ -792,6 +792,7 @@ vect_build_slp_tree_1 (vec_info *vinfo,
|
|||
if (TREE_CODE_CLASS (rhs_code) != tcc_binary
|
||||
&& TREE_CODE_CLASS (rhs_code) != tcc_unary
|
||||
&& TREE_CODE_CLASS (rhs_code) != tcc_expression
|
||||
&& TREE_CODE_CLASS (rhs_code) != tcc_comparison
|
||||
&& rhs_code != CALL_EXPR)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
|
@ -2640,7 +2641,14 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|||
struct loop *loop;
|
||||
gimple_seq ctor_seq = NULL;
|
||||
|
||||
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
|
||||
/* Check if vector type is a boolean vector. */
|
||||
if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
|
||||
&& (VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo))
|
||||
|| (code == COND_EXPR && op_num < 2)))
|
||||
vector_type
|
||||
= build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
|
||||
else
|
||||
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vector_type);
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
|
||||
|
@ -2812,8 +2820,21 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|||
{
|
||||
if (CONSTANT_CLASS_P (op))
|
||||
{
|
||||
op = fold_unary (VIEW_CONVERT_EXPR,
|
||||
TREE_TYPE (vector_type), op);
|
||||
if (VECTOR_BOOLEAN_TYPE_P (vector_type))
|
||||
{
|
||||
/* Can't use VIEW_CONVERT_EXPR for booleans because
|
||||
of possibly different sizes of scalar value and
|
||||
vector element. */
|
||||
if (integer_zerop (op))
|
||||
op = build_int_cst (TREE_TYPE (vector_type), 0);
|
||||
else if (integer_onep (op))
|
||||
op = build_int_cst (TREE_TYPE (vector_type), 1);
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else
|
||||
op = fold_unary (VIEW_CONVERT_EXPR,
|
||||
TREE_TYPE (vector_type), op);
|
||||
gcc_assert (op && CONSTANT_CLASS_P (op));
|
||||
}
|
||||
else
|
||||
|
|
|
@ -1300,7 +1300,7 @@ vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
|
|||
if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
|
||||
{
|
||||
if (CONSTANT_CLASS_P (val))
|
||||
val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
|
||||
val = fold_convert (TREE_TYPE (type), val);
|
||||
else
|
||||
{
|
||||
new_temp = make_ssa_name (TREE_TYPE (type));
|
||||
|
@ -1328,16 +1328,18 @@ vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
|
|||
STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
|
||||
|
||||
In case OP is an invariant or constant, a new stmt that creates a vector def
|
||||
needs to be introduced. */
|
||||
needs to be introduced. VECTYPE may be used to specify a required type for
|
||||
vector invariant. */
|
||||
|
||||
tree
|
||||
vect_get_vec_def_for_operand (tree op, gimple *stmt)
|
||||
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
|
||||
{
|
||||
tree vec_oprnd;
|
||||
gimple *vec_stmt;
|
||||
gimple *def_stmt;
|
||||
stmt_vec_info def_stmt_info = NULL;
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
|
||||
tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
enum vect_def_type dt;
|
||||
bool is_simple_use;
|
||||
|
@ -1372,7 +1374,14 @@ vect_get_vec_def_for_operand (tree op, gimple *stmt)
|
|||
case vect_constant_def:
|
||||
case vect_external_def:
|
||||
{
|
||||
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
|
||||
if (vectype)
|
||||
vector_type = vectype;
|
||||
else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
|
||||
&& VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
|
||||
vector_type = build_same_sized_truth_vector_type (stmt_vectype);
|
||||
else
|
||||
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
|
||||
|
||||
gcc_assert (vector_type);
|
||||
return vect_init_vector (stmt, op, vector_type, NULL);
|
||||
}
|
||||
|
@ -7329,13 +7338,14 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
gimple *gtemp;
|
||||
vec_cond_lhs =
|
||||
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
|
||||
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
|
||||
stmt, comp_vectype);
|
||||
vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
|
||||
loop_vinfo, &gtemp, &dts[0]);
|
||||
|
||||
vec_cond_rhs =
|
||||
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
|
||||
stmt);
|
||||
stmt, comp_vectype);
|
||||
vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
|
||||
loop_vinfo, &gtemp, &dts[1]);
|
||||
if (reduc_index == 1)
|
||||
|
@ -7416,6 +7426,185 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* vectorizable_comparison.
|
||||
|
||||
Check if STMT is a comparison expression that can be vectorized.
|
||||
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
|
||||
comparison, put it in VEC_STMT, and insert it at GSI.
|
||||
|
||||
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
|
||||
|
||||
bool
|
||||
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, tree reduc_def,
|
||||
slp_tree slp_node)
|
||||
{
|
||||
tree lhs, rhs1, rhs2;
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
|
||||
tree new_temp;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
|
||||
unsigned nunits;
|
||||
int ncopies;
|
||||
enum tree_code code;
|
||||
stmt_vec_info prev_stmt_info = NULL;
|
||||
int i, j;
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
vec<tree> vec_oprnds0 = vNULL;
|
||||
vec<tree> vec_oprnds1 = vNULL;
|
||||
gimple *def_stmt;
|
||||
tree mask_type;
|
||||
tree mask;
|
||||
|
||||
if (!VECTOR_BOOLEAN_TYPE_P (vectype))
|
||||
return false;
|
||||
|
||||
mask_type = vectype;
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
if (slp_node || PURE_SLP_STMT (stmt_info))
|
||||
ncopies = 1;
|
||||
else
|
||||
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
|
||||
|
||||
gcc_assert (ncopies >= 1);
|
||||
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
|
||||
&& !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|
||||
&& reduc_def))
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_LIVE_P (stmt_info))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"value used after loop.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!is_gimple_assign (stmt))
|
||||
return false;
|
||||
|
||||
code = gimple_assign_rhs_code (stmt);
|
||||
|
||||
if (TREE_CODE_CLASS (code) != tcc_comparison)
|
||||
return false;
|
||||
|
||||
rhs1 = gimple_assign_rhs1 (stmt);
|
||||
rhs2 = gimple_assign_rhs2 (stmt);
|
||||
|
||||
if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
|
||||
&dts[0], &vectype1))
|
||||
return false;
|
||||
|
||||
if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
|
||||
&dts[1], &vectype2))
|
||||
return false;
|
||||
|
||||
if (vectype1 && vectype2
|
||||
&& TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
|
||||
return false;
|
||||
|
||||
vectype = vectype1 ? vectype1 : vectype2;
|
||||
|
||||
/* Invariant comparison. */
|
||||
if (!vectype)
|
||||
{
|
||||
vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
|
||||
if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
|
||||
return false;
|
||||
}
|
||||
else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
|
||||
return false;
|
||||
|
||||
if (!vec_stmt)
|
||||
{
|
||||
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
|
||||
vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
|
||||
return expand_vec_cmp_expr_p (vectype, mask_type);
|
||||
}
|
||||
|
||||
/* Transform. */
|
||||
if (!slp_node)
|
||||
{
|
||||
vec_oprnds0.create (1);
|
||||
vec_oprnds1.create (1);
|
||||
}
|
||||
|
||||
/* Handle def. */
|
||||
lhs = gimple_assign_lhs (stmt);
|
||||
mask = vect_create_destination_var (lhs, mask_type);
|
||||
|
||||
/* Handle cmp expr. */
|
||||
for (j = 0; j < ncopies; j++)
|
||||
{
|
||||
gassign *new_stmt = NULL;
|
||||
if (j == 0)
|
||||
{
|
||||
if (slp_node)
|
||||
{
|
||||
auto_vec<tree, 2> ops;
|
||||
auto_vec<vec<tree>, 2> vec_defs;
|
||||
|
||||
ops.safe_push (rhs1);
|
||||
ops.safe_push (rhs2);
|
||||
vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
|
||||
vec_oprnds1 = vec_defs.pop ();
|
||||
vec_oprnds0 = vec_defs.pop ();
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, NULL);
|
||||
vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, NULL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
|
||||
vec_oprnds0.pop ());
|
||||
vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
|
||||
vec_oprnds1.pop ());
|
||||
}
|
||||
|
||||
if (!slp_node)
|
||||
{
|
||||
vec_oprnds0.quick_push (vec_rhs1);
|
||||
vec_oprnds1.quick_push (vec_rhs2);
|
||||
}
|
||||
|
||||
/* Arguments are ready. Create the new vector stmt. */
|
||||
FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
|
||||
{
|
||||
vec_rhs2 = vec_oprnds1[i];
|
||||
|
||||
new_temp = make_ssa_name (mask);
|
||||
new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
if (slp_node)
|
||||
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
|
||||
}
|
||||
|
||||
if (slp_node)
|
||||
continue;
|
||||
|
||||
if (j == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
}
|
||||
|
||||
vec_oprnds0.release ();
|
||||
vec_oprnds1.release ();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Make sure the statement is vectorizable. */
|
||||
|
||||
|
@ -7619,7 +7808,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
|
|||
|| vectorizable_call (stmt, NULL, NULL, node)
|
||||
|| vectorizable_store (stmt, NULL, NULL, node)
|
||||
|| vectorizable_reduction (stmt, NULL, NULL, node)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
|
||||
|| vectorizable_comparison (stmt, NULL, NULL, NULL, node));
|
||||
else
|
||||
{
|
||||
if (bb_vinfo)
|
||||
|
@ -7631,7 +7821,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
|
|||
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
|
||||
|| vectorizable_call (stmt, NULL, NULL, node)
|
||||
|| vectorizable_store (stmt, NULL, NULL, node)
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
|
||||
|| vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
|
||||
|| vectorizable_comparison (stmt, NULL, NULL, NULL, node));
|
||||
}
|
||||
|
||||
if (!ok)
|
||||
|
@ -7747,6 +7938,11 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case comparison_vec_info_type:
|
||||
done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
case call_vec_info_type:
|
||||
done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
|
||||
stmt = gsi_stmt (*gsi);
|
||||
|
@ -8080,6 +8276,23 @@ get_vectype_for_scalar_type (tree scalar_type)
|
|||
return vectype;
|
||||
}
|
||||
|
||||
/* Function get_mask_type_for_scalar_type.
|
||||
|
||||
Returns the mask type corresponding to a result of comparison
|
||||
of vectors of specified SCALAR_TYPE as supported by target. */
|
||||
|
||||
tree
|
||||
get_mask_type_for_scalar_type (tree scalar_type)
|
||||
{
|
||||
tree vectype = get_vectype_for_scalar_type (scalar_type);
|
||||
|
||||
if (!vectype)
|
||||
return NULL;
|
||||
|
||||
return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
|
||||
current_vector_size);
|
||||
}
|
||||
|
||||
/* Function get_same_sized_vectype
|
||||
|
||||
Returns a vector type corresponding to SCALAR_TYPE of size
|
||||
|
|
|
@ -28,7 +28,8 @@ along with GCC; see the file COPYING3. If not see
|
|||
enum vect_var_kind {
|
||||
vect_simple_var,
|
||||
vect_pointer_var,
|
||||
vect_scalar_var
|
||||
vect_scalar_var,
|
||||
vect_mask_var
|
||||
};
|
||||
|
||||
/* Defines type of operation. */
|
||||
|
@ -420,6 +421,7 @@ enum stmt_vec_info_type {
|
|||
call_simd_clone_vec_info_type,
|
||||
assignment_vec_info_type,
|
||||
condition_vec_info_type,
|
||||
comparison_vec_info_type,
|
||||
reduc_vec_info_type,
|
||||
induc_vec_info_type,
|
||||
type_promotion_vec_info_type,
|
||||
|
@ -944,6 +946,7 @@ extern bool vect_can_advance_ivs_p (loop_vec_info);
|
|||
/* In tree-vect-stmts.c. */
|
||||
extern unsigned int current_vector_size;
|
||||
extern tree get_vectype_for_scalar_type (tree);
|
||||
extern tree get_mask_type_for_scalar_type (tree);
|
||||
extern tree get_same_sized_vectype (tree, tree);
|
||||
extern bool vect_is_simple_use (tree, vec_info *, gimple **,
|
||||
enum vect_def_type *);
|
||||
|
@ -975,7 +978,7 @@ extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
|
|||
extern void vect_finish_stmt_generation (gimple *, gimple *,
|
||||
gimple_stmt_iterator *);
|
||||
extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
|
||||
extern tree vect_get_vec_def_for_operand (tree, gimple *);
|
||||
extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL);
|
||||
extern tree vect_init_vector (gimple *, tree, tree,
|
||||
gimple_stmt_iterator *);
|
||||
extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree);
|
||||
|
@ -985,6 +988,8 @@ extern void vect_remove_stores (gimple *);
|
|||
extern bool vect_analyze_stmt (gimple *, bool *, slp_tree);
|
||||
extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *,
|
||||
gimple **, tree, int, slp_tree);
|
||||
/* Check/transform routine for vector comparison statements.  It takes
   five arguments, matching the calls made from vect_analyze_stmt and
   vect_transform_stmt; unlike vectorizable_condition it has no int
   parameter.  */
extern bool vectorizable_comparison (gimple *, gimple_stmt_iterator *,
				     gimple **, tree, slp_tree);
|
||||
extern void vect_get_load_cost (struct data_reference *, int, bool,
|
||||
unsigned int *, unsigned int *,
|
||||
stmt_vector_for_cost *,
|
||||
|
|
Loading…
Reference in New Issue