re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)
2019-11-27 Richard Biener <rguenther@suse.de> PR tree-optimization/92645 * tree-ssa-forwprop.c (simplify_vector_constructor): Handle CTORs with just a subset of the original vectors. * gcc.target/i386/pr92645-2.c: New testcase. * gcc.target/i386/pr92645-3.c: Likewise. From-SVN: r278758
This commit is contained in:
parent
cd3f923b0d
commit
1fa715db54
@ -1,3 +1,9 @@
|
||||
2019-11-27 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92645
|
||||
* tree-ssa-forwprop.c (simplify_vector_constructor): Handle
|
||||
CTORs with just a subset of the original vectors.
|
||||
|
||||
2019-11-27 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/92674
|
||||
|
@ -1,3 +1,9 @@
|
||||
2019-11-27 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92645
|
||||
* gcc.target/i386/pr92645-2.c: New testcase.
|
||||
* gcc.target/i386/pr92645-3.c: Likewise.
|
||||
|
||||
2019-11-26 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
* g++.dg/cpp1z/bool-increment1.C: Test location(s) too.
|
||||
|
34
gcc/testsuite/gcc.target/i386/pr92645-2.c
Normal file
34
gcc/testsuite/gcc.target/i386/pr92645-2.c
Normal file
@ -0,0 +1,34 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2 -fdump-tree-cddce1" } */
|
||||
|
||||
typedef int v4si __attribute__((vector_size(16)));
|
||||
typedef int v2si __attribute__((vector_size(8)));
|
||||
|
||||
void low (v2si *dst, v4si *srcp)
|
||||
{
|
||||
v4si src = *srcp;
|
||||
*dst = (v2si) { src[0], src[1] };
|
||||
}
|
||||
|
||||
void high (v2si *dst, v4si *srcp)
|
||||
{
|
||||
v4si src = *srcp;
|
||||
*dst = (v2si) { src[2], src[3] };
|
||||
}
|
||||
|
||||
void even (v2si *dst, v4si *srcp)
|
||||
{
|
||||
v4si src = *srcp;
|
||||
*dst = (v2si) { src[0], src[2] };
|
||||
}
|
||||
|
||||
void odd (v2si *dst, v4si *srcp)
|
||||
{
|
||||
v4si src = *srcp;
|
||||
*dst = (v2si) { src[1], src[3] };
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */
|
||||
/* Ideally highpart extraction would elide the permutation as well. */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */
|
37
gcc/testsuite/gcc.target/i386/pr92645-3.c
Normal file
37
gcc/testsuite/gcc.target/i386/pr92645-3.c
Normal file
@ -0,0 +1,37 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx2 -fdump-tree-cddce1" } */
|
||||
|
||||
typedef int v8si __attribute__((vector_size(32)));
|
||||
typedef float v4sf __attribute__((vector_size(16)));
|
||||
|
||||
void low (v4sf *dst, v8si *srcp)
|
||||
{
|
||||
v8si src = *srcp;
|
||||
*dst = (v4sf) { src[0], src[1], src[2], src[3] };
|
||||
}
|
||||
|
||||
void high (v4sf *dst, v8si *srcp)
|
||||
{
|
||||
v8si src = *srcp;
|
||||
*dst = (v4sf) { src[4], src[5], src[6], src[7] };
|
||||
}
|
||||
|
||||
void even (v4sf *dst, v8si *srcp)
|
||||
{
|
||||
v8si src = *srcp;
|
||||
*dst = (v4sf) { src[0], src[2], src[4], src[6] };
|
||||
}
|
||||
|
||||
void odd (v4sf *dst, v8si *srcp)
|
||||
{
|
||||
v8si src = *srcp;
|
||||
*dst = (v4sf) { src[1], src[3], src[5], src[7] };
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */
|
||||
/* Four conversions, on the smaller vector type, to not convert excess
|
||||
elements. */
|
||||
/* { dg-final { scan-tree-dump-times " = \\\(vector\\\(4\\\) float\\\)" 4 "cddce1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */
|
||||
/* Ideally highpart extraction would elide the VEC_PERM_EXPR as well. */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */
|
@ -2037,6 +2037,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
tree op, op2, orig[2], type, elem_type;
|
||||
unsigned elem_size, i;
|
||||
unsigned HOST_WIDE_INT nelts;
|
||||
unsigned HOST_WIDE_INT refnelts;
|
||||
enum tree_code conv_code;
|
||||
constructor_elt *elt;
|
||||
bool maybe_ident;
|
||||
@ -2052,7 +2053,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
elem_type = TREE_TYPE (type);
|
||||
elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
|
||||
|
||||
vec_perm_builder sel (nelts, nelts, 1);
|
||||
orig[0] = NULL;
|
||||
orig[1] = NULL;
|
||||
conv_code = ERROR_MARK;
|
||||
@ -2061,6 +2061,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
tree one_nonconstant = NULL_TREE;
|
||||
auto_vec<tree> constants;
|
||||
constants.safe_grow_cleared (nelts);
|
||||
auto_vec<std::pair<unsigned, unsigned>, 64> elts;
|
||||
FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
|
||||
{
|
||||
tree ref, op1;
|
||||
@ -2079,7 +2080,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
TREE_TYPE (TREE_TYPE (ref)))
|
||||
&& known_eq (bit_field_size (op1), elem_size)
|
||||
&& constant_multiple_p (bit_field_offset (op1),
|
||||
elem_size, &elem))
|
||||
elem_size, &elem)
|
||||
&& TYPE_VECTOR_SUBPARTS (TREE_TYPE (ref)).is_constant (&refnelts))
|
||||
{
|
||||
unsigned int j;
|
||||
for (j = 0; j < 2; ++j)
|
||||
@ -2098,11 +2100,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
if (j < 2)
|
||||
{
|
||||
orig[j] = ref;
|
||||
if (j)
|
||||
elem += nelts;
|
||||
if (elem != i)
|
||||
if (elem != i || j != 0)
|
||||
maybe_ident = false;
|
||||
sel.quick_push (elem);
|
||||
elts.safe_push (std::make_pair (j, elem));
|
||||
continue;
|
||||
}
|
||||
/* Else fallthru. */
|
||||
@ -2131,28 +2131,41 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
else if (!operand_equal_p (one_nonconstant, elt->value, 0))
|
||||
return false;
|
||||
}
|
||||
sel.quick_push (i + nelts);
|
||||
elts.safe_push (std::make_pair (1, i));
|
||||
maybe_ident = false;
|
||||
}
|
||||
if (i < nelts)
|
||||
return false;
|
||||
|
||||
if (! orig[0]
|
||||
|| ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
|
||||
|| maybe_ne (TYPE_VECTOR_SUBPARTS (type),
|
||||
TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
|
||||
return false;
|
||||
|
||||
tree tem;
|
||||
if (conv_code != ERROR_MARK
|
||||
&& (! supportable_convert_operation (conv_code, type,
|
||||
TREE_TYPE (orig[0]),
|
||||
&tem, &conv_code)
|
||||
|| conv_code == CALL_EXPR))
|
||||
|| ! VECTOR_TYPE_P (TREE_TYPE (orig[0])))
|
||||
return false;
|
||||
refnelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])).to_constant ();
|
||||
|
||||
if (maybe_ident)
|
||||
{
|
||||
tree conv_src_type
|
||||
= (nelts != refnelts
|
||||
? (conv_code != ERROR_MARK
|
||||
? build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), nelts)
|
||||
: type)
|
||||
: TREE_TYPE (orig[0]));
|
||||
tree tem;
|
||||
if (conv_code != ERROR_MARK
|
||||
&& (!supportable_convert_operation (conv_code, type, conv_src_type,
|
||||
&tem, &conv_code)
|
||||
|| conv_code == CALL_EXPR))
|
||||
return false;
|
||||
if (nelts != refnelts)
|
||||
{
|
||||
gassign *lowpart
|
||||
= gimple_build_assign (make_ssa_name (conv_src_type),
|
||||
build3 (BIT_FIELD_REF, conv_src_type,
|
||||
orig[0], TYPE_SIZE (conv_src_type),
|
||||
bitsize_zero_node));
|
||||
gsi_insert_before (gsi, lowpart, GSI_SAME_STMT);
|
||||
orig[0] = gimple_assign_lhs (lowpart);
|
||||
}
|
||||
if (conv_code == ERROR_MARK)
|
||||
gimple_assign_set_rhs_from_tree (gsi, orig[0]);
|
||||
else
|
||||
@ -2161,34 +2174,66 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
}
|
||||
else
|
||||
{
|
||||
tree mask_type;
|
||||
tree mask_type, perm_type, conv_src_type;
|
||||
if (orig[1] == error_mark_node && conv_code != ERROR_MARK)
|
||||
{
|
||||
/* ??? For subsetting a larger vector we need to permute the original
|
||||
but then the constants are in the converted type already which is
|
||||
why for that case we first convert and then permute. */
|
||||
if (nelts != refnelts)
|
||||
return false;
|
||||
conv_src_type = TREE_TYPE (orig[0]);
|
||||
perm_type = type;
|
||||
}
|
||||
else
|
||||
{
|
||||
perm_type = TREE_TYPE (orig[0]);
|
||||
conv_src_type = (nelts == refnelts
|
||||
? perm_type
|
||||
: build_vector_type (TREE_TYPE (perm_type), nelts));
|
||||
}
|
||||
tree tem;
|
||||
if (conv_code != ERROR_MARK
|
||||
&& (!supportable_convert_operation (conv_code, type, conv_src_type,
|
||||
&tem, &conv_code)
|
||||
|| conv_code == CALL_EXPR))
|
||||
return false;
|
||||
|
||||
vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts);
|
||||
if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
|
||||
/* Now that we know the number of elements of the source build the
|
||||
permute vector. */
|
||||
vec_perm_builder sel (refnelts, refnelts, 1);
|
||||
for (i = 0; i < elts.length (); ++i)
|
||||
sel.quick_push (elts[i].second + elts[i].first * refnelts);
|
||||
/* And fill the tail with "something". It's really don't care,
|
||||
and ideally we'd allow VEC_PERM to have a smaller destination
|
||||
vector. */
|
||||
for (; i < refnelts; ++i)
|
||||
sel.quick_push (i - elts.length ());
|
||||
vec_perm_indices indices (sel, orig[1] ? 2 : 1, refnelts);
|
||||
if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices))
|
||||
return false;
|
||||
mask_type
|
||||
= build_vector_type (build_nonstandard_integer_type (elem_size, 1),
|
||||
nelts);
|
||||
refnelts);
|
||||
if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
|
||||
|| maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)),
|
||||
GET_MODE_SIZE (TYPE_MODE (type))))
|
||||
GET_MODE_SIZE (TYPE_MODE (perm_type))))
|
||||
return false;
|
||||
op2 = vec_perm_indices_to_tree (mask_type, indices);
|
||||
bool convert_orig0 = false;
|
||||
gimple_seq stmts = NULL;
|
||||
if (!orig[1])
|
||||
orig[1] = orig[0];
|
||||
else if (orig[1] == error_mark_node
|
||||
&& one_nonconstant)
|
||||
{
|
||||
gimple_seq seq = NULL;
|
||||
orig[1] = gimple_build_vector_from_val (&seq, UNKNOWN_LOCATION,
|
||||
type, one_nonconstant);
|
||||
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
|
||||
convert_orig0 = true;
|
||||
orig[1] = gimple_build_vector_from_val (&stmts, UNKNOWN_LOCATION,
|
||||
perm_type, one_nonconstant);
|
||||
convert_orig0 = conv_code != ERROR_MARK;
|
||||
}
|
||||
else if (orig[1] == error_mark_node)
|
||||
{
|
||||
tree_vector_builder vec (type, nelts, 1);
|
||||
tree_vector_builder vec (perm_type, nelts, 1);
|
||||
for (unsigned i = 0; i < nelts; ++i)
|
||||
if (constants[i])
|
||||
vec.quick_push (constants[i]);
|
||||
@ -2196,30 +2241,29 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
||||
/* ??? Push a don't-care value. */
|
||||
vec.quick_push (one_constant);
|
||||
orig[1] = vec.build ();
|
||||
convert_orig0 = true;
|
||||
convert_orig0 = conv_code != ERROR_MARK;
|
||||
}
|
||||
if (conv_code == ERROR_MARK)
|
||||
gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
|
||||
orig[1], op2);
|
||||
else if (convert_orig0)
|
||||
tree res;
|
||||
if (convert_orig0)
|
||||
{
|
||||
gimple *conv
|
||||
= gimple_build_assign (make_ssa_name (type), conv_code, orig[0]);
|
||||
orig[0] = gimple_assign_lhs (conv);
|
||||
gsi_insert_before (gsi, conv, GSI_SAME_STMT);
|
||||
gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR,
|
||||
orig[0], orig[1], op2);
|
||||
gcc_assert (nelts == refnelts);
|
||||
res = gimple_build (&stmts, conv_code, type, orig[0]);
|
||||
res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
|
||||
res, orig[1], op2);
|
||||
}
|
||||
else
|
||||
{
|
||||
gimple *perm
|
||||
= gimple_build_assign (make_ssa_name (TREE_TYPE (orig[0])),
|
||||
VEC_PERM_EXPR, orig[0], orig[1], op2);
|
||||
orig[0] = gimple_assign_lhs (perm);
|
||||
gsi_insert_before (gsi, perm, GSI_SAME_STMT);
|
||||
gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
|
||||
NULL_TREE, NULL_TREE);
|
||||
res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
|
||||
orig[0], orig[1], op2);
|
||||
if (nelts != refnelts)
|
||||
res = gimple_build (&stmts, BIT_FIELD_REF,
|
||||
conv_code != ERROR_MARK ? conv_src_type : type,
|
||||
res, TYPE_SIZE (type), bitsize_zero_node);
|
||||
if (conv_code != ERROR_MARK)
|
||||
res = gimple_build (&stmts, conv_code, type, res);
|
||||
}
|
||||
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
|
||||
gimple_assign_set_rhs_with_ops (gsi, SSA_NAME, res);
|
||||
}
|
||||
update_stmt (gsi_stmt (*gsi));
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user