re PR tree-optimization/90510 (Unnecessary permutation)

2019-05-21  Richard Biener  <rguenther@suse.de>

	PR middle-end/90510
	* fold-const.c (fold_read_from_vector): New function.
	* fold-const.h (fold_read_from_vector): Declare.
	* match.pd (VEC_PERM_EXPR): Build BIT_INSERT_EXPRs for
	single-element insert permutations.  Canonicalize selector
	further and fix issue with last commit.

	* gcc.target/i386/pr90510.c: New testcase.

From-SVN: r271463
This commit is contained in:
Richard Biener 2019-05-21 12:01:00 +00:00 committed by Richard Biener
parent 3b0657dce5
commit 4f8b89f092
6 changed files with 109 additions and 6 deletions

View File

@ -1,3 +1,12 @@
2019-05-21 Richard Biener <rguenther@suse.de>
PR middle-end/90510
* fold-const.c (fold_read_from_vector): New function.
* fold-const.h (fold_read_from_vector): Declare.
* match.pd (VEC_PERM_EXPR): Build BIT_INSERT_EXPRs for
single-element insert permutations. Canonicalize selector
further and fix issue with last commit.
2019-05-21 Vladislav Ivanishin <vlad@ispras.ru>
* tree-cfg.h (split_critical_edges): Add for_edge_insertion_p

View File

@ -13793,6 +13793,28 @@ fold_read_from_constant_string (tree exp)
return NULL;
}
/* Fold a read of the element at index IDX from the vector ARG.

   ARG is handled when it is a VECTOR_CST or a CONSTRUCTOR whose
   elements are scalars.  Returns the tree for the selected element, a
   zero constant of the element type when IDX is at or beyond the number
   of elements listed in the CONSTRUCTOR, or NULL_TREE when the read
   cannot be folded: IDX is not a compile-time constant known to be in
   range, ARG is some other kind of tree, or ARG is a CONSTRUCTOR
   assembled from sub-vectors.  */

tree
fold_read_from_vector (tree arg, poly_uint64 idx)
{
  unsigned HOST_WIDE_INT i;
  if (known_lt (idx, TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)))
      && known_ge (idx, 0u)
      && idx.is_constant (&i))
    {
      if (TREE_CODE (arg) == VECTOR_CST)
        return VECTOR_CST_ELT (arg, i);
      else if (TREE_CODE (arg) == CONSTRUCTOR)
        {
          /* A vector CONSTRUCTOR may be built from sub-vectors instead
             of scalars.  In that case position I in the element list
             does not correspond to vector element I, so indexing it
             would hand back a value of the wrong type.  Give up.  */
          if (CONSTRUCTOR_NELTS (arg)
              && VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (arg, 0)->value)))
            return NULL_TREE;
          /* Elements past the ones explicitly listed read as zero.  */
          if (i >= CONSTRUCTOR_NELTS (arg))
            return build_zero_cst (TREE_TYPE (TREE_TYPE (arg)));
          return CONSTRUCTOR_ELT (arg, i)->value;
        }
    }
  return NULL_TREE;
}
/* Return the tree for neg (ARG0) when ARG0 is known to be either
an integer constant, real, or fixed-point constant.

View File

@ -100,6 +100,7 @@ extern tree fold_bit_and_mask (tree, tree, enum tree_code,
tree, enum tree_code, tree, tree,
tree, enum tree_code, tree, tree, tree *);
extern tree fold_read_from_constant_string (tree);
/* Fold a read of a single element from the vector passed as the first
   argument; the poly_uint64 argument is the element index.  Yields
   NULL_TREE when the read cannot be folded.  */
extern tree fold_read_from_vector (tree, poly_uint64);
/* fold_vec_perm takes a vec_perm_indices reference, so it is declared
   only when GCC_VEC_PERN_INDICES_H evaluates to nonzero.
   NOTE(review): presumably that macro is the include guard of the
   header defining vec_perm_indices -- confirm the spelling matches.  */
#if GCC_VEC_PERN_INDICES_H
extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
#endif

View File

@ -5406,6 +5406,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
op0 = op1;
sel.rotate_inputs (1);
}
else if (known_ge (poly_uint64 (sel[0]), nelts))
{
std::swap (op0, op1);
sel.rotate_inputs (1);
}
}
gassign *def;
tree cop0 = op0, cop1 = op1;
@ -5429,9 +5434,46 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(with
{
bool changed = (op0 == op1 && !single_arg);
tree ins = NULL_TREE;
unsigned at = 0;
/* See if the permutation is performing a single element
insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR
in that case. But only if the vector mode is supported,
otherwise this is invalid GIMPLE. */
if (TYPE_MODE (type) != BLKmode
&& (TREE_CODE (cop0) == VECTOR_CST
|| TREE_CODE (cop0) == CONSTRUCTOR
|| TREE_CODE (cop1) == VECTOR_CST
|| TREE_CODE (cop1) == CONSTRUCTOR))
{
if (sel.series_p (1, 1, nelts + 1, 1))
{
/* After canonicalizing the first elt to come from the
first vector we only can insert the first elt from
the first vector. */
at = 0;
ins = fold_read_from_vector (cop0, 0);
op0 = op1;
}
else
{
unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
for (at = 0; at < encoded_nelts; ++at)
if (maybe_ne (sel[at], at))
break;
if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
{
if (known_lt (at, nelts))
ins = fold_read_from_vector (cop0, sel[at]);
else
ins = fold_read_from_vector (cop1, sel[at] - nelts);
}
}
}
/* Generate a canonical form of the selector. */
if (sel.encoding () != builder)
if (!ins && sel.encoding () != builder)
{
/* Some targets are deficient and fail to expand a single
argument permutation while still allowing an equivalent
@ -5450,10 +5492,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
so use the preferred form. */
op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
}
/* Differences in the encoder do not necessarily mean
differences in the resulting vector. */
changed = !operand_equal_p (op2, oldop2, 0);
if (!operand_equal_p (op2, oldop2, 0))
changed = true;
}
}
(if (changed)
(vec_perm { op0; } { op1; } { op2; })))))))))
(if (ins)
(bit_insert { op0; } { ins; }
{ bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
(if (changed)
(vec_perm { op0; } { op1; } { op2; }))))))))))

View File

@ -1,3 +1,8 @@
2019-05-21 Richard Biener <rguenther@suse.de>
PR middle-end/90510
* gcc.target/i386/pr90510.c: New testcase.
2019-05-21 Martin Liska <mliska@suse.cz>
* gcc.target/i386/pr90500-1.c: Add missing '""'.

View File

@ -0,0 +1,22 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
/* Variant A: add the low lanes of X and Y and place the sum in lane 0
   of X, written as an explicit two-input shuffle.  The dg-final scan
   below expects this to end up as a BIT_INSERT_EXPR.  */
__v2df
_mm_add_sd_A (__v2df x, __v2df y)
{
/* Scalar sum of the two low lanes.  */
double tem = x[0] + y[0];
/* Selector { 2, 1 }: result lane 0 comes from lane 0 of the second
   operand (tem), result lane 1 from lane 1 of the first operand (x).  */
return __builtin_shuffle ( x, (__v2df) { tem, tem }, (__v2di) { 2, 1 } );
}
/* Variant B: the same result as variant A, { x[0] + y[0], x[1] }, but
   spelled as a vector initializer instead of a shuffle.  The dg-final
   scan below counts a BIT_INSERT_EXPR for this function as well.  */
__v2df
_mm_add_sd_B (__v2df x, __v2df y)
{
/* Lane 0 is the sum of the low lanes; lane 1 is copied from x.  */
__v2df z = { (x[0] + y[0]), x[1] };
return z;
}
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "optimized" } } */
/* { dg-final { scan-assembler-not "unpck" } } */