re PR target/29756 (SSE intrinsics hard to use without redundant temporaries appearing)

2016-05-20  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/29756
	* tree.def (BIT_INSERT_EXPR): New tcc_expression tree code.
	* expr.c (expand_expr_real_2): Handle BIT_INSERT_EXPR.
	* fold-const.c (operand_equal_p): Likewise.
	(fold_ternary_loc): Add constant folding of BIT_INSERT_EXPR.
	* gimplify.c (gimplify_expr): Handle BIT_INSERT_EXPR.
	* tree-inline.c (estimate_operator_cost): Likewise.
	* tree-pretty-print.c (dump_generic_node): Likewise.
	* tree-ssa-operands.c (get_expr_operands): Likewise.
	* cfgexpand.c (expand_debug_expr): Likewise.
	* gimple-pretty-print.c (dump_ternary_rhs): Likewise.
	* gimple.c (get_gimple_rhs_num_ops): Handle BIT_INSERT_EXPR.
	* tree-cfg.c (verify_gimple_assign_ternary): Verify BIT_INSERT_EXPR.

	* tree-ssa.c (non_rewritable_lvalue_p): We can rewrite
	vector inserts using BIT_FIELD_REF or MEM_REF on the lhs.
	(execute_update_addresses_taken): Do it.

	* gcc.dg/tree-ssa/vector-6.c: New testcase.

From-SVN: r236501
This commit is contained in:
Richard Guenther 2016-05-20 09:17:16 +00:00 committed by Richard Biener
parent eb066284cb
commit 483c642948
15 changed files with 312 additions and 6 deletions

View File

@ -1,3 +1,22 @@
2016-05-20 Richard Guenther <rguenther@suse.de>
PR tree-optimization/29756
* tree.def (BIT_INSERT_EXPR): New tcc_expression tree code.
* expr.c (expand_expr_real_2): Handle BIT_INSERT_EXPR.
* fold-const.c (operand_equal_p): Likewise.
(fold_ternary_loc): Add constant folding of BIT_INSERT_EXPR.
* gimplify.c (gimplify_expr): Handle BIT_INSERT_EXPR.
* tree-inline.c (estimate_operator_cost): Likewise.
* tree-pretty-print.c (dump_generic_node): Likewise.
* tree-ssa-operands.c (get_expr_operands): Likewise.
* cfgexpand.c (expand_debug_expr): Likewise.
* gimple-pretty-print.c (dump_ternary_rhs): Likewise.
* gimple.c (get_gimple_rhs_num_ops): Handle BIT_INSERT_EXPR.
* tree-cfg.c (verify_gimple_assign_ternary): Verify BIT_INSERT_EXPR.
* tree-ssa.c (non_rewritable_lvalue_p): We can rewrite
vector inserts using BIT_FIELD_REF or MEM_REF on the lhs.
(execute_update_addresses_taken): Do it.
2016-05-20 Richard Biener <rguenther@suse.de>
PR tree-optimization/71185

View File

@ -5025,6 +5025,7 @@ expand_debug_expr (tree exp)
case FIXED_CONVERT_EXPR:
case OBJ_TYPE_REF:
case WITH_SIZE_EXPR:
case BIT_INSERT_EXPR:
return NULL;
case DOT_PROD_EXPR:

View File

@ -9225,6 +9225,23 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
return target;
case BIT_INSERT_EXPR:
{
unsigned bitpos = tree_to_uhwi (treeop2);
unsigned bitsize;
if (INTEGRAL_TYPE_P (TREE_TYPE (treeop1)))
bitsize = TYPE_PRECISION (TREE_TYPE (treeop1));
else
bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (treeop1)));
rtx op0 = expand_normal (treeop0);
rtx op1 = expand_normal (treeop1);
rtx dst = gen_reg_rtx (mode);
emit_move_insn (dst, op0);
store_bit_field (dst, bitsize, bitpos, 0, 0,
TYPE_MODE (TREE_TYPE (treeop1)), op1, false);
return dst;
}
default:
gcc_unreachable ();
}

View File

@ -3163,6 +3163,7 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags)
case VEC_COND_EXPR:
case DOT_PROD_EXPR:
case BIT_INSERT_EXPR:
return OP_SAME (0) && OP_SAME (1) && OP_SAME (2);
default:
@ -11860,6 +11861,46 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
}
return NULL_TREE;
case BIT_INSERT_EXPR:
/* Perform (partial) constant folding of BIT_INSERT_EXPR. */
if (TREE_CODE (arg0) == INTEGER_CST
&& TREE_CODE (arg1) == INTEGER_CST)
{
unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (op2);
unsigned bitsize = TYPE_PRECISION (TREE_TYPE (arg1));
wide_int tem = wi::bit_and (arg0,
wi::shifted_mask (bitpos, bitsize, true,
TYPE_PRECISION (type)));
wide_int tem2
= wi::lshift (wi::zext (wi::to_wide (arg1, TYPE_PRECISION (type)),
bitsize), bitpos);
return wide_int_to_tree (type, wi::bit_or (tem, tem2));
}
else if (TREE_CODE (arg0) == VECTOR_CST
&& CONSTANT_CLASS_P (arg1)
&& types_compatible_p (TREE_TYPE (TREE_TYPE (arg0)),
TREE_TYPE (arg1)))
{
unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (op2);
unsigned HOST_WIDE_INT elsize
= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg1)));
if (bitpos % elsize == 0)
{
unsigned k = bitpos / elsize;
if (operand_equal_p (VECTOR_CST_ELT (arg0, k), arg1, 0))
return arg0;
else
{
tree *elts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (type));
memcpy (elts, VECTOR_CST_ELTS (arg0),
sizeof (tree) * TYPE_VECTOR_SUBPARTS (type));
elts[k] = arg1;
return build_vector (type, elts);
}
}
}
return NULL_TREE;
default:
return NULL_TREE;
} /* switch (code) */

View File

@ -478,6 +478,24 @@ dump_ternary_rhs (pretty_printer *buffer, gassign *gs, int spc, int flags)
pp_greater (buffer);
break;
case BIT_INSERT_EXPR:
pp_string (buffer, "BIT_INSERT_EXPR <");
dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
pp_string (buffer, ", ");
dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
pp_string (buffer, ", ");
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
pp_string (buffer, " (");
if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs2 (gs))))
pp_decimal_int (buffer,
TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs2 (gs))));
else
dump_generic_node (buffer,
TYPE_SIZE (TREE_TYPE (gimple_assign_rhs2 (gs))),
spc, flags, false);
pp_string (buffer, " bits)>");
break;
default:
gcc_unreachable ();
}

View File

@ -2043,6 +2043,7 @@ get_gimple_rhs_num_ops (enum tree_code code)
|| (SYM) == REALIGN_LOAD_EXPR \
|| (SYM) == VEC_COND_EXPR \
|| (SYM) == VEC_PERM_EXPR \
|| (SYM) == BIT_INSERT_EXPR \
|| (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == CONSTRUCTOR \
|| (SYM) == OBJ_TYPE_REF \

View File

@ -10931,6 +10931,10 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
/* Classified as tcc_expression. */
goto expr_3;
case BIT_INSERT_EXPR:
/* Argument 3 is a constant. */
goto expr_2;
case POINTER_PLUS_EXPR:
{
enum gimplify_status r0, r1;

View File

@ -1,3 +1,8 @@
2016-05-20 Richard Guenther <rguenther@suse.de>
PR tree-optimization/29756
* gcc.dg/tree-ssa/vector-6.c: New testcase.
2016-05-20 Richard Biener <rguenther@suse.de>
PR tree-optimization/71185

View File

@ -0,0 +1,33 @@
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-ccp1" } */
typedef int v4si __attribute__((vector_size (4 * sizeof (int))));
/* Overwrite element 0 of the by-value vector V with I by storing through
   an int pointer obtained from casting V's address, then return V.  The
   address of V is taken only inside the store expression itself.  */
v4si test1 (v4si v, int i)
{
((int *)&v)[0] = i;
return v;
}
/* Same element-0 store as test1, but the casted address of V is first
   kept in the named local pointer P and the store goes through *P.  */
v4si test2 (v4si v, int i)
{
int *p = (int *)&v;
*p = i;
return v;
}
/* Overwrite element 3 of V (the last of the four ints in a v4si) with I
   by indexing the casted address of V directly, then return V.  */
v4si test3 (v4si v, int i)
{
((int *)&v)[3] = i;
return v;
}
/* Same element-3 store as test3, but reached by advancing a named int
   pointer to V by three elements before storing through it.  */
v4si test4 (v4si v, int i)
{
int *p = (int *)&v;
/* Step from element 0 to element 3.  */
p += 3;
*p = i;
return v;
}
/* { dg-final { scan-tree-dump-times "Now a gimple register: v" 4 "ccp1" } } */

View File

@ -4134,6 +4134,53 @@ verify_gimple_assign_ternary (gassign *stmt)
return false;
case BIT_INSERT_EXPR:
if (! useless_type_conversion_p (lhs_type, rhs1_type))
{
error ("type mismatch in BIT_INSERT_EXPR");
debug_generic_expr (lhs_type);
debug_generic_expr (rhs1_type);
return true;
}
if (! ((INTEGRAL_TYPE_P (rhs1_type)
&& INTEGRAL_TYPE_P (rhs2_type))
|| (VECTOR_TYPE_P (rhs1_type)
&& types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))))
{
error ("not allowed type combination in BIT_INSERT_EXPR");
debug_generic_expr (rhs1_type);
debug_generic_expr (rhs2_type);
return true;
}
if (! tree_fits_uhwi_p (rhs3)
|| ! tree_fits_uhwi_p (TYPE_SIZE (rhs2_type)))
{
error ("invalid position or size in BIT_INSERT_EXPR");
return true;
}
if (INTEGRAL_TYPE_P (rhs1_type))
{
unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (rhs3);
if (bitpos >= TYPE_PRECISION (rhs1_type)
|| (bitpos + TYPE_PRECISION (rhs2_type)
> TYPE_PRECISION (rhs1_type)))
{
error ("insertion out of range in BIT_INSERT_EXPR");
return true;
}
}
else if (VECTOR_TYPE_P (rhs1_type))
{
unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (rhs3);
unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TYPE_SIZE (rhs2_type));
if (bitpos % bitsize != 0)
{
error ("vector insertion not at element boundary");
return true;
}
}
return false;
case DOT_PROD_EXPR:
case REALIGN_LOAD_EXPR:
/* FIXME. */

View File

@ -3941,6 +3941,10 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
return weights->div_mod_cost;
return 1;
/* Bit-field insertion needs several shift and mask operations. */
case BIT_INSERT_EXPR:
return 3;
default:
/* We expect a copy assignment with no operator. */
gcc_assert (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS);

View File

@ -1876,6 +1876,23 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, int flags,
pp_greater (pp);
break;
case BIT_INSERT_EXPR:
pp_string (pp, "BIT_INSERT_EXPR <");
dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
pp_string (pp, ", ");
dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false);
pp_string (pp, ", ");
dump_generic_node (pp, TREE_OPERAND (node, 2), spc, flags, false);
pp_string (pp, " (");
if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (node, 1))))
pp_decimal_int (pp,
TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (node, 1))));
else
dump_generic_node (pp, TYPE_SIZE (TREE_TYPE (TREE_OPERAND (node, 1))),
spc, flags, false);
pp_string (pp, " bits)>");
break;
case ARRAY_REF:
case ARRAY_RANGE_REF:
op0 = TREE_OPERAND (node, 0);

View File

@ -833,6 +833,7 @@ get_expr_operands (struct function *fn, gimple *stmt, tree *expr_p, int flags)
get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 0), flags);
return;
case BIT_INSERT_EXPR:
case COMPOUND_EXPR:
case OBJ_TYPE_REF:
case ASSERT_EXPR:

View File

@ -1275,21 +1275,48 @@ non_rewritable_lvalue_p (tree lhs)
&& DECL_P (TREE_OPERAND (lhs, 0)))
return false;
/* A decl that is wrapped inside a MEM-REF that covers
it full is also rewritable.
??? The following could be relaxed allowing component
/* ??? The following could be relaxed allowing component
references that do not change the access size. */
if (TREE_CODE (lhs) == MEM_REF
&& TREE_CODE (TREE_OPERAND (lhs, 0)) == ADDR_EXPR
&& integer_zerop (TREE_OPERAND (lhs, 1)))
&& TREE_CODE (TREE_OPERAND (lhs, 0)) == ADDR_EXPR)
{
tree decl = TREE_OPERAND (TREE_OPERAND (lhs, 0), 0);
if (DECL_P (decl)
/* A decl that is wrapped inside a MEM_REF that covers
it in full is also rewritable. */
if (integer_zerop (TREE_OPERAND (lhs, 1))
&& DECL_P (decl)
&& DECL_SIZE (decl) == TYPE_SIZE (TREE_TYPE (lhs))
&& (TREE_THIS_VOLATILE (decl) == TREE_THIS_VOLATILE (lhs)))
return false;
/* A vector-insert using a MEM_REF or ARRAY_REF is rewritable
using a BIT_INSERT_EXPR. */
if (DECL_P (decl)
&& VECTOR_TYPE_P (TREE_TYPE (decl))
&& TYPE_MODE (TREE_TYPE (decl)) != BLKmode
&& types_compatible_p (TREE_TYPE (lhs),
TREE_TYPE (TREE_TYPE (decl)))
&& tree_fits_uhwi_p (TREE_OPERAND (lhs, 1))
&& tree_int_cst_lt (TREE_OPERAND (lhs, 1),
TYPE_SIZE_UNIT (TREE_TYPE (decl)))
&& (tree_to_uhwi (TREE_OPERAND (lhs, 1))
% tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0)
return false;
}
/* A vector-insert using a BIT_FIELD_REF is rewritable using
BIT_INSERT_EXPR. */
if (TREE_CODE (lhs) == BIT_FIELD_REF
&& DECL_P (TREE_OPERAND (lhs, 0))
&& VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (lhs, 0)))
&& TYPE_MODE (TREE_TYPE (TREE_OPERAND (lhs, 0))) != BLKmode
&& types_compatible_p (TREE_TYPE (lhs),
TREE_TYPE (TREE_TYPE (TREE_OPERAND (lhs, 0))))
&& (tree_to_uhwi (TREE_OPERAND (lhs, 2))
% tree_to_uhwi (TYPE_SIZE (TREE_TYPE (lhs)))) == 0)
return false;
return true;
}
@ -1511,6 +1538,62 @@ execute_update_addresses_taken (void)
continue;
}
/* Rewrite a vector insert via a BIT_FIELD_REF on the LHS
into a BIT_INSERT_EXPR. */
if (TREE_CODE (lhs) == BIT_FIELD_REF
&& DECL_P (TREE_OPERAND (lhs, 0))
&& bitmap_bit_p (suitable_for_renaming,
DECL_UID (TREE_OPERAND (lhs, 0)))
&& VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (lhs, 0)))
&& TYPE_MODE (TREE_TYPE (TREE_OPERAND (lhs, 0))) != BLKmode
&& types_compatible_p (TREE_TYPE (lhs),
TREE_TYPE (TREE_TYPE
(TREE_OPERAND (lhs, 0))))
&& (tree_to_uhwi (TREE_OPERAND (lhs, 2))
% tree_to_uhwi (TYPE_SIZE (TREE_TYPE (lhs))) == 0))
{
tree var = TREE_OPERAND (lhs, 0);
tree val = gimple_assign_rhs1 (stmt);
tree bitpos = TREE_OPERAND (lhs, 2);
gimple_assign_set_lhs (stmt, var);
gimple_assign_set_rhs_with_ops
(&gsi, BIT_INSERT_EXPR, var, val, bitpos);
stmt = gsi_stmt (gsi);
unlink_stmt_vdef (stmt);
update_stmt (stmt);
continue;
}
/* Rewrite a vector insert using a MEM_REF on the LHS
into a BIT_INSERT_EXPR. */
if (TREE_CODE (lhs) == MEM_REF
&& TREE_CODE (TREE_OPERAND (lhs, 0)) == ADDR_EXPR
&& (sym = TREE_OPERAND (TREE_OPERAND (lhs, 0), 0))
&& DECL_P (sym)
&& bitmap_bit_p (suitable_for_renaming, DECL_UID (sym))
&& VECTOR_TYPE_P (TREE_TYPE (sym))
&& TYPE_MODE (TREE_TYPE (sym)) != BLKmode
&& types_compatible_p (TREE_TYPE (lhs),
TREE_TYPE (TREE_TYPE (sym)))
&& tree_fits_uhwi_p (TREE_OPERAND (lhs, 1))
&& tree_int_cst_lt (TREE_OPERAND (lhs, 1),
TYPE_SIZE_UNIT (TREE_TYPE (sym)))
&& (tree_to_uhwi (TREE_OPERAND (lhs, 1))
% tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0)
{
tree val = gimple_assign_rhs1 (stmt);
tree bitpos
= wide_int_to_tree (bitsizetype,
mem_ref_offset (lhs) * BITS_PER_UNIT);
gimple_assign_set_lhs (stmt, sym);
gimple_assign_set_rhs_with_ops
(&gsi, BIT_INSERT_EXPR, sym, val, bitpos);
stmt = gsi_stmt (gsi);
unlink_stmt_vdef (stmt);
update_stmt (stmt);
continue;
}
/* We shouldn't have any fancy wrapping of
component-refs on the LHS, but look through
VIEW_CONVERT_EXPRs as that is easy. */

View File

@ -852,6 +852,21 @@ DEFTREECODE (ADDR_EXPR, "addr_expr", tcc_expression, 1)
descriptor of type ptr_mode. */
DEFTREECODE (FDESC_EXPR, "fdesc_expr", tcc_expression, 2)
/* Given a container value, a replacement value and a bit position within
the container, produce the value that results from replacing the part of
the container starting at the bit position with the replacement value.
Operand 0 is a tree for the container value of integral or vector type;
Operand 1 is a tree for the replacement value, whose type is either
another integral type or the element type of the vector container;
Operand 2 is a tree giving the constant bit position.
The number of bits replaced is given by the precision of the type of the
replacement value if it is integral or by its size if it is non-integral.
??? The reason to make the size of the replacement implicit is to avoid
introducing a quaternary operation.
The replaced bits shall be fully inside the container. If the container
is of vector type, then these bits shall be aligned with its elements. */
DEFTREECODE (BIT_INSERT_EXPR, "bit_field_insert", tcc_expression, 3)
/* Given two real or integer operands of the same type,
returns a complex value of the corresponding complex type. */
DEFTREECODE (COMPLEX_EXPR, "complex_expr", tcc_binary, 2)