tree.c (tree_ctz): New function.
* tree.c (tree_ctz): New function. * tree.h (tree_ctz): New prototype. * tree-ssanames.h (get_range_info, get_nonzero_bits): Change first argument from tree to const_tree. * tree-ssanames.c (get_range_info, get_nonzero_bits): Likewise. * tree-vectorizer.h (vect_generate_tmps_on_preheader): New prototype. * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): No longer static. * expr.c (highest_pow2_factor): Reimplemented using tree_ctz. * tree-vect-loop.c (vect_analyze_loop_operations, vect_transform_loop): Don't force scalar loop for bound just because number of iterations is unknown, only do it if it is not known to be a multiple of vectorization_factor. * builtins.c (get_object_alignment_2): Use tree_ctz on offset. From-SVN: r204257
This commit is contained in:
parent
c853f62af3
commit
eae76e53d9
@ -1,5 +1,20 @@
|
||||
2013-10-31 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* tree.c (tree_ctz): New function.
|
||||
* tree.h (tree_ctz): New prototype.
|
||||
* tree-ssanames.h (get_range_info, get_nonzero_bits): Change
|
||||
first argument from tree to const_tree.
|
||||
* tree-ssanames.c (get_range_info, get_nonzero_bits): Likewise.
|
||||
* tree-vectorizer.h (vect_generate_tmps_on_preheader): New prototype.
|
||||
* tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): No longer
|
||||
static.
|
||||
* expr.c (highest_pow2_factor): Reimplemented using tree_ctz.
|
||||
* tree-vect-loop.c (vect_analyze_loop_operations,
|
||||
vect_transform_loop): Don't force scalar loop for bound just because
|
||||
number of iterations is unknown, only do it if it is not known to be
|
||||
a multiple of vectorization_factor.
|
||||
* builtins.c (get_object_alignment_2): Use tree_ctz on offset.
|
||||
|
||||
* gimple-pretty-print.c (dump_ssaname_info): Print newline also
|
||||
in case of VR_VARYING. Print get_nonzero_bits if not all ones.
|
||||
* tree-ssanames.h (struct range_info_def): Add nonzero_bits field.
|
||||
|
@ -314,7 +314,7 @@ get_object_alignment_2 (tree exp, unsigned int *alignp,
|
||||
tree offset;
|
||||
enum machine_mode mode;
|
||||
int unsignedp, volatilep;
|
||||
unsigned int inner, align = BITS_PER_UNIT;
|
||||
unsigned int align = BITS_PER_UNIT;
|
||||
bool known_alignment = false;
|
||||
|
||||
/* Get the innermost object and the constant (bitpos) and possibly
|
||||
@ -423,50 +423,16 @@ get_object_alignment_2 (tree exp, unsigned int *alignp,
|
||||
|
||||
/* If there is a non-constant offset part extract the maximum
|
||||
alignment that can prevail. */
|
||||
inner = ~0U;
|
||||
while (offset)
|
||||
if (offset)
|
||||
{
|
||||
tree next_offset;
|
||||
|
||||
if (TREE_CODE (offset) == PLUS_EXPR)
|
||||
int trailing_zeros = tree_ctz (offset);
|
||||
if (trailing_zeros < HOST_BITS_PER_INT)
|
||||
{
|
||||
next_offset = TREE_OPERAND (offset, 0);
|
||||
offset = TREE_OPERAND (offset, 1);
|
||||
unsigned int inner = (1U << trailing_zeros) * BITS_PER_UNIT;
|
||||
if (inner)
|
||||
align = MIN (align, inner);
|
||||
}
|
||||
else
|
||||
next_offset = NULL;
|
||||
if (host_integerp (offset, 1))
|
||||
{
|
||||
/* Any overflow in calculating offset_bits won't change
|
||||
the alignment. */
|
||||
unsigned offset_bits
|
||||
= ((unsigned) tree_low_cst (offset, 1) * BITS_PER_UNIT);
|
||||
|
||||
if (offset_bits)
|
||||
inner = MIN (inner, (offset_bits & -offset_bits));
|
||||
}
|
||||
else if (TREE_CODE (offset) == MULT_EXPR
|
||||
&& host_integerp (TREE_OPERAND (offset, 1), 1))
|
||||
{
|
||||
/* Any overflow in calculating offset_factor won't change
|
||||
the alignment. */
|
||||
unsigned offset_factor
|
||||
= ((unsigned) tree_low_cst (TREE_OPERAND (offset, 1), 1)
|
||||
* BITS_PER_UNIT);
|
||||
|
||||
if (offset_factor)
|
||||
inner = MIN (inner, (offset_factor & -offset_factor));
|
||||
}
|
||||
else
|
||||
{
|
||||
inner = MIN (inner, BITS_PER_UNIT);
|
||||
break;
|
||||
}
|
||||
offset = next_offset;
|
||||
}
|
||||
/* Alignment is innermost object alignment adjusted by the constant
|
||||
and non-constant offset parts. */
|
||||
align = MIN (align, inner);
|
||||
|
||||
*alignp = align;
|
||||
*bitposp = bitpos & (*alignp - 1);
|
||||
|
76
gcc/expr.c
76
gcc/expr.c
@ -7283,74 +7283,14 @@ safe_from_p (const_rtx x, tree exp, int top_p)
|
||||
unsigned HOST_WIDE_INT
|
||||
highest_pow2_factor (const_tree exp)
|
||||
{
|
||||
unsigned HOST_WIDE_INT c0, c1;
|
||||
|
||||
switch (TREE_CODE (exp))
|
||||
{
|
||||
case INTEGER_CST:
|
||||
/* We can find the lowest bit that's a one. If the low
|
||||
HOST_BITS_PER_WIDE_INT bits are zero, return BIGGEST_ALIGNMENT.
|
||||
We need to handle this case since we can find it in a COND_EXPR,
|
||||
a MIN_EXPR, or a MAX_EXPR. If the constant overflows, we have an
|
||||
erroneous program, so return BIGGEST_ALIGNMENT to avoid any
|
||||
later ICE. */
|
||||
if (TREE_OVERFLOW (exp))
|
||||
return BIGGEST_ALIGNMENT;
|
||||
else
|
||||
{
|
||||
/* Note: tree_low_cst is intentionally not used here,
|
||||
we don't care about the upper bits. */
|
||||
c0 = TREE_INT_CST_LOW (exp);
|
||||
c0 &= -c0;
|
||||
return c0 ? c0 : BIGGEST_ALIGNMENT;
|
||||
}
|
||||
break;
|
||||
|
||||
case PLUS_EXPR: case MINUS_EXPR: case MIN_EXPR: case MAX_EXPR:
|
||||
c0 = highest_pow2_factor (TREE_OPERAND (exp, 0));
|
||||
c1 = highest_pow2_factor (TREE_OPERAND (exp, 1));
|
||||
return MIN (c0, c1);
|
||||
|
||||
case MULT_EXPR:
|
||||
c0 = highest_pow2_factor (TREE_OPERAND (exp, 0));
|
||||
c1 = highest_pow2_factor (TREE_OPERAND (exp, 1));
|
||||
return c0 * c1;
|
||||
|
||||
case ROUND_DIV_EXPR: case TRUNC_DIV_EXPR: case FLOOR_DIV_EXPR:
|
||||
case CEIL_DIV_EXPR:
|
||||
if (integer_pow2p (TREE_OPERAND (exp, 1))
|
||||
&& host_integerp (TREE_OPERAND (exp, 1), 1))
|
||||
{
|
||||
c0 = highest_pow2_factor (TREE_OPERAND (exp, 0));
|
||||
c1 = tree_low_cst (TREE_OPERAND (exp, 1), 1);
|
||||
return MAX (1, c0 / c1);
|
||||
}
|
||||
break;
|
||||
|
||||
case BIT_AND_EXPR:
|
||||
/* The highest power of two of a bit-and expression is the maximum of
|
||||
that of its operands. We typically get here for a complex LHS and
|
||||
a constant negative power of two on the RHS to force an explicit
|
||||
alignment, so don't bother looking at the LHS. */
|
||||
return highest_pow2_factor (TREE_OPERAND (exp, 1));
|
||||
|
||||
CASE_CONVERT:
|
||||
case SAVE_EXPR:
|
||||
return highest_pow2_factor (TREE_OPERAND (exp, 0));
|
||||
|
||||
case COMPOUND_EXPR:
|
||||
return highest_pow2_factor (TREE_OPERAND (exp, 1));
|
||||
|
||||
case COND_EXPR:
|
||||
c0 = highest_pow2_factor (TREE_OPERAND (exp, 1));
|
||||
c1 = highest_pow2_factor (TREE_OPERAND (exp, 2));
|
||||
return MIN (c0, c1);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 1;
|
||||
unsigned HOST_WIDE_INT ret;
|
||||
int trailing_zeros = tree_ctz (exp);
|
||||
if (trailing_zeros >= HOST_BITS_PER_WIDE_INT)
|
||||
return BIGGEST_ALIGNMENT;
|
||||
ret = (unsigned HOST_WIDE_INT) 1 << trailing_zeros;
|
||||
if (ret > BIGGEST_ALIGNMENT)
|
||||
return BIGGEST_ALIGNMENT;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Similar, except that the alignment requirements of TARGET are
|
||||
|
@ -221,7 +221,7 @@ set_range_info (tree name, double_int min, double_int max)
|
||||
is used to determine if MIN and MAX are valid values. */
|
||||
|
||||
enum value_range_type
|
||||
get_range_info (tree name, double_int *min, double_int *max)
|
||||
get_range_info (const_tree name, double_int *min, double_int *max)
|
||||
{
|
||||
enum value_range_type range_type;
|
||||
gcc_assert (!POINTER_TYPE_P (TREE_TYPE (name)));
|
||||
@ -271,7 +271,7 @@ set_nonzero_bits (tree name, double_int mask)
|
||||
NAME, or double_int_minus_one if unknown. */
|
||||
|
||||
double_int
|
||||
get_nonzero_bits (tree name)
|
||||
get_nonzero_bits (const_tree name)
|
||||
{
|
||||
if (POINTER_TYPE_P (TREE_TYPE (name)))
|
||||
{
|
||||
|
@ -72,9 +72,10 @@ enum value_range_type { VR_UNDEFINED, VR_RANGE, VR_ANTI_RANGE, VR_VARYING };
|
||||
/* Sets the value range to SSA. */
|
||||
extern void set_range_info (tree, double_int, double_int);
|
||||
/* Gets the value range from SSA. */
|
||||
extern enum value_range_type get_range_info (tree, double_int *, double_int *);
|
||||
extern enum value_range_type get_range_info (const_tree, double_int *,
|
||||
double_int *);
|
||||
extern void set_nonzero_bits (tree, double_int);
|
||||
extern double_int get_nonzero_bits (tree);
|
||||
extern double_int get_nonzero_bits (const_tree);
|
||||
extern void init_ssanames (struct function *, int);
|
||||
extern void fini_ssanames (void);
|
||||
extern void ssanames_print_statistics (void);
|
||||
|
@ -1437,7 +1437,7 @@ vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
|
||||
and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
|
||||
if that is non-NULL. */
|
||||
|
||||
static void
|
||||
void
|
||||
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
||||
tree *ni_name_ptr,
|
||||
tree *ratio_mult_vf_name_ptr,
|
||||
|
@ -1586,9 +1586,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
|| LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
|
||||
|| LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
|
||||
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
|
||||
|| ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
|
||||
< exact_log2 (vectorization_factor)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n");
|
||||
@ -5656,15 +5656,20 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||
will remain scalar and will compute the remaining (n%VF) iterations.
|
||||
(VF is the vectorization factor). */
|
||||
|
||||
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
|| (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
&& LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
|
||||
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
|
||||
if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
|
||||
< exact_log2 (vectorization_factor)
|
||||
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
|
||||
vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
|
||||
th, check_profitability);
|
||||
else
|
||||
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
|
||||
ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
|
||||
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
|
||||
else
|
||||
{
|
||||
tree ni_name, ratio_mult_vf;
|
||||
vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf,
|
||||
&ratio, NULL);
|
||||
}
|
||||
|
||||
/* 1) Make sure the loop header has exactly two entries
|
||||
2) Make sure we have a preheader basic block. */
|
||||
|
@ -901,6 +901,8 @@ extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
|
||||
extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
|
||||
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge);
|
||||
extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
|
||||
extern void vect_generate_tmps_on_preheader (loop_vec_info, tree *, tree *,
|
||||
tree *, gimple_seq);
|
||||
extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *,
|
||||
unsigned int, bool);
|
||||
extern void vect_do_peeling_for_alignment (loop_vec_info, unsigned int, bool);
|
||||
|
113
gcc/tree.c
113
gcc/tree.c
@ -2214,6 +2214,119 @@ tree_floor_log2 (const_tree expr)
|
||||
: floor_log2 (low));
|
||||
}
|
||||
|
||||
/* Return number of known trailing zero bits in EXPR, or, if the value of
|
||||
EXPR is known to be zero, the precision of it's type. */
|
||||
|
||||
unsigned int
|
||||
tree_ctz (const_tree expr)
|
||||
{
|
||||
if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
|
||||
&& !POINTER_TYPE_P (TREE_TYPE (expr)))
|
||||
return 0;
|
||||
|
||||
unsigned int ret1, ret2, prec = TYPE_PRECISION (TREE_TYPE (expr));
|
||||
switch (TREE_CODE (expr))
|
||||
{
|
||||
case INTEGER_CST:
|
||||
ret1 = tree_to_double_int (expr).trailing_zeros ();
|
||||
return MIN (ret1, prec);
|
||||
case SSA_NAME:
|
||||
ret1 = get_nonzero_bits (expr).trailing_zeros ();
|
||||
return MIN (ret1, prec);
|
||||
case PLUS_EXPR:
|
||||
case MINUS_EXPR:
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
case MIN_EXPR:
|
||||
case MAX_EXPR:
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
if (ret1 == 0)
|
||||
return ret1;
|
||||
ret2 = tree_ctz (TREE_OPERAND (expr, 1));
|
||||
return MIN (ret1, ret2);
|
||||
case POINTER_PLUS_EXPR:
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
ret2 = tree_ctz (TREE_OPERAND (expr, 1));
|
||||
/* Second operand is sizetype, which could be in theory
|
||||
wider than pointer's precision. Make sure we never
|
||||
return more than prec. */
|
||||
ret2 = MIN (ret2, prec);
|
||||
return MIN (ret1, ret2);
|
||||
case BIT_AND_EXPR:
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
ret2 = tree_ctz (TREE_OPERAND (expr, 1));
|
||||
return MAX (ret1, ret2);
|
||||
case MULT_EXPR:
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
ret2 = tree_ctz (TREE_OPERAND (expr, 1));
|
||||
return MIN (ret1 + ret2, prec);
|
||||
case LSHIFT_EXPR:
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
if (host_integerp (TREE_OPERAND (expr, 1), 1)
|
||||
&& ((unsigned HOST_WIDE_INT) tree_low_cst (TREE_OPERAND (expr, 1), 1)
|
||||
< (unsigned HOST_WIDE_INT) prec))
|
||||
{
|
||||
ret2 = tree_low_cst (TREE_OPERAND (expr, 1), 1);
|
||||
return MIN (ret1 + ret2, prec);
|
||||
}
|
||||
return ret1;
|
||||
case RSHIFT_EXPR:
|
||||
if (host_integerp (TREE_OPERAND (expr, 1), 1)
|
||||
&& ((unsigned HOST_WIDE_INT) tree_low_cst (TREE_OPERAND (expr, 1), 1)
|
||||
< (unsigned HOST_WIDE_INT) prec))
|
||||
{
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
ret2 = tree_low_cst (TREE_OPERAND (expr, 1), 1);
|
||||
if (ret1 > ret2)
|
||||
return ret1 - ret2;
|
||||
}
|
||||
return 0;
|
||||
case TRUNC_DIV_EXPR:
|
||||
case CEIL_DIV_EXPR:
|
||||
case FLOOR_DIV_EXPR:
|
||||
case ROUND_DIV_EXPR:
|
||||
case EXACT_DIV_EXPR:
|
||||
if (TREE_CODE (TREE_OPERAND (expr, 1)) == INTEGER_CST
|
||||
&& tree_int_cst_sgn (TREE_OPERAND (expr, 1)) == 1)
|
||||
{
|
||||
int l = tree_log2 (TREE_OPERAND (expr, 1));
|
||||
if (l >= 0)
|
||||
{
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
ret2 = l;
|
||||
if (ret1 > ret2)
|
||||
return ret1 - ret2;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
CASE_CONVERT:
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 0));
|
||||
if (ret1 && ret1 == TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (expr, 0))))
|
||||
ret1 = prec;
|
||||
return MIN (ret1, prec);
|
||||
case SAVE_EXPR:
|
||||
return tree_ctz (TREE_OPERAND (expr, 0));
|
||||
case COND_EXPR:
|
||||
ret1 = tree_ctz (TREE_OPERAND (expr, 1));
|
||||
if (ret1 == 0)
|
||||
return 0;
|
||||
ret2 = tree_ctz (TREE_OPERAND (expr, 2));
|
||||
return MIN (ret1, ret2);
|
||||
case COMPOUND_EXPR:
|
||||
return tree_ctz (TREE_OPERAND (expr, 1));
|
||||
case ADDR_EXPR:
|
||||
ret1 = get_pointer_alignment (CONST_CAST_TREE (expr));
|
||||
if (ret1 > BITS_PER_UNIT)
|
||||
{
|
||||
ret1 = ctz_hwi (ret1 / BITS_PER_UNIT);
|
||||
return MIN (ret1, prec);
|
||||
}
|
||||
return 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return 1 if EXPR is the real constant zero. Trailing zeroes matter for
|
||||
decimal float constants, so don't return 1 for them. */
|
||||
|
||||
|
@ -4564,6 +4564,7 @@ extern void get_type_static_bounds (const_tree, mpz_t, mpz_t);
|
||||
extern bool variably_modified_type_p (tree, tree);
|
||||
extern int tree_log2 (const_tree);
|
||||
extern int tree_floor_log2 (const_tree);
|
||||
extern unsigned int tree_ctz (const_tree);
|
||||
extern int simple_cst_equal (const_tree, const_tree);
|
||||
extern hashval_t iterative_hash_expr (const_tree, hashval_t);
|
||||
extern hashval_t iterative_hash_exprs_commutative (const_tree,
|
||||
|
Loading…
Reference in New Issue
Block a user