re PR tree-optimization/52272 (Performance regression of 410.bwaves on x86.)
PR tree-optimization/52272 * tree-ssa-loop-ivopts.c (struct iv_common_cand): New struct. (struct iv_common_cand_hasher): New struct. (iv_common_cand_hasher::hash): New function. (iv_common_cand_hasher::equal): New function. (struct ivopts_data): New fields, iv_common_cand_tab and iv_common_cands. (tree_ssa_iv_optimize_init): Initialize above fields. (record_common_cand, common_cand_cmp): New functions. (add_iv_candidate_derived_from_uses): New function. (add_iv_candidate_for_use): Record iv_common_cands derived from iv use in hash table, instead of adding candidates directly. (add_iv_candidate_for_uses): Call add_iv_candidate_derived_from_uses. (record_important_candidates): Add important candidates to iv uses' related_cands. Always keep related_cands for future use. (try_add_cand_for): Use iv uses' related_cands. (free_loop_data, tree_ssa_iv_optimize_finalize): Release new fields in struct ivopts_data, iv_common_cand_tab and iv_common_cands. From-SVN: r230647
This commit is contained in:
parent
ec5a0fe042
commit
cf5b92ef1e
@ -1,3 +1,24 @@
|
||||
2015-11-20 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/52272
|
||||
* tree-ssa-loop-ivopts.c (struct iv_common_cand): New struct.
|
||||
(struct iv_common_cand_hasher): New struct.
|
||||
(iv_common_cand_hasher::hash): New function.
|
||||
(iv_common_cand_hasher::equal): New function.
|
||||
(struct ivopts_data): New fields, iv_common_cand_tab and
|
||||
iv_common_cands.
|
||||
(tree_ssa_iv_optimize_init): Initialize above fields.
|
||||
(record_common_cand, common_cand_cmp): New functions.
|
||||
(add_iv_candidate_derived_from_uses): New function.
|
||||
(add_iv_candidate_for_use): Record iv_common_cands derived from
|
||||
iv use in hash table, instead of adding candidates directly.
|
||||
(add_iv_candidate_for_uses): Call add_iv_candidate_derived_from_uses.
|
||||
(record_important_candidates): Add important candidates to iv uses'
|
||||
related_cands. Always keep related_cands for future use.
|
||||
(try_add_cand_for): Use iv uses' related_cands.
|
||||
(free_loop_data, tree_ssa_iv_optimize_finalize): Release new fields
|
||||
in struct ivopts_data, iv_common_cand_tab and iv_common_cands.
|
||||
|
||||
2015-11-20 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
|
||||
|
||||
* config/s390/s390.md ("bswaphi2"): New pattern.
|
||||
|
@ -247,6 +247,45 @@ struct iv_cand
|
||||
smaller type. */
|
||||
};
|
||||
|
||||
/* Hashtable entry for common candidate derived from iv uses. */
|
||||
struct iv_common_cand
|
||||
{
|
||||
tree base;
|
||||
tree step;
|
||||
/* IV uses from which this common candidate is derived. */
|
||||
vec<iv_use *> uses;
|
||||
hashval_t hash;
|
||||
};
|
||||
|
||||
/* Hashtable helpers. */
|
||||
|
||||
struct iv_common_cand_hasher : free_ptr_hash <iv_common_cand>
|
||||
{
|
||||
static inline hashval_t hash (const iv_common_cand *);
|
||||
static inline bool equal (const iv_common_cand *, const iv_common_cand *);
|
||||
};
|
||||
|
||||
/* Hash function for possible common candidates. */
|
||||
|
||||
inline hashval_t
|
||||
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
|
||||
{
|
||||
return ccand->hash;
|
||||
}
|
||||
|
||||
/* Hash table equality function for common candidates. */
|
||||
|
||||
inline bool
|
||||
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
|
||||
const iv_common_cand *ccand2)
|
||||
{
|
||||
return (ccand1->hash == ccand2->hash
|
||||
&& operand_equal_p (ccand1->base, ccand2->base, 0)
|
||||
&& operand_equal_p (ccand1->step, ccand2->step, 0)
|
||||
&& (TYPE_PRECISION (TREE_TYPE (ccand1->base))
|
||||
== TYPE_PRECISION (TREE_TYPE (ccand2->base))));
|
||||
}
|
||||
|
||||
/* Loop invariant expression hashtable entry. */
|
||||
struct iv_inv_expr_ent
|
||||
{
|
||||
@ -255,8 +294,6 @@ struct iv_inv_expr_ent
|
||||
hashval_t hash;
|
||||
};
|
||||
|
||||
/* The data used by the induction variable optimizations. */
|
||||
|
||||
/* Hashtable helpers. */
|
||||
|
||||
struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
|
||||
@ -323,6 +360,12 @@ struct ivopts_data
|
||||
/* Cache used by tree_to_aff_combination_expand. */
|
||||
hash_map<tree, name_expansion *> *name_expansion_cache;
|
||||
|
||||
/* The hashtable of common candidates derived from iv uses. */
|
||||
hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
|
||||
|
||||
/* The common candidates. */
|
||||
vec<iv_common_cand *> iv_common_cands;
|
||||
|
||||
/* The maximum invariant id. */
|
||||
unsigned max_inv_id;
|
||||
|
||||
@ -894,6 +937,8 @@ tree_ssa_iv_optimize_init (struct ivopts_data *data)
|
||||
data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
|
||||
data->inv_expr_id = 0;
|
||||
data->name_expansion_cache = NULL;
|
||||
data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
|
||||
data->iv_common_cands.create (20);
|
||||
decl_rtl_to_reset.create (20);
|
||||
gcc_obstack_init (&data->iv_obstack);
|
||||
}
|
||||
@ -3063,6 +3108,96 @@ add_iv_candidate_for_bivs (struct ivopts_data *data)
|
||||
}
|
||||
}
|
||||
|
||||
/* Record common candidate {BASE, STEP} derived from USE in hashtable. */
|
||||
|
||||
static void
|
||||
record_common_cand (struct ivopts_data *data, tree base,
|
||||
tree step, struct iv_use *use)
|
||||
{
|
||||
struct iv_common_cand ent;
|
||||
struct iv_common_cand **slot;
|
||||
|
||||
gcc_assert (use != NULL);
|
||||
|
||||
ent.base = base;
|
||||
ent.step = step;
|
||||
ent.hash = iterative_hash_expr (base, 0);
|
||||
ent.hash = iterative_hash_expr (step, ent.hash);
|
||||
|
||||
slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
|
||||
if (*slot == NULL)
|
||||
{
|
||||
*slot = XNEW (struct iv_common_cand);
|
||||
(*slot)->base = base;
|
||||
(*slot)->step = step;
|
||||
(*slot)->uses.create (8);
|
||||
(*slot)->hash = ent.hash;
|
||||
data->iv_common_cands.safe_push ((*slot));
|
||||
}
|
||||
(*slot)->uses.safe_push (use);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Comparison function used to sort common candidates. */
|
||||
|
||||
static int
|
||||
common_cand_cmp (const void *p1, const void *p2)
|
||||
{
|
||||
unsigned n1, n2;
|
||||
const struct iv_common_cand *const *const ccand1
|
||||
= (const struct iv_common_cand *const *)p1;
|
||||
const struct iv_common_cand *const *const ccand2
|
||||
= (const struct iv_common_cand *const *)p2;
|
||||
|
||||
n1 = (*ccand1)->uses.length ();
|
||||
n2 = (*ccand2)->uses.length ();
|
||||
return n2 - n1;
|
||||
}
|
||||
|
||||
/* Adds IV candidates based on common candidated recorded. */
|
||||
|
||||
static void
|
||||
add_iv_candidate_derived_from_uses (struct ivopts_data *data)
|
||||
{
|
||||
unsigned i, j;
|
||||
struct iv_cand *cand_1, *cand_2;
|
||||
|
||||
data->iv_common_cands.qsort (common_cand_cmp);
|
||||
for (i = 0; i < data->iv_common_cands.length (); i++)
|
||||
{
|
||||
struct iv_common_cand *ptr = data->iv_common_cands[i];
|
||||
|
||||
/* Only add IV candidate if it's derived from multiple uses. */
|
||||
if (ptr->uses.length () <= 1)
|
||||
break;
|
||||
|
||||
cand_1 = NULL;
|
||||
cand_2 = NULL;
|
||||
if (ip_normal_pos (data->current_loop))
|
||||
cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
|
||||
false, IP_NORMAL, NULL, NULL);
|
||||
|
||||
if (ip_end_pos (data->current_loop)
|
||||
&& allow_ip_end_pos_p (data->current_loop))
|
||||
cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
|
||||
false, IP_END, NULL, NULL);
|
||||
|
||||
/* Bind deriving uses and the new candidates. */
|
||||
for (j = 0; j < ptr->uses.length (); j++)
|
||||
{
|
||||
struct iv_use *use = ptr->uses[j];
|
||||
if (cand_1)
|
||||
bitmap_set_bit (use->related_cands, cand_1->id);
|
||||
if (cand_2)
|
||||
bitmap_set_bit (use->related_cands, cand_2->id);
|
||||
}
|
||||
}
|
||||
|
||||
/* Release data since it is useless from this point. */
|
||||
data->iv_common_cand_tab->empty ();
|
||||
data->iv_common_cands.truncate (0);
|
||||
}
|
||||
|
||||
/* Adds candidates based on the value of USE's iv. */
|
||||
|
||||
static void
|
||||
@ -3075,19 +3210,59 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
|
||||
|
||||
add_candidate (data, iv->base, iv->step, false, use);
|
||||
|
||||
/* The same, but with initial value zero. Make such variable important,
|
||||
since it is generic enough so that possibly many uses may be based
|
||||
on it. */
|
||||
/* Record common candidate for use in case it can be shared by others. */
|
||||
record_common_cand (data, iv->base, iv->step, use);
|
||||
|
||||
/* Record common candidate with initial value zero. */
|
||||
basetype = TREE_TYPE (iv->base);
|
||||
if (POINTER_TYPE_P (basetype))
|
||||
basetype = sizetype;
|
||||
add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
|
||||
record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
|
||||
|
||||
/* Third, try removing the constant offset. Make sure to even
|
||||
add a candidate for &a[0] vs. (T *)&a. */
|
||||
base = strip_offset (iv->base, &offset);
|
||||
if (offset || base != iv->base)
|
||||
add_candidate (data, base, iv->step, false, use);
|
||||
/* Record common candidate with constant offset stripped in base. */
|
||||
{
|
||||
base = strip_offset (iv->base, &offset);
|
||||
if (offset || base != iv->base)
|
||||
record_common_cand (data, base, iv->step, use);
|
||||
}
|
||||
|
||||
/* Record common candidate with base_object removed in base. */
|
||||
if (iv->base_object != NULL)
|
||||
{
|
||||
unsigned i;
|
||||
aff_tree aff_base;
|
||||
tree step, base_object = iv->base_object;
|
||||
|
||||
base = iv->base;
|
||||
step = iv->step;
|
||||
STRIP_NOPS (base);
|
||||
STRIP_NOPS (step);
|
||||
STRIP_NOPS (base_object);
|
||||
tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
|
||||
for (i = 0; i < aff_base.n; i++)
|
||||
{
|
||||
if (aff_base.elts[i].coef != 1)
|
||||
continue;
|
||||
|
||||
if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
|
||||
break;
|
||||
}
|
||||
if (i < aff_base.n)
|
||||
{
|
||||
aff_combination_remove_elt (&aff_base, i);
|
||||
base = aff_combination_to_tree (&aff_base);
|
||||
basetype = TREE_TYPE (base);
|
||||
if (POINTER_TYPE_P (basetype))
|
||||
basetype = sizetype;
|
||||
|
||||
step = fold_convert (basetype, step);
|
||||
record_common_cand (data, base, step, use);
|
||||
/* Also record common candidate with offset stripped. */
|
||||
base = strip_offset (base, &offset);
|
||||
if (offset)
|
||||
record_common_cand (data, base, step, use);
|
||||
}
|
||||
}
|
||||
|
||||
/* At last, add auto-incremental candidates. Make such variables
|
||||
important since other iv uses with same base object may be based
|
||||
@ -3123,10 +3298,10 @@ add_iv_candidate_for_uses (struct ivopts_data *data)
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
add_iv_candidate_derived_from_uses (data);
|
||||
}
|
||||
|
||||
/* Record important candidates and add them to related_cands bitmaps
|
||||
if needed. */
|
||||
/* Record important candidates and add them to related_cands bitmaps. */
|
||||
|
||||
static void
|
||||
record_important_candidates (struct ivopts_data *data)
|
||||
@ -3145,22 +3320,11 @@ record_important_candidates (struct ivopts_data *data)
|
||||
data->consider_all_candidates = (n_iv_cands (data)
|
||||
<= CONSIDER_ALL_CANDIDATES_BOUND);
|
||||
|
||||
if (data->consider_all_candidates)
|
||||
/* Add important candidates to uses' related_cands bitmaps. */
|
||||
for (i = 0; i < n_iv_uses (data); i++)
|
||||
{
|
||||
/* We will not need "related_cands" bitmaps in this case,
|
||||
so release them to decrease peak memory consumption. */
|
||||
for (i = 0; i < n_iv_uses (data); i++)
|
||||
{
|
||||
use = iv_use (data, i);
|
||||
BITMAP_FREE (use->related_cands);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Add important candidates to the related_cands bitmaps. */
|
||||
for (i = 0; i < n_iv_uses (data); i++)
|
||||
bitmap_ior_into (iv_use (data, i)->related_cands,
|
||||
data->important_candidates);
|
||||
use = iv_use (data, i);
|
||||
bitmap_ior_into (use->related_cands, data->important_candidates);
|
||||
}
|
||||
}
|
||||
|
||||
@ -6532,7 +6696,7 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
|
||||
too many ivs. The approach from few ivs to more seems more likely to be
|
||||
successful -- starting from few ivs, replacing an expensive use by a
|
||||
specific iv should always be a win. */
|
||||
EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
|
||||
EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
|
||||
{
|
||||
cand = iv_cand (data, i);
|
||||
|
||||
@ -7441,6 +7605,9 @@ free_loop_data (struct ivopts_data *data)
|
||||
|
||||
data->inv_expr_tab->empty ();
|
||||
data->inv_expr_id = 0;
|
||||
|
||||
data->iv_common_cand_tab->empty ();
|
||||
data->iv_common_cands.truncate (0);
|
||||
}
|
||||
|
||||
/* Finalizes data structures used by the iv optimization pass. LOOPS is the
|
||||
@ -7460,6 +7627,9 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
|
||||
delete data->inv_expr_tab;
|
||||
data->inv_expr_tab = NULL;
|
||||
free_affine_expand_cache (&data->name_expansion_cache);
|
||||
delete data->iv_common_cand_tab;
|
||||
data->iv_common_cand_tab = NULL;
|
||||
data->iv_common_cands.release ();
|
||||
obstack_free (&data->iv_obstack, NULL);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user