* expmed.h (alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST)
	(algorithm, alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved
	from expmed.c.
	(target_expmed): Add x_alg_hash and x_alg_hash_used_p.
	(alg_hash, alg_hash_used_p): New macros.
	* expmed.c (init_expmed): Clear alg_hash if reinitializing.
	(alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST, algorithm)
	(alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved to expmed.h.

From-SVN: r162104
This commit is contained in:
Richard Sandiford 2010-07-12 19:03:25 +00:00 committed by Richard Sandiford
parent aa1c5d72e9
commit c371bb7380
3 changed files with 129 additions and 107 deletions

View File

@ -1,3 +1,14 @@
2010-07-12 Richard Sandiford <rdsandiford@googlemail.com>
* expmed.h (alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST)
(algorithm, alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved
from expmed.c.
(target_expmed): Add x_alg_hash and x_alg_hash_used_p.
(alg_hash, alg_hash_used_p): New macros.
* expmed.c (init_expmed): Clear alg_hash if reinitializing.
(alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST, algorithm)
(alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved to expmed.h.
2010-07-12 Richard Sandiford <rdsandiford@googlemail.com>
* ira-int.h (target_ira_int): Add x_max_struct_costs_size, x_init_cost,

View File

@ -260,6 +260,10 @@ init_expmed (void)
}
}
}
if (alg_hash_used_p)
memset (alg_hash, 0, sizeof (alg_hash));
else
alg_hash_used_p = true;
default_rtl_profile ();
}
@ -2283,113 +2287,6 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
return temp;
}
/* Operation codes for the steps of a synthetic multiplication sequence.
   In the notes below, "total" is the running result and "coeff" is the
   integer coefficient attached to the step.  alg_unknown is 0, which is
   the value found in zero-filled storage.  */
enum alg_code
{
  alg_unknown = 0,
  alg_zero,        /* total := 0  */
  alg_m,           /* total := multiplicand  */
  alg_shift,       /* total := total * coeff  */
  alg_add_t_m2,    /* total := total + multiplicand * coeff  */
  alg_sub_t_m2,    /* total := total - multiplicand * coeff  */
  alg_add_factor,  /* total := total * coeff + total  */
  alg_sub_factor,  /* total := total * coeff - total  */
  alg_add_t2_m,    /* total := total * coeff + multiplicand  */
  alg_sub_t2_m,    /* total := total * coeff - multiplicand  */
  alg_impossible
};
/* The "cost" of a synthetic multiply sequence, measured two ways.

   COST is the total rtx_cost of every operator in the sequence:
   cost (leaf) is zero and cost (a op b) is
   rtx_cost (op) + cost (a) + cost (b).

   LATENCY is the minimum possible latency of the sequence on a
   hypothetical infinitely parallel CPU.  That is the critical path, or
   maximum height, of the expression tree: the sum of rtx_costs along
   the most expensive path from any leaf to the root.  latency (leaf) is
   zero and latency (a op b) is
   rtx_cost (op) + max (latency (a), latency (b)).  */
struct mult_cost
{
  /* Total rtx_cost of the multiplication sequence.  */
  short cost;

  /* The latency of the multiplication sequence.  */
  short latency;
};

/* Compare the mult_cost that X points to against a single integer
   "rtx_cost" value Y.  The result equals CHEAPER_MULT_COST (X, Z) where
   Z = {Y, Y}.  X and Y are expanded more than once, so neither may have
   side effects.  */
#define MULT_COST_LESS(X,Y)      \
  ((X)->cost != (Y)              \
   ? (X)->cost < (Y)             \
   : (X)->latency < (Y))

/* Compare the mult_costs that X and Y point to; true if X is cheaper
   than Y.  The lower "cost" wins, and when the "cost"s are tied the
   lower latency wins.  X and Y are expanded more than once, so neither
   may have side effects.  */
#define CHEAPER_MULT_COST(X,Y)   \
  ((X)->cost != (Y)->cost        \
   ? (X)->cost < (Y)->cost       \
   : (X)->latency < (Y)->latency)
/* A recorded sequence of operations.

   OPS is the number of operations recorded and COST is their total
   cost.  The operations themselves are stored in OP, and the
   corresponding logarithms of the integer coefficients in LOG.

   These are the operations:

     alg_zero        total := 0;
     alg_m           total := multiplicand;
     alg_shift       total := total * coeff
     alg_add_t_m2    total := total + multiplicand * coeff;
     alg_sub_t_m2    total := total - multiplicand * coeff;
     alg_add_factor  total := total * coeff + total;
     alg_sub_factor  total := total * coeff - total;
     alg_add_t2_m    total := total * coeff + multiplicand;
     alg_sub_t2_m    total := total * coeff - multiplicand;

   The first operand must be either alg_zero or alg_m.  */
struct algorithm {
  struct mult_cost cost;
  short ops;

  /* The sizes of OP and LOG are not directly related to the word size.
     The worst-case algorithms, however, arise when there are few
     consecutive ones or zeros, i.e. for a multiplicand like
     10101010101...  We then generate shift-by-2, add, shift-by-2,
     add, ..., which is wordsize operations in total.  */
  enum alg_code op[MAX_BITS_PER_WORD];
  char log[MAX_BITS_PER_WORD];
};
/* One entry of the multiplication cache/hash table.  */
struct alg_hash_entry
{
  /* The number we are multiplying by.  */
  unsigned HOST_WIDE_INT t;

  /* The mode in which something is multiplied by T.  */
  enum machine_mode mode;

  /* The best multiplication algorithm for T.  */
  enum alg_code alg;

  /* If ALG is not alg_impossible, the cost of the multiplication;
     otherwise the cost within which multiplication by T is
     impossible.  */
  struct mult_cost cost;

  /* Optimized for speed?  */
  bool speed;
};
/* How many entries the multiplication cache/hash table has.  The larger
   size is used when HOST_BITS_PER_WIDE_INT is 64.  */
#if HOST_BITS_PER_WIDE_INT == 64
# define NUM_ALG_HASH_ENTRIES 1031
#else
# define NUM_ALG_HASH_ENTRIES 307
#endif
/* Each entry of ALG_HASH caches the alg_code for some integer. This is
actually a hash table. If we have a collision, the older
entry is kicked out. */
static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
/* Indicates the type of fixup needed after a constant multiplication.
BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
the result should be negated, and ADD_VARIANT means that the

View File

@ -22,8 +22,118 @@ along with GCC; see the file COPYING3. If not see
#ifndef EXPMED_H
#define EXPMED_H 1
/* Operation codes for the steps of a synthetic multiplication sequence.
   In the notes below, "total" is the running result and "coeff" is the
   integer coefficient attached to the step.  alg_unknown is 0, which is
   the value found in zero-filled storage.  */
enum alg_code
{
  alg_unknown = 0,
  alg_zero,        /* total := 0  */
  alg_m,           /* total := multiplicand  */
  alg_shift,       /* total := total * coeff  */
  alg_add_t_m2,    /* total := total + multiplicand * coeff  */
  alg_sub_t_m2,    /* total := total - multiplicand * coeff  */
  alg_add_factor,  /* total := total * coeff + total  */
  alg_sub_factor,  /* total := total * coeff - total  */
  alg_add_t2_m,    /* total := total * coeff + multiplicand  */
  alg_sub_t2_m,    /* total := total * coeff - multiplicand  */
  alg_impossible
};
/* The "cost" of a synthetic multiply sequence, measured two ways.

   COST is the total rtx_cost of every operator in the sequence:
   cost (leaf) is zero and cost (a op b) is
   rtx_cost (op) + cost (a) + cost (b).

   LATENCY is the minimum possible latency of the sequence on a
   hypothetical infinitely parallel CPU.  That is the critical path, or
   maximum height, of the expression tree: the sum of rtx_costs along
   the most expensive path from any leaf to the root.  latency (leaf) is
   zero and latency (a op b) is
   rtx_cost (op) + max (latency (a), latency (b)).  */
struct mult_cost
{
  /* Total rtx_cost of the multiplication sequence.  */
  short cost;

  /* The latency of the multiplication sequence.  */
  short latency;
};

/* Compare the mult_cost that X points to against a single integer
   "rtx_cost" value Y.  The result equals CHEAPER_MULT_COST (X, Z) where
   Z = {Y, Y}.  X and Y are expanded more than once, so neither may have
   side effects.  */
#define MULT_COST_LESS(X,Y)      \
  ((X)->cost != (Y)              \
   ? (X)->cost < (Y)             \
   : (X)->latency < (Y))

/* Compare the mult_costs that X and Y point to; true if X is cheaper
   than Y.  The lower "cost" wins, and when the "cost"s are tied the
   lower latency wins.  X and Y are expanded more than once, so neither
   may have side effects.  */
#define CHEAPER_MULT_COST(X,Y)   \
  ((X)->cost != (Y)->cost        \
   ? (X)->cost < (Y)->cost       \
   : (X)->latency < (Y)->latency)
/* A recorded sequence of operations.

   OPS is the number of operations recorded and COST is their total
   cost.  The operations themselves are stored in OP, and the
   corresponding logarithms of the integer coefficients in LOG.

   These are the operations:

     alg_zero        total := 0;
     alg_m           total := multiplicand;
     alg_shift       total := total * coeff
     alg_add_t_m2    total := total + multiplicand * coeff;
     alg_sub_t_m2    total := total - multiplicand * coeff;
     alg_add_factor  total := total * coeff + total;
     alg_sub_factor  total := total * coeff - total;
     alg_add_t2_m    total := total * coeff + multiplicand;
     alg_sub_t2_m    total := total * coeff - multiplicand;

   The first operand must be either alg_zero or alg_m.  */
struct algorithm {
  struct mult_cost cost;
  short ops;

  /* The sizes of OP and LOG are not directly related to the word size.
     The worst-case algorithms, however, arise when there are few
     consecutive ones or zeros, i.e. for a multiplicand like
     10101010101...  We then generate shift-by-2, add, shift-by-2,
     add, ..., which is wordsize operations in total.  */
  enum alg_code op[MAX_BITS_PER_WORD];
  char log[MAX_BITS_PER_WORD];
};
/* One entry of the multiplication cache/hash table.  */
struct alg_hash_entry
{
  /* The number we are multiplying by.  */
  unsigned HOST_WIDE_INT t;

  /* The mode in which something is multiplied by T.  */
  enum machine_mode mode;

  /* The best multiplication algorithm for T.  */
  enum alg_code alg;

  /* If ALG is not alg_impossible, the cost of the multiplication;
     otherwise the cost within which multiplication by T is
     impossible.  */
  struct mult_cost cost;

  /* Optimized for speed?  */
  bool speed;
};
/* How many entries the multiplication cache/hash table has.  The larger
   size is used when HOST_BITS_PER_WIDE_INT is 64.  */
#if HOST_BITS_PER_WIDE_INT == 64
# define NUM_ALG_HASH_ENTRIES 1031
#else
# define NUM_ALG_HASH_ENTRIES 307
#endif
/* Target-dependent globals. */
struct target_expmed {
/* Each entry of ALG_HASH caches the alg_code for some integer. This is
actually a hash table. If we have a collision, the older
entry is kicked out. */
struct alg_hash_entry x_alg_hash[NUM_ALG_HASH_ENTRIES];
/* True if x_alg_hash might already have been used. */
bool x_alg_hash_used_p;
/* Nonzero means divides or modulus operations are relatively cheap for
powers of two, so don't use branches; emit the operation instead.
Usually, this will mean that the MD file will emit non-branch
@ -54,6 +164,10 @@ extern struct target_expmed *this_target_expmed;
#define this_target_expmed (&default_target_expmed)
#endif
/* Shorthand for the x_alg_hash and x_alg_hash_used_p fields of the
   structure that this_target_expmed points to.  */
#define alg_hash \
(this_target_expmed->x_alg_hash)
#define alg_hash_used_p \
(this_target_expmed->x_alg_hash_used_p)
#define sdiv_pow2_cheap \
(this_target_expmed->x_sdiv_pow2_cheap)
#define smod_pow2_cheap \