gcc/
* expmed.h (alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST) (algorithm, alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved from expmed.c. (target_expmed): Add x_alg_hash and x_alg_hash_used_p. (alg_hash, alg_hash_used_p): New macros. * expmed.c (init_expmed): Clear alg_hash if reinitializing. (alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST, algorithm) (alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved to expmed.h. From-SVN: r162104
This commit is contained in:
parent
aa1c5d72e9
commit
c371bb7380
@ -1,3 +1,14 @@
|
||||
2010-07-12 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
* expmed.h (alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST)
|
||||
(algorithm, alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved
|
||||
from expmed.c.
|
||||
(target_expmed): Add x_alg_hash and x_alg_hash_used_p.
|
||||
(alg_hash, alg_hash_used_p): New macros.
|
||||
* expmed.c (init_expmed): Clear alg_hash if reinitializing.
|
||||
(alg_code, mult_cost, MULT_COST_LESS, CHEAPER_MULT_COST, algorithm)
|
||||
(alg_hash_entry, NUM_ALG_HASH_ENTRIES, alg_hash): Moved to expmed.h.
|
||||
|
||||
2010-07-12 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
* ira-int.h (target_ira_int): Add x_max_struct_costs_size, x_init_cost,
|
||||
|
111
gcc/expmed.c
111
gcc/expmed.c
@ -260,6 +260,10 @@ init_expmed (void)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (alg_hash_used_p)
|
||||
memset (alg_hash, 0, sizeof (alg_hash));
|
||||
else
|
||||
alg_hash_used_p = true;
|
||||
default_rtl_profile ();
|
||||
}
|
||||
|
||||
@ -2283,113 +2287,6 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
|
||||
return temp;
|
||||
}
|
||||
|
||||
enum alg_code {
|
||||
alg_unknown,
|
||||
alg_zero,
|
||||
alg_m, alg_shift,
|
||||
alg_add_t_m2,
|
||||
alg_sub_t_m2,
|
||||
alg_add_factor,
|
||||
alg_sub_factor,
|
||||
alg_add_t2_m,
|
||||
alg_sub_t2_m,
|
||||
alg_impossible
|
||||
};
|
||||
|
||||
/* This structure holds the "cost" of a multiply sequence. The
|
||||
"cost" field holds the total rtx_cost of every operator in the
|
||||
synthetic multiplication sequence, hence cost(a op b) is defined
|
||||
as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
|
||||
The "latency" field holds the minimum possible latency of the
|
||||
synthetic multiply, on a hypothetical infinitely parallel CPU.
|
||||
This is the critical path, or the maximum height, of the expression
|
||||
tree which is the sum of rtx_costs on the most expensive path from
|
||||
any leaf to the root. Hence latency(a op b) is defined as zero for
|
||||
leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
|
||||
|
||||
struct mult_cost {
|
||||
short cost; /* Total rtx_cost of the multiplication sequence. */
|
||||
short latency; /* The latency of the multiplication sequence. */
|
||||
};
|
||||
|
||||
/* This macro is used to compare a pointer to a mult_cost against an
|
||||
single integer "rtx_cost" value. This is equivalent to the macro
|
||||
CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
|
||||
#define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
|
||||
|| ((X)->cost == (Y) && (X)->latency < (Y)))
|
||||
|
||||
/* This macro is used to compare two pointers to mult_costs against
|
||||
each other. The macro returns true if X is cheaper than Y.
|
||||
Currently, the cheaper of two mult_costs is the one with the
|
||||
lower "cost". If "cost"s are tied, the lower latency is cheaper. */
|
||||
#define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
|
||||
|| ((X)->cost == (Y)->cost \
|
||||
&& (X)->latency < (Y)->latency))
|
||||
|
||||
/* This structure records a sequence of operations.
|
||||
`ops' is the number of operations recorded.
|
||||
`cost' is their total cost.
|
||||
The operations are stored in `op' and the corresponding
|
||||
logarithms of the integer coefficients in `log'.
|
||||
|
||||
These are the operations:
|
||||
alg_zero total := 0;
|
||||
alg_m total := multiplicand;
|
||||
alg_shift total := total * coeff
|
||||
alg_add_t_m2 total := total + multiplicand * coeff;
|
||||
alg_sub_t_m2 total := total - multiplicand * coeff;
|
||||
alg_add_factor total := total * coeff + total;
|
||||
alg_sub_factor total := total * coeff - total;
|
||||
alg_add_t2_m total := total * coeff + multiplicand;
|
||||
alg_sub_t2_m total := total * coeff - multiplicand;
|
||||
|
||||
The first operand must be either alg_zero or alg_m. */
|
||||
|
||||
struct algorithm
|
||||
{
|
||||
struct mult_cost cost;
|
||||
short ops;
|
||||
/* The size of the OP and LOG fields are not directly related to the
|
||||
word size, but the worst-case algorithms will be if we have few
|
||||
consecutive ones or zeros, i.e., a multiplicand like 10101010101...
|
||||
In that case we will generate shift-by-2, add, shift-by-2, add,...,
|
||||
in total wordsize operations. */
|
||||
enum alg_code op[MAX_BITS_PER_WORD];
|
||||
char log[MAX_BITS_PER_WORD];
|
||||
};
|
||||
|
||||
/* The entry for our multiplication cache/hash table. */
|
||||
struct alg_hash_entry {
|
||||
/* The number we are multiplying by. */
|
||||
unsigned HOST_WIDE_INT t;
|
||||
|
||||
/* The mode in which we are multiplying something by T. */
|
||||
enum machine_mode mode;
|
||||
|
||||
/* The best multiplication algorithm for t. */
|
||||
enum alg_code alg;
|
||||
|
||||
/* The cost of multiplication if ALG_CODE is not alg_impossible.
|
||||
Otherwise, the cost within which multiplication by T is
|
||||
impossible. */
|
||||
struct mult_cost cost;
|
||||
|
||||
/* OPtimized for speed? */
|
||||
bool speed;
|
||||
};
|
||||
|
||||
/* The number of cache/hash entries. */
|
||||
#if HOST_BITS_PER_WIDE_INT == 64
|
||||
#define NUM_ALG_HASH_ENTRIES 1031
|
||||
#else
|
||||
#define NUM_ALG_HASH_ENTRIES 307
|
||||
#endif
|
||||
|
||||
/* Each entry of ALG_HASH caches alg_code for some integer. This is
|
||||
actually a hash table. If we have a collision, that the older
|
||||
entry is kicked out. */
|
||||
static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
|
||||
|
||||
/* Indicates the type of fixup needed after a constant multiplication.
|
||||
BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
|
||||
the result should be negated, and ADD_VARIANT means that the
|
||||
|
114
gcc/expmed.h
114
gcc/expmed.h
@ -22,8 +22,118 @@ along with GCC; see the file COPYING3. If not see
|
||||
#ifndef EXPMED_H
|
||||
#define EXPMED_H 1
|
||||
|
||||
enum alg_code {
|
||||
alg_unknown,
|
||||
alg_zero,
|
||||
alg_m, alg_shift,
|
||||
alg_add_t_m2,
|
||||
alg_sub_t_m2,
|
||||
alg_add_factor,
|
||||
alg_sub_factor,
|
||||
alg_add_t2_m,
|
||||
alg_sub_t2_m,
|
||||
alg_impossible
|
||||
};
|
||||
|
||||
/* This structure holds the "cost" of a multiply sequence. The
|
||||
"cost" field holds the total rtx_cost of every operator in the
|
||||
synthetic multiplication sequence, hence cost(a op b) is defined
|
||||
as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
|
||||
The "latency" field holds the minimum possible latency of the
|
||||
synthetic multiply, on a hypothetical infinitely parallel CPU.
|
||||
This is the critical path, or the maximum height, of the expression
|
||||
tree which is the sum of rtx_costs on the most expensive path from
|
||||
any leaf to the root. Hence latency(a op b) is defined as zero for
|
||||
leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
|
||||
|
||||
struct mult_cost {
|
||||
short cost; /* Total rtx_cost of the multiplication sequence. */
|
||||
short latency; /* The latency of the multiplication sequence. */
|
||||
};
|
||||
|
||||
/* This macro is used to compare a pointer to a mult_cost against an
|
||||
single integer "rtx_cost" value. This is equivalent to the macro
|
||||
CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
|
||||
#define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
|
||||
|| ((X)->cost == (Y) && (X)->latency < (Y)))
|
||||
|
||||
/* This macro is used to compare two pointers to mult_costs against
|
||||
each other. The macro returns true if X is cheaper than Y.
|
||||
Currently, the cheaper of two mult_costs is the one with the
|
||||
lower "cost". If "cost"s are tied, the lower latency is cheaper. */
|
||||
#define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
|
||||
|| ((X)->cost == (Y)->cost \
|
||||
&& (X)->latency < (Y)->latency))
|
||||
|
||||
/* This structure records a sequence of operations.
|
||||
`ops' is the number of operations recorded.
|
||||
`cost' is their total cost.
|
||||
The operations are stored in `op' and the corresponding
|
||||
logarithms of the integer coefficients in `log'.
|
||||
|
||||
These are the operations:
|
||||
alg_zero total := 0;
|
||||
alg_m total := multiplicand;
|
||||
alg_shift total := total * coeff
|
||||
alg_add_t_m2 total := total + multiplicand * coeff;
|
||||
alg_sub_t_m2 total := total - multiplicand * coeff;
|
||||
alg_add_factor total := total * coeff + total;
|
||||
alg_sub_factor total := total * coeff - total;
|
||||
alg_add_t2_m total := total * coeff + multiplicand;
|
||||
alg_sub_t2_m total := total * coeff - multiplicand;
|
||||
|
||||
The first operand must be either alg_zero or alg_m. */
|
||||
|
||||
struct algorithm
|
||||
{
|
||||
struct mult_cost cost;
|
||||
short ops;
|
||||
/* The size of the OP and LOG fields are not directly related to the
|
||||
word size, but the worst-case algorithms will be if we have few
|
||||
consecutive ones or zeros, i.e., a multiplicand like 10101010101...
|
||||
In that case we will generate shift-by-2, add, shift-by-2, add,...,
|
||||
in total wordsize operations. */
|
||||
enum alg_code op[MAX_BITS_PER_WORD];
|
||||
char log[MAX_BITS_PER_WORD];
|
||||
};
|
||||
|
||||
/* The entry for our multiplication cache/hash table. */
|
||||
struct alg_hash_entry {
|
||||
/* The number we are multiplying by. */
|
||||
unsigned HOST_WIDE_INT t;
|
||||
|
||||
/* The mode in which we are multiplying something by T. */
|
||||
enum machine_mode mode;
|
||||
|
||||
/* The best multiplication algorithm for t. */
|
||||
enum alg_code alg;
|
||||
|
||||
/* The cost of multiplication if ALG_CODE is not alg_impossible.
|
||||
Otherwise, the cost within which multiplication by T is
|
||||
impossible. */
|
||||
struct mult_cost cost;
|
||||
|
||||
/* Optimized for speed? */
|
||||
bool speed;
|
||||
};
|
||||
|
||||
/* The number of cache/hash entries. */
|
||||
#if HOST_BITS_PER_WIDE_INT == 64
|
||||
#define NUM_ALG_HASH_ENTRIES 1031
|
||||
#else
|
||||
#define NUM_ALG_HASH_ENTRIES 307
|
||||
#endif
|
||||
|
||||
/* Target-dependent globals. */
|
||||
struct target_expmed {
|
||||
/* Each entry of ALG_HASH caches alg_code for some integer. This is
|
||||
actually a hash table. If we have a collision, that the older
|
||||
entry is kicked out. */
|
||||
struct alg_hash_entry x_alg_hash[NUM_ALG_HASH_ENTRIES];
|
||||
|
||||
/* True if x_alg_hash might already have been used. */
|
||||
bool x_alg_hash_used_p;
|
||||
|
||||
/* Nonzero means divides or modulus operations are relatively cheap for
|
||||
powers of two, so don't use branches; emit the operation instead.
|
||||
Usually, this will mean that the MD file will emit non-branch
|
||||
@ -54,6 +164,10 @@ extern struct target_expmed *this_target_expmed;
|
||||
#define this_target_expmed (&default_target_expmed)
|
||||
#endif
|
||||
|
||||
#define alg_hash \
|
||||
(this_target_expmed->x_alg_hash)
|
||||
#define alg_hash_used_p \
|
||||
(this_target_expmed->x_alg_hash_used_p)
|
||||
#define sdiv_pow2_cheap \
|
||||
(this_target_expmed->x_sdiv_pow2_cheap)
|
||||
#define smod_pow2_cheap \
|
||||
|
Loading…
Reference in New Issue
Block a user