i386.c (ix86_rtx_costs): Make difference between x87 and SSE operations.
* i386.c (ix86_rtx_costs): Make difference between x87 and SSE operations. * i386.h (struct processor_costs): Add addss, mulss, mulsd, divss, divsd, sqrtss and sqrtsd. * x86-tune-costs.h: Add new entries to all costs. (znver1_cost): Fix to match real instruction latencies. From-SVN: r253769
This commit is contained in:
parent
31677224bc
commit
6065f4446c
@ -1,3 +1,12 @@
|
||||
2017-10-14 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* i386.c (ix86_rtx_costs): Make difference between x87 and SSE
|
||||
operations.
|
||||
* i386.h (struct processor_costs): Add addss, mulss, mulsd, divss,
|
||||
divsd, sqrtss and sqrtsd.
|
||||
* x86-tune-costs.h: Add new entries to all costs.
|
||||
(znver1_cost): Fix to match real instruction latencies.
|
||||
|
||||
2017-10-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
Michael Collison <michael.collison@arm.com>
|
||||
|
||||
|
@ -38812,6 +38812,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
|
||||
const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
|
||||
int src_cost;
|
||||
machine_mode inner_mode = mode;
|
||||
if (VECTOR_MODE_P (mode))
|
||||
inner_mode = GET_MODE_INNER (mode);
|
||||
|
||||
switch (code)
|
||||
{
|
||||
@ -39012,7 +39015,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
|
||||
/* ??? SSE scalar/vector cost should be used here. */
|
||||
/* ??? Bald assumption that fma has the same cost as fmul. */
|
||||
*total = cost->fmul;
|
||||
*total = mode == SFmode ? cost->mulss : cost->mulsd;
|
||||
*total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
|
||||
|
||||
/* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
|
||||
@ -39031,8 +39034,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
case MULT:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
{
|
||||
/* ??? SSE scalar cost should be used here. */
|
||||
*total = cost->fmul;
|
||||
*total = inner_mode == DFmode ? cost->mulsd : cost->mulss;
|
||||
return false;
|
||||
}
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
@ -39043,7 +39045,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
{
|
||||
/* ??? SSE vector cost should be used here. */
|
||||
*total = cost->fmul;
|
||||
*total = inner_mode == DFmode ? cost->mulsd : cost->mulss;
|
||||
return false;
|
||||
}
|
||||
else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
@ -39071,7 +39073,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
|
||||
*total = cost->fmul * 2 + cost->fabs * 5;
|
||||
else
|
||||
*total = cost->fmul;
|
||||
*total = inner_mode == DFmode ? cost->mulsd : cost->mulss;
|
||||
return false;
|
||||
}
|
||||
else
|
||||
@ -39125,13 +39127,12 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
case MOD:
|
||||
case UMOD:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
/* ??? SSE cost should be used here. */
|
||||
*total = cost->fdiv;
|
||||
*total = inner_mode == DFmode ? cost->divsd : cost->divss;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
*total = cost->fdiv;
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
/* ??? SSE vector cost should be used here. */
|
||||
*total = cost->fdiv;
|
||||
*total = inner_mode == DFmode ? cost->divsd : cost->divss;
|
||||
else
|
||||
*total = cost->divide[MODE_INDEX (mode)];
|
||||
return false;
|
||||
@ -39210,8 +39211,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
{
|
||||
/* ??? SSE cost should be used here. */
|
||||
*total = cost->fadd;
|
||||
*total = cost->addss;
|
||||
return false;
|
||||
}
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
@ -39221,8 +39221,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
}
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
{
|
||||
/* ??? SSE vector cost should be used here. */
|
||||
*total = cost->fadd;
|
||||
/* We should account for whether registers are split. */
|
||||
*total = cost->addss;
|
||||
return false;
|
||||
}
|
||||
/* FALLTHRU */
|
||||
@ -39317,13 +39317,12 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
||||
|
||||
case SQRT:
|
||||
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
||||
/* ??? SSE cost should be used here. */
|
||||
*total = cost->fsqrt;
|
||||
*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
|
||||
else if (X87_FLOAT_MODE_P (mode))
|
||||
*total = cost->fsqrt;
|
||||
else if (FLOAT_MODE_P (mode))
|
||||
/* ??? SSE vector cost should be used here. */
|
||||
*total = cost->fsqrt;
|
||||
*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
|
||||
return false;
|
||||
|
||||
case UNSPEC:
|
||||
|
@ -257,6 +257,13 @@ struct processor_costs {
|
||||
const int fsqrt; /* cost of FSQRT instruction. */
|
||||
/* Specify what algorithm
|
||||
to use for stringops on unknown size. */
|
||||
const int addss; /* cost of ADDSS/SD SUBSS/SD instructions. */
|
||||
const int mulss; /* cost of MULSS instructions. */
|
||||
const int mulsd; /* cost of MULSD instructions. */
|
||||
const int divss; /* cost of DIVSS instructions. */
|
||||
const int divsd; /* cost of DIVSD instructions. */
|
||||
const int sqrtss; /* cost of SQRTSS instructions. */
|
||||
const int sqrtsd; /* cost of SQRTSD instructions. */
|
||||
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
|
||||
/* Specify reassociation width for integer,
|
||||
fp, vector integer and vector fp
|
||||
|
@ -65,6 +65,14 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
|
||||
COSTS_N_BYTES (2), /* cost of FABS instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_BYTES (2), /* cost of MULSS instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of MULSD instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
ix86_size_memcpy,
|
||||
ix86_size_memset,
|
||||
@ -142,6 +150,14 @@ struct processor_costs i386_cost = { /* 386 specific costs */
|
||||
COSTS_N_INSNS (22), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (24), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (23), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (27), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (27), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (88), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
i386_memcpy,
|
||||
i386_memset,
|
||||
@ -220,6 +236,14 @@ struct processor_costs i486_cost = { /* 486 specific costs */
|
||||
COSTS_N_INSNS (3), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (16), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (16), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (73), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
i486_memcpy,
|
||||
i486_memset,
|
||||
@ -296,6 +320,14 @@ struct processor_costs pentium_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentium_memcpy,
|
||||
pentium_memset,
|
||||
@ -365,6 +397,14 @@ struct processor_costs lakemont_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (5), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (5), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentium_memcpy,
|
||||
pentium_memset,
|
||||
@ -449,6 +489,14 @@ struct processor_costs pentiumpro_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentiumpro_memcpy,
|
||||
pentiumpro_memset,
|
||||
@ -525,6 +573,14 @@ struct processor_costs geode_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (11), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (11), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (47), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
geode_memcpy,
|
||||
geode_memset,
|
||||
@ -603,6 +659,14 @@ struct processor_costs k6_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (2), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
k6_memcpy,
|
||||
k6_memset,
|
||||
@ -681,6 +745,15 @@ struct processor_costs athlon_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSD instruction. */
|
||||
/* 11-16 */
|
||||
COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
athlon_memcpy,
|
||||
athlon_memset,
|
||||
@ -768,6 +841,15 @@ struct processor_costs k8_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSD instruction. */
|
||||
/* 11-16 */
|
||||
COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
k8_memcpy,
|
||||
k8_memset,
|
||||
@ -862,6 +944,15 @@ struct processor_costs amdfam10_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSD instruction. */
|
||||
/* 11-16 */
|
||||
COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
amdfam10_memcpy,
|
||||
amdfam10_memset,
|
||||
@ -957,6 +1048,16 @@ const struct processor_costs bdver1_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSD instruction. */
|
||||
/* 9-24 */
|
||||
COSTS_N_INSNS (24), /* cost of DIVSS instruction. */
|
||||
/* 9-27 */
|
||||
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver1_memcpy,
|
||||
bdver1_memset,
|
||||
@ -1053,6 +1154,16 @@ const struct processor_costs bdver2_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSD instruction. */
|
||||
/* 9-24 */
|
||||
COSTS_N_INSNS (24), /* cost of DIVSS instruction. */
|
||||
/* 9-27 */
|
||||
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver2_memcpy,
|
||||
bdver2_memset,
|
||||
@ -1140,6 +1251,16 @@ struct processor_costs bdver3_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSD instruction. */
|
||||
/* 9-24 */
|
||||
COSTS_N_INSNS (24), /* cost of DIVSS instruction. */
|
||||
/* 9-27 */
|
||||
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver3_memcpy,
|
||||
bdver3_memset,
|
||||
@ -1226,6 +1347,16 @@ struct processor_costs bdver4_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSD instruction. */
|
||||
/* 9-24 */
|
||||
COSTS_N_INSNS (24), /* cost of DIVSS instruction. */
|
||||
/* 9-27 */
|
||||
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
bdver4_memcpy,
|
||||
bdver4_memset,
|
||||
@ -1264,15 +1395,17 @@ struct processor_costs znver1_cost = {
|
||||
{COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
|
||||
COSTS_N_INSNS (3), /* HI. */
|
||||
COSTS_N_INSNS (3), /* SI. */
|
||||
COSTS_N_INSNS (4), /* DI. */
|
||||
COSTS_N_INSNS (4)}, /* other. */
|
||||
COSTS_N_INSNS (3), /* DI. */
|
||||
COSTS_N_INSNS (3)}, /* other. */
|
||||
0, /* cost of multiply per each bit
|
||||
set. */
|
||||
{COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
|
||||
COSTS_N_INSNS (35), /* HI. */
|
||||
COSTS_N_INSNS (51), /* SI. */
|
||||
COSTS_N_INSNS (83), /* DI. */
|
||||
COSTS_N_INSNS (83)}, /* other. */
|
||||
/* Depending on parameters, idiv can get faster on Ryzen. This is an upper
|
||||
bound. */
|
||||
{COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
|
||||
COSTS_N_INSNS (22), /* HI. */
|
||||
COSTS_N_INSNS (30), /* SI. */
|
||||
COSTS_N_INSNS (45), /* DI. */
|
||||
COSTS_N_INSNS (45)}, /* other. */
|
||||
COSTS_N_INSNS (1), /* cost of movsx. */
|
||||
COSTS_N_INSNS (1), /* cost of movzx. */
|
||||
8, /* "large" insn. */
|
||||
@ -1310,12 +1443,23 @@ struct processor_costs znver1_cost = {
|
||||
time). */
|
||||
100, /* number of parallel prefetches. */
|
||||
3, /* Branch cost. */
|
||||
COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (6), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (42), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
|
||||
COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (5), /* cost of FMUL instruction. */
|
||||
/* Latency of fdiv is 8-15. */
|
||||
COSTS_N_INSNS (15), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
/* Latency of fsqrt is 4-10. */
|
||||
COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
|
||||
/* 9-13 */
|
||||
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
|
||||
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
|
||||
and it can execute 2 integer additions and 2 multiplications thus
|
||||
reassociation may make sense up to a width of 6. SPEC2k6 benchmarks suggest
|
||||
@ -1413,6 +1557,14 @@ const struct processor_costs btver1_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (2), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
btver1_memcpy,
|
||||
btver1_memset,
|
||||
@ -1499,6 +1651,14 @@ const struct processor_costs btver2_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (2), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
btver2_memcpy,
|
||||
btver2_memset,
|
||||
@ -1576,6 +1736,14 @@ struct processor_costs pentium4_cost = {
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (6), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (23), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
pentium4_memcpy,
|
||||
pentium4_memset,
|
||||
@ -1656,6 +1824,14 @@ struct processor_costs nocona_cost = {
|
||||
COSTS_N_INSNS (3), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (7), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (7), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (32), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
|
||||
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
nocona_memcpy,
|
||||
nocona_memset,
|
||||
@ -1734,6 +1910,14 @@ struct processor_costs atom_cost = {
|
||||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (5), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
|
||||
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
atom_memcpy,
|
||||
atom_memset,
|
||||
@ -1812,6 +1996,14 @@ struct processor_costs slm_cost = {
|
||||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (5), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
slm_memcpy,
|
||||
slm_memset,
|
||||
@ -1890,6 +2082,14 @@ struct processor_costs intel_cost = {
|
||||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (8), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
|
||||
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
intel_memcpy,
|
||||
intel_memset,
|
||||
@ -1978,6 +2178,14 @@ struct processor_costs generic_cost = {
|
||||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (8), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
|
||||
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
generic_memcpy,
|
||||
generic_memset,
|
||||
@ -2065,6 +2273,14 @@ struct processor_costs core_cost = {
|
||||
COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (4), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (5), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
|
||||
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
core_memcpy,
|
||||
core_memset,
|
||||
|
Loading…
Reference in New Issue
Block a user