From c53c148c9155c1c26bf35b2763bf34d2ae26bc4b Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Mon, 16 Oct 2017 00:10:22 +0200 Subject: [PATCH] i386.c (ix86_vec_cost): New function. * i386.c (ix86_vec_cost): New function. (ix86_rtx_costs): Handle vector operations better. * i386.h (struct processor_costs): Add sse_op, fmasd, fmass. * x86-tune-costs.h: Add new costs to all tables. From-SVN: r253771 --- gcc/ChangeLog | 7 ++ gcc/config/i386/i386.c | 106 ++++++++++++++++++++----------- gcc/config/i386/i386.h | 3 + gcc/config/i386/x86-tune-costs.h | 75 ++++++++++++++++++++++ 4 files changed, 155 insertions(+), 36 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4a582bd9445..5bbd24fdac0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2017-10-14 Jan Hubicka + + * i386.c (ix86_vec_cost): New function. + (ix86_rtx_costs): Handle vector operations better. + * i386.h (struct processor_costs): Add sse_op, fmasd, fmass. + * x86-tune-costs.h: Add new costs to all tables. + 2017-10-14 Jan Hubicka * i386.c (ix86_rtx_costs): Make difference between x87 and SSE diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 28462c0fc9a..26b22d6d7aa 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -38799,6 +38799,27 @@ ix86_set_reg_reg_cost (machine_mode mode) return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); } +/* Return cost of vector operation in MODE given that scalar version has + COST. If PARALLEL is true assume that CPU has more than one unit + performing the operation. */ + +static int +ix86_vec_cost (machine_mode mode, int cost, bool parallel) +{ + if (!VECTOR_MODE_P (mode)) + return cost; + + if (!parallel) + return cost * GET_MODE_NUNITS (mode); + if (GET_MODE_BITSIZE (mode) == 128 + && TARGET_SSE_SPLIT_REGS) + return cost * 2; + if (GET_MODE_BITSIZE (mode) > 128 + && TARGET_AVX128_OPTIMAL) + return cost * GET_MODE_BITSIZE (mode) / 128; + return cost; +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -38959,19 +38980,20 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, shift with one insn set the cost to prefer paddb. */ if (CONSTANT_P (XEXP (x, 1))) { - *total = (cost->fabs + *total = ix86_vec_cost (mode, + cost->sse_op + rtx_cost (XEXP (x, 0), mode, code, 0, speed) - + (speed ? 2 : COSTS_N_BYTES (16))); + + (speed ? 2 : COSTS_N_BYTES (16)), true); return true; } count = 3; } else if (TARGET_SSSE3) count = 7; - *total = cost->fabs * count; + *total = ix86_vec_cost (mode, cost->sse_op * count, true); } else - *total = cost->fabs; + *total = ix86_vec_cost (mode, cost->sse_op, true); } else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) { @@ -39013,9 +39035,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, gcc_assert (FLOAT_MODE_P (mode)); gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); - /* ??? SSE scalar/vector cost should be used here. */ - /* ??? Bald assumption that fma has the same cost as fmul. */ - *total = mode == SFmode ? cost->mulss : cost->mulsd; + *total = ix86_vec_cost (mode, + mode == SFmode ? cost->fmass : cost->fmasd, + true); *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */ @@ -39044,8 +39066,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } else if (FLOAT_MODE_P (mode)) { - /* ??? SSE vector cost should be used here. */ - *total = inner_mode == DFmode ? cost->mulsd : cost->mulss; + *total = ix86_vec_cost (mode, + inner_mode == DFmode + ? cost->mulsd : cost->mulss, true); return false; } else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) @@ -39058,22 +39081,29 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, extra = 5; else if (TARGET_SSSE3) extra = 6; - *total = cost->fmul * 2 + cost->fabs * extra; + *total = ix86_vec_cost (mode, + cost->mulss * 2 + cost->sse_op * extra, + true); } /* V*DImode is emulated with 5-8 insns. */ else if (mode == V2DImode || mode == V4DImode) { if (TARGET_XOP && mode == V2DImode) - *total = cost->fmul * 2 + cost->fabs * 3; + *total = ix86_vec_cost (mode, + cost->mulss * 2 + cost->sse_op * 3, + true); else - *total = cost->fmul * 3 + cost->fabs * 5; + *total = ix86_vec_cost (mode, + cost->mulss * 3 + cost->sse_op * 5, + true); } /* Without sse4.1, we don't have PMULLD; it's emulated with 7 insns, including two PMULUDQ. */ else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) - *total = cost->fmul * 2 + cost->fabs * 5; + *total = ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5, + true); else - *total = inner_mode == DFmode ? cost->mulsd : cost->mulss; + *total = ix86_vec_cost (mode, cost->mulss, true); return false; } else @@ -39131,8 +39161,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else if (X87_FLOAT_MODE_P (mode)) *total = cost->fdiv; else if (FLOAT_MODE_P (mode)) - /* ??? SSE vector cost should be used here. */ - *total = inner_mode == DFmode ? cost->divsd : cost->divss; + *total = ix86_vec_cost (mode, + inner_mode == DFmode ? cost->divsd : cost->divss, + true); else *total = cost->divide[MODE_INDEX (mode)]; return false; @@ -39221,8 +39252,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } else if (FLOAT_MODE_P (mode)) { - /* We should account if registers are split. */ - *total = cost->addss; + *total = ix86_vec_cost (mode, cost->addss, true); return false; } /* FALLTHRU */ @@ -39245,8 +39275,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case NEG: if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) { - /* ??? SSE cost should be used here. */ - *total = cost->fchs; + *total = cost->sse_op; return false; } else if (X87_FLOAT_MODE_P (mode)) @@ -39256,20 +39285,14 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } else if (FLOAT_MODE_P (mode)) { - /* ??? SSE vector cost should be used here. */ - *total = cost->fchs; + *total = ix86_vec_cost (mode, cost->sse_op, true); return false; } /* FALLTHRU */ case NOT: if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) - { - /* ??? Should be SSE vector operation cost. */ - /* At least for published AMD latencies, this really is the same - as the latency for a simple fpu operation like fabs. */ - *total = cost->fabs; - } + *total = ix86_vec_cost (mode, cost->sse_op, true); else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) *total = cost->add * 2; else @@ -39302,17 +39325,27 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case FLOAT_EXTEND: if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) *total = 0; + else + *total = ix86_vec_cost (mode, cost->addss, true); + return false; + + case FLOAT_TRUNCATE: + if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + *total = cost->fadd; + else + *total = ix86_vec_cost (mode, cost->addss, true); return false; case ABS: + /* SSE requires memory load for the constant operand. It may make + sense to account for this. Of course the constant operand may or + may not be reused. */ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - /* ??? SSE cost should be used here. */ - *total = cost->fabs; + *total = cost->sse_op; else if (X87_FLOAT_MODE_P (mode)) *total = cost->fabs; else if (FLOAT_MODE_P (mode)) - /* ??? SSE vector cost should be used here. */ - *total = cost->fabs; + *total = ix86_vec_cost (mode, cost->sse_op, true); return false; case SQRT: @@ -39321,8 +39354,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else if (X87_FLOAT_MODE_P (mode)) *total = cost->fsqrt; else if (FLOAT_MODE_P (mode)) - /* ??? SSE vector cost should be used here. */ - *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; + *total = ix86_vec_cost (mode, + mode == SFmode ? cost->sqrtss : cost->sqrtsd, + true); return false; case UNSPEC: @@ -39336,7 +39370,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* ??? Assume all of these vector manipulation patterns are recognizable. In which case they all pretty much have the same cost. */ - *total = cost->fabs; + *total = cost->sse_op; return true; case VEC_MERGE: mask = XEXP (x, 2); @@ -39345,7 +39379,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); else - *total = cost->fabs; + *total = cost->sse_op; return true; default: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a602650c332..1196ed9b503 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -257,9 +257,12 @@ struct processor_costs { const int fsqrt; /* cost of FSQRT instruction. */ /* Specify what algorithm to use for stringops on unknown size. */ + const int sse_op; /* cost of cheap SSE instruction. */ const int addss; /* cost of ADDSS/SD SUBSS/SD instructions. */ const int mulss; /* cost of MULSS instructions. */ const int mulsd; /* cost of MULSD instructions. */ + const int fmass; /* cost of FMASS instructions. */ + const int fmasd; /* cost of FMASD instructions. */ const int divss; /* cost of DIVSS instructions. */ const int divsd; /* cost of DIVSD instructions. */ const int sqrtss; /* cost of SQRTSS instructions. */ diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 1a5702f0d74..c5ded939237 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -66,9 +66,12 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ + COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_BYTES (2), /* cost of MULSS instruction. */ COSTS_N_BYTES (2), /* cost of MULSD instruction. */ + COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ + COSTS_N_BYTES (2), /* cost of FMA SD instruction. */ COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ @@ -151,9 +154,12 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (24), /* cost of FCHS instruction. */ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (23), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (27), /* cost of MULSS instruction. */ COSTS_N_INSNS (27), /* cost of MULSD instruction. */ + COSTS_N_INSNS (27), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (27), /* cost of FMA SD instruction. */ COSTS_N_INSNS (88), /* cost of DIVSS instruction. */ COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ @@ -237,9 +243,12 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (16), /* cost of MULSS instruction. */ COSTS_N_INSNS (16), /* cost of MULSD instruction. */ + COSTS_N_INSNS (16), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (16), /* cost of FMA SD instruction. */ COSTS_N_INSNS (73), /* cost of DIVSS instruction. */ COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ @@ -321,9 +330,12 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ COSTS_N_INSNS (39), /* cost of DIVSS instruction. */ COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ @@ -398,9 +410,12 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (5), /* cost of MULSS instruction. */ COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (10), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (10), /* cost of FMA SD instruction. */ COSTS_N_INSNS (31), /* cost of DIVSS instruction. */ COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ @@ -490,9 +505,12 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (4), /* cost of MULSS instruction. */ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (7), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (7), /* cost of FMA SD instruction. */ COSTS_N_INSNS (18), /* cost of DIVSS instruction. */ COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ @@ -574,9 +592,12 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (1), /* cost of FCHS instruction. */ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (11), /* cost of MULSS instruction. */ COSTS_N_INSNS (11), /* cost of MULSD instruction. */ + COSTS_N_INSNS (17), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (17), /* cost of FMA SD instruction. */ COSTS_N_INSNS (47), /* cost of DIVSS instruction. */ COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ @@ -660,9 +681,12 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (2), /* cost of MULSS instruction. */ COSTS_N_INSNS (2), /* cost of MULSD instruction. */ + COSTS_N_INSNS (4), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (4), /* cost of FMA SD instruction. */ COSTS_N_INSNS (56), /* cost of DIVSS instruction. */ COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ @@ -746,9 +770,12 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (4), /* cost of MULSS instruction. */ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SD instruction. */ /* 11-16 */ COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ @@ -842,9 +869,12 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (4), /* cost of MULSS instruction. */ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SD instruction. */ /* 11-16 */ COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ @@ -945,9 +975,12 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (4), /* cost of MULSS instruction. */ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SD instruction. */ /* 11-16 */ COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ @@ -1049,9 +1082,12 @@ const struct processor_costs bdver1_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (6), /* cost of MULSS instruction. */ COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ /* 9-24 */ COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ /* 9-27 */ @@ -1155,9 +1191,12 @@ const struct processor_costs bdver2_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (6), /* cost of MULSS instruction. */ COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ /* 9-24 */ COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ /* 9-27 */ @@ -1252,9 +1291,12 @@ struct processor_costs bdver3_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (6), /* cost of MULSS instruction. */ COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ /* 9-24 */ COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ /* 9-27 */ @@ -1348,9 +1390,12 @@ struct processor_costs bdver4_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (6), /* cost of MULSS instruction. */ COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ /* 9-24 */ COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ /* 9-27 */ @@ -1452,9 +1497,12 @@ struct processor_costs znver1_cost = { /* Latency of fsqrt is 4-10. */ COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ /* 9-13 */ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ @@ -1558,9 +1606,12 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (2), /* cost of MULSS instruction. */ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ @@ -1652,9 +1703,12 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (2), /* cost of MULSS instruction. */ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ @@ -1737,9 +1791,12 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (2), /* cost of FCHS instruction. */ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (6), /* cost of MULSS instruction. */ COSTS_N_INSNS (6), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ COSTS_N_INSNS (23), /* cost of DIVSS instruction. */ COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ @@ -1825,9 +1882,12 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (3), /* cost of FCHS instruction. */ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (7), /* cost of MULSS instruction. */ COSTS_N_INSNS (7), /* cost of MULSD instruction. */ + COSTS_N_INSNS (7), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (7), /* cost of FMA SD instruction. */ COSTS_N_INSNS (32), /* cost of DIVSS instruction. */ COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ @@ -1911,9 +1971,12 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (4), /* cost of MULSS instruction. */ COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ COSTS_N_INSNS (31), /* cost of DIVSS instruction. */ COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ @@ -1997,9 +2060,12 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (4), /* cost of MULSS instruction. */ COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ COSTS_N_INSNS (39), /* cost of DIVSS instruction. */ COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ @@ -2083,9 +2149,12 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (8), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (8), /* cost of MULSS instruction. */ COSTS_N_INSNS (8), /* cost of MULSD instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ @@ -2179,9 +2248,12 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (8), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (8), /* cost of MULSS instruction. */ COSTS_N_INSNS (8), /* cost of MULSD instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (8), /* cost of FMA SD instruction. */ COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ @@ -2274,9 +2346,12 @@ struct processor_costs core_cost = { COSTS_N_INSNS (8), /* cost of FCHS instruction. */ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ COSTS_N_INSNS (4), /* cost of MULSS instruction. */ COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ COSTS_N_INSNS (18), /* cost of DIVSS instruction. */ COSTS_N_INSNS (32), /* cost of DIVSD instruction. */ COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */