Revert "Add the member integer_to_sse to processor_cost as a cost simulation for movd/pinsrd. It will be used to calculate the cost of vec_construct."
This reverts commit 872da9a6f6.
PR target/101936
PR target/101929
This commit is contained in:
parent
6e529985d8
commit
1db70e61a9
|
@ -22203,11 +22203,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
|||
case vec_construct:
|
||||
{
|
||||
/* N element inserts into SSE vectors. */
|
||||
int cost
|
||||
= TYPE_VECTOR_SUBPARTS (vectype) * (fp ?
|
||||
ix86_cost->sse_op
|
||||
: ix86_cost->integer_to_sse);
|
||||
|
||||
int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
|
||||
/* One vinserti128 for combining two SSE vectors for AVX256. */
|
||||
if (GET_MODE_BITSIZE (mode) == 256)
|
||||
cost += ix86_vec_cost (mode, ix86_cost->addss);
|
||||
|
|
|
@ -165,7 +165,6 @@ struct processor_costs {
|
|||
const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */
|
||||
zmm_move;
|
||||
const int sse_to_integer; /* cost of moving SSE register to integer. */
|
||||
const int integer_to_sse; /* cost of moving integer to SSE register. */
|
||||
const int gather_static, gather_per_elt; /* Cost of gather load is computed
|
||||
as static + per_item * nelts. */
|
||||
const int scatter_static, scatter_per_elt; /* Cost of gather store is
|
||||
|
|
|
@ -102,7 +102,6 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
|
|||
in 128bit, 256bit and 512bit */
|
||||
3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
|
||||
3, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_BYTES (2), /* cost of moving integer to sse register. */
|
||||
5, 0, /* Gather load static, per_elt. */
|
||||
5, 0, /* Gather store static, per_elt. */
|
||||
0, /* size of l1 cache */
|
||||
|
@ -212,7 +211,6 @@ struct processor_costs i386_cost = { /* 386 specific costs */
|
|||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
3, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
0, /* size of l1 cache */
|
||||
|
@ -321,7 +319,6 @@ struct processor_costs i486_cost = { /* 486 specific costs */
|
|||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
3, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
4, /* size of l1 cache. 486 has 8kB cache
|
||||
|
@ -432,7 +429,6 @@ struct processor_costs pentium_cost = {
|
|||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
3, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
8, /* size of l1 cache. */
|
||||
|
@ -534,7 +530,6 @@ struct processor_costs lakemont_cost = {
|
|||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
3, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
8, /* size of l1 cache. */
|
||||
|
@ -651,7 +646,6 @@ struct processor_costs pentiumpro_cost = {
|
|||
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
3, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
8, /* size of l1 cache. */
|
||||
|
@ -759,7 +753,6 @@ struct processor_costs geode_cost = {
|
|||
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
2, 2, /* Gather load static, per_elt. */
|
||||
2, 2, /* Gather store static, per_elt. */
|
||||
64, /* size of l1 cache. */
|
||||
|
@ -867,7 +860,6 @@ struct processor_costs k6_cost = {
|
|||
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
2, 2, /* Gather load static, per_elt. */
|
||||
2, 2, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
|
@ -981,7 +973,6 @@ struct processor_costs athlon_cost = {
|
|||
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
5, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (2), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
64, /* size of l1 cache. */
|
||||
|
@ -1097,7 +1088,6 @@ struct processor_costs k8_cost = {
|
|||
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
5, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (2), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
64, /* size of l1 cache. */
|
||||
|
@ -1226,7 +1216,6 @@ struct processor_costs amdfam10_cost = {
|
|||
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
3, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (2), /* cost of moving integer to sse register. */
|
||||
4, 4, /* Gather load static, per_elt. */
|
||||
4, 4, /* Gather store static, per_elt. */
|
||||
64, /* size of l1 cache. */
|
||||
|
@ -1347,7 +1336,6 @@ const struct processor_costs bdver_cost = {
|
|||
{10, 10, 10, 40, 60}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
16, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (2), /* cost of moving integer to sse register. */
|
||||
12, 12, /* Gather load static, per_elt. */
|
||||
10, 10, /* Gather store static, per_elt. */
|
||||
16, /* size of l1 cache. */
|
||||
|
@ -1489,7 +1477,6 @@ struct processor_costs znver1_cost = {
|
|||
{8, 8, 8, 16, 32}, /* cost of unaligned stores. */
|
||||
2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
|
||||
throughput 12. Approx 9 uops do not depend on vector size and every load
|
||||
is 7 uops. */
|
||||
|
@ -1646,7 +1633,6 @@ struct processor_costs znver2_cost = {
|
|||
2, 2, 3, /* cost of moving XMM,YMM,ZMM
|
||||
register. */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
|
||||
throughput 12. Approx 9 uops do not depend on vector size and every load
|
||||
is 7 uops. */
|
||||
|
@ -1779,7 +1765,6 @@ struct processor_costs znver3_cost = {
|
|||
2, 2, 3, /* cost of moving XMM,YMM,ZMM
|
||||
register. */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
/* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
|
||||
throughput 9. Approx 7 uops do not depend on vector size and every load
|
||||
is 4 uops. */
|
||||
|
@ -1924,7 +1909,6 @@ struct processor_costs skylake_cost = {
|
|||
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
|
||||
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (2)+1, /* cost of moving integer to sse register. */
|
||||
20, 8, /* Gather load static, per_elt. */
|
||||
22, 10, /* Gather store static, per_elt. */
|
||||
64, /* size of l1 cache. */
|
||||
|
@ -2051,7 +2035,6 @@ struct processor_costs icelake_cost = {
|
|||
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
|
||||
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
20, 8, /* Gather load static, per_elt. */
|
||||
22, 10, /* Gather store static, per_elt. */
|
||||
64, /* size of l1 cache. */
|
||||
|
@ -2165,7 +2148,6 @@ const struct processor_costs btver1_cost = {
|
|||
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
14, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
10, 10, /* Gather load static, per_elt. */
|
||||
10, 10, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
|
@ -2276,7 +2258,6 @@ const struct processor_costs btver2_cost = {
|
|||
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
14, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
10, 10, /* Gather load static, per_elt. */
|
||||
10, 10, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
|
@ -2386,7 +2367,6 @@ struct processor_costs pentium4_cost = {
|
|||
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
|
||||
12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
|
||||
20, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (2), /* cost of moving integer to sse register. */
|
||||
16, 16, /* Gather load static, per_elt. */
|
||||
16, 16, /* Gather store static, per_elt. */
|
||||
8, /* size of l1 cache. */
|
||||
|
@ -2499,7 +2479,6 @@ struct processor_costs nocona_cost = {
|
|||
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
|
||||
6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
|
||||
20, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (2), /* cost of moving integer to sse register. */
|
||||
12, 12, /* Gather load static, per_elt. */
|
||||
12, 12, /* Gather store static, per_elt. */
|
||||
8, /* size of l1 cache. */
|
||||
|
@ -2610,7 +2589,6 @@ struct processor_costs atom_cost = {
|
|||
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
8, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
8, 8, /* Gather load static, per_elt. */
|
||||
8, 8, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
|
@ -2721,7 +2699,6 @@ struct processor_costs slm_cost = {
|
|||
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
|
||||
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
|
||||
8, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
8, 8, /* Gather load static, per_elt. */
|
||||
8, 8, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
|
@ -2832,7 +2809,6 @@ struct processor_costs intel_cost = {
|
|||
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
|
||||
2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
|
||||
4, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
6, 6, /* Gather load static, per_elt. */
|
||||
6, 6, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
|
@ -2950,7 +2926,6 @@ struct processor_costs generic_cost = {
|
|||
{6, 6, 6, 10, 15}, /* cost of unaligned storess. */
|
||||
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
|
||||
6, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
18, 6, /* Gather load static, per_elt. */
|
||||
18, 6, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
|
@ -3074,7 +3049,6 @@ struct processor_costs core_cost = {
|
|||
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
|
||||
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
|
||||
2, /* cost of moving SSE register to integer. */
|
||||
COSTS_N_INSNS (1), /* cost of moving integer to sse register. */
|
||||
/* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
|
||||
rec. throughput 6.
|
||||
So 5 uops statically and one uops per load. */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* PR target/99881. */
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-Ofast -march=skylake" } */
|
||||
/* { dg-final { scan-assembler-not "xmm\[0-9\]" } } */
|
||||
/* { dg-final { scan-assembler-not "xmm\[0-9\]" { xfail *-*-* } } } */
|
||||
|
||||
void
|
||||
foo (int* __restrict a, int n, int c)
|
||||
|
|
Loading…
Reference in New Issue