x86: Add cost model for operation of mask registers.

gcc/

	PR target/71453
	* config/i386/i386.h (struct processor_costs): Add member
	mask_to_integer, integer_to_mask, mask_load[3], mask_store[3],
	mask_move.
	* config/i386/x86-tune-costs.h (ix86_size_cost, i386_cost,
	i486_cost, pentium_cost, lakemont_cost, pentiumpro_cost,
	geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost,
	bdver_cost, znver1_cost, znver2_cost, skylake_cost,
	btver1_cost, btver2_cost, pentium4_cost, nocona_cost,
	atom_cost, slm_cost, intel_cost, generic_cost, core_cost):
	Initialize mask_load[3], mask_store[3], mask_move,
	integer_to_mask, mask_to_integer for all target costs.
	* config/i386/i386.c (ix86_register_move_cost): Use cost
	model of mask registers.
	(inline_memory_move_cost): Ditto.
This commit is contained in:
H.J. Lu 2019-09-03 14:41:02 -07:00 committed by liuhongt
parent 6b31b6b526
commit 00cb3494ca
3 changed files with 185 additions and 0 deletions

View File

@ -18740,6 +18740,29 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
return in ? ix86_cost->hard_register.sse_load [index]
: ix86_cost->hard_register.sse_store [index];
}
/* Mask registers: choose the cost-table slot from the mode size
   (1 byte -> slot 0, 2 bytes -> slot 1, anything else -> slot 2).  */
if (MASK_CLASS_P (regclass))
  {
    int index;
    switch (GET_MODE_SIZE (mode))
      {
      case 1:
        index = 0;
        break;
      case 2:
        index = 1;
        break;
      /* DImode loads and stores assumed to cost the same as SImode.  */
      default:
        index = 2;
        break;
      }

    /* IN == 2 returns the larger of the load and store costs.  */
    if (in == 2)
      return MAX (ix86_cost->hard_register.mask_load[index],
                  ix86_cost->hard_register.mask_store[index]);

    /* Otherwise a nonzero IN selects the load cost and zero selects the
       store cost.  Use the slot computed above, not a fixed slot 2, so
       that QImode and HImode moves pick up their own table entries just
       as the IN == 2 path does.  */
    return in ? ix86_cost->hard_register.mask_load[index]
              : ix86_cost->hard_register.mask_store[index];
  }
if (MMX_CLASS_P (regclass))
{
int index;
@ -18865,6 +18888,17 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
? ix86_cost->hard_register.sse_to_integer
: ix86_cost->hard_register.integer_to_sse);
/* Mask-register moves.  When CLASS1 is a mask class, CLASS2 decides
   between a mask-to-mask copy and a move out of a mask register into
   a non-mask class.  When only CLASS2 is a mask class, the move goes
   from a non-mask class into a mask register.  If neither class is a
   mask class, fall through to the checks below.  */
if (MASK_CLASS_P (class1))
  return (MASK_CLASS_P (class2)
          ? ix86_cost->hard_register.mask_move
          : ix86_cost->hard_register.mask_to_integer);
if (MASK_CLASS_P (class2))
  return ix86_cost->hard_register.integer_to_mask;
if (MAYBE_FLOAT_CLASS_P (class1))
return ix86_cost->hard_register.fp_move;
if (MAYBE_SSE_CLASS_P (class1))

View File

@ -279,6 +279,13 @@ struct processor_costs {
in SImode, DImode and TImode. */
const int sse_to_integer; /* cost of moving SSE register to integer. */
const int integer_to_sse; /* cost of moving integer register to SSE. */
const int mask_to_integer; /* cost of moving mask register to integer. */
const int integer_to_mask; /* cost of moving integer register to mask. */
const int mask_load[3]; /* cost of loading mask registers
in QImode, HImode and SImode. */
const int mask_store[3]; /* cost of storing mask register
in QImode, HImode and SImode. */
const int mask_move; /* cost of moving mask register. */
} hard_register;
const int add; /* cost of an add instruction */

View File

@ -59,6 +59,12 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
{3, 3, 3, 3, 3}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{2, 2, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 2, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -164,6 +170,12 @@ struct processor_costs i386_cost = { /* 386 specific costs */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -266,6 +278,12 @@ struct processor_costs i486_cost = { /* 486 specific costs */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -370,6 +388,12 @@ struct processor_costs pentium_cost = {
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -465,6 +489,12 @@ struct processor_costs lakemont_cost = {
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{2, 4, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 4, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -575,6 +605,12 @@ struct processor_costs pentiumpro_cost = {
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 2, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -676,6 +712,12 @@ struct processor_costs geode_cost = {
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{2, 2, 2}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 2, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -777,6 +819,12 @@ struct processor_costs k6_cost = {
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{4, 5, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 3, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -884,6 +932,12 @@ struct processor_costs athlon_cost = {
{4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
5, 5, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{3, 4, 3}, /* cost of loading mask register
in QImode, HImode, SImode. */
{3, 4, 3}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -993,6 +1047,12 @@ struct processor_costs k8_cost = {
{4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
5, 5, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{3, 4, 3}, /* cost of loading mask register
in QImode, HImode, SImode. */
{3, 4, 3}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -1106,6 +1166,12 @@ struct processor_costs amdfam10_cost = {
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{3, 4, 3}, /* cost of loading mask register
in QImode, HImode, SImode. */
{3, 4, 3}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* On K8:
MOVD reg64, xmmreg Double FSTORE 4
@ -1229,6 +1295,12 @@ const struct processor_costs bdver_cost = {
{10, 10, 10, 40, 60}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
16, 20, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{8, 8, 8}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -1360,6 +1432,12 @@ struct processor_costs znver1_cost = {
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
6, 6, /* SSE->integer and integer->SSE moves. */
2, 2, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -1509,6 +1587,12 @@ struct processor_costs znver2_cost = {
in 32,64,128,256 and 512-bit. */
6, 6, /* SSE->integer and integer->SSE
moves. */
2, 2, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -1643,6 +1727,12 @@ struct processor_costs skylake_cost = {
{8, 8, 8, 12, 24}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -1751,6 +1841,12 @@ const struct processor_costs btver1_cost = {
{10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
14, 14, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{6, 8, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 8, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -1855,6 +1951,12 @@ const struct processor_costs btver2_cost = {
{10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
14, 14, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{8, 8, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -1958,6 +2060,12 @@ struct processor_costs pentium4_cost = {
{16, 16, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
20, 12, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{4, 5, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{2, 3, 2}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -2064,6 +2172,12 @@ struct processor_costs nocona_cost = {
{12, 12, 12, 24, 48}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
20, 12, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{4, 4, 4}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -2168,6 +2282,12 @@ struct processor_costs atom_cost = {
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
8, 6, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -2272,6 +2392,12 @@ struct processor_costs slm_cost = {
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
8, 6, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{8, 8, 8}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -2376,6 +2502,12 @@ struct processor_costs intel_cost = {
{6, 6, 6, 6, 6}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
4, 4, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -2484,6 +2616,12 @@ struct processor_costs generic_cost = {
{6, 6, 6, 10, 15}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@ -2597,6 +2735,12 @@ struct processor_costs core_cost = {
{6, 6, 6, 6, 12}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
2, 2, /* mask->integer and integer->mask moves */
{4, 4, 4}, /* cost of loading mask register
in QImode, HImode, SImode. */
{6, 6, 6}, /* cost if storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},