Revert "Add the member integer_to_sse to processor_cost as a cost simulation for movd/pinsrd. It will be used to calculate the cost of vec_construct."

This reverts commit 872da9a6f6; see PR target/101936 and PR target/101929.
2021-08-17 17:29:06 +08:00 · 2021-08-17 17:29:06 +08:00 · 1db70e61a9
parent 6e529985d8
commit 1db70e61a9
4 changed files with 2 additions and 33 deletions
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22203,11 +22203,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
      case vec_construct:
 	{
 	  /* N element inserts into SSE vectors.  */
-	  int cost
-	    = TYPE_VECTOR_SUBPARTS (vectype) * (fp ?
-						ix86_cost->sse_op
-						: ix86_cost->integer_to_sse);
-
+	  int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
 	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
 	  if (GET_MODE_BITSIZE (mode) == 256)
 	    cost += ix86_vec_cost (mode, ix86_cost->addss);
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -165,7 +165,6 @@ struct processor_costs {
  const int xmm_move, ymm_move, /* cost of moving XMM and YMM register.  */
 	    zmm_move;
  const int sse_to_integer;	/* cost of moving SSE register to integer.  */
-  const int integer_to_sse;	/* cost of moving integer to SSE register.  */
  const int gather_static, gather_per_elt; /* Cost of gather load is computed
 				   as static + per_item * nelts. */
  const int scatter_static, scatter_per_elt; /* Cost of gather store is
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -102,7 +102,6 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
 					   in 128bit, 256bit and 512bit */
  3, 3, 3,				/* cost of moving XMM,YMM,ZMM register */
  3,					/* cost of moving SSE register to integer.  */
-  COSTS_N_BYTES (2),			/* cost of moving integer to sse register.  */
  5, 0,					/* Gather load static, per_elt.  */
  5, 0,					/* Gather store static, per_elt.  */
  0,					/* size of l1 cache  */
@@ -212,7 +211,6 @@ struct processor_costs i386_cost = {	/* 386 specific costs */
  {4, 8, 16, 32, 64},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  3,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  0,					/* size of l1 cache  */
@@ -321,7 +319,6 @@ struct processor_costs i486_cost = {	/* 486 specific costs */
  {4, 8, 16, 32, 64},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  3,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  4,					/* size of l1 cache.  486 has 8kB cache
@@ -432,7 +429,6 @@ struct processor_costs pentium_cost = {
  {4, 8, 16, 32, 64},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  3,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  8,					/* size of l1 cache.  */
@@ -534,7 +530,6 @@ struct processor_costs lakemont_cost = {
  {4, 8, 16, 32, 64},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  3,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  8,					/* size of l1 cache.  */
@@ -651,7 +646,6 @@ struct processor_costs pentiumpro_cost = {
  {4, 8, 16, 32, 64},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  3,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  8,					/* size of l1 cache.  */
@@ -759,7 +753,6 @@ struct processor_costs geode_cost = {
  {2, 2, 8, 16, 32},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  2, 2,					/* Gather load static, per_elt.  */
  2, 2,					/* Gather store static, per_elt.  */
  64,					/* size of l1 cache.  */
@@ -867,7 +860,6 @@ struct processor_costs k6_cost = {
  {2, 2, 8, 16, 32},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  2, 2,					/* Gather load static, per_elt.  */
  2, 2,					/* Gather store static, per_elt.  */
  32,					/* size of l1 cache.  */
@@ -981,7 +973,6 @@ struct processor_costs athlon_cost = {
  {4, 4, 10, 10, 20},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  5,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (2),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  64,					/* size of l1 cache.  */
@@ -1097,7 +1088,6 @@ struct processor_costs k8_cost = {
  {4, 4, 10, 10, 20},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  5,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (2),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  64,					/* size of l1 cache.  */
@@ -1226,7 +1216,6 @@ struct processor_costs amdfam10_cost = {
  {4, 4, 5, 10, 20},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  3,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (2),			/* cost of moving integer to sse register.  */
  4, 4,					/* Gather load static, per_elt.  */
  4, 4,					/* Gather store static, per_elt.  */
  64,					/* size of l1 cache.  */
@@ -1347,7 +1336,6 @@ const struct processor_costs bdver_cost = {
  {10, 10, 10, 40, 60},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  16,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (2),			/* cost of moving integer to sse register.  */
  12, 12,				/* Gather load static, per_elt.  */
  10, 10,				/* Gather store static, per_elt.  */
  16,					/* size of l1 cache.  */
@@ -1489,7 +1477,6 @@ struct processor_costs znver1_cost = {
  {8, 8, 8, 16, 32},			/* cost of unaligned stores.  */
  2, 3, 6,				/* cost of moving XMM,YMM,ZMM register.  */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
     throughput 12.  Approx 9 uops do not depend on vector size and every load
     is 7 uops.  */
@@ -1646,7 +1633,6 @@ struct processor_costs znver2_cost = {
  2, 2, 3,				/* cost of moving XMM,YMM,ZMM
 					   register.  */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
     throughput 12.  Approx 9 uops do not depend on vector size and every load
     is 7 uops.  */
@@ -1779,7 +1765,6 @@ struct processor_costs znver3_cost = {
  2, 2, 3,				/* cost of moving XMM,YMM,ZMM
 					   register.  */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
     throughput 9.  Approx 7 uops do not depend on vector size and every load
     is 4 uops.  */
@@ -1924,7 +1909,6 @@ struct processor_costs skylake_cost = {
  {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
  2, 2, 4,				/* cost of moving XMM,YMM,ZMM register */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (2)+1,			/* cost of moving integer to sse register.  */
  20, 8,				/* Gather load static, per_elt.  */
  22, 10,				/* Gather store static, per_elt.  */
  64,					/* size of l1 cache.  */
@@ -2051,7 +2035,6 @@ struct processor_costs icelake_cost = {
  {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
  2, 2, 4,				/* cost of moving XMM,YMM,ZMM register */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  20, 8,				/* Gather load static, per_elt.  */
  22, 10,				/* Gather store static, per_elt.  */
  64,					/* size of l1 cache.  */
@@ -2165,7 +2148,6 @@ const struct processor_costs btver1_cost = {
  {10, 10, 12, 48, 96},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  14,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  10, 10,				/* Gather load static, per_elt.  */
  10, 10,				/* Gather store static, per_elt.  */
  32,					/* size of l1 cache.  */
@@ -2276,7 +2258,6 @@ const struct processor_costs btver2_cost = {
  {10, 10, 12, 48, 96},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  14,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  10, 10,				/* Gather load static, per_elt.  */
  10, 10,				/* Gather store static, per_elt.  */
  32,					/* size of l1 cache.  */
@@ -2386,7 +2367,6 @@ struct processor_costs pentium4_cost = {
  {32, 32, 32, 64, 128},		/* cost of unaligned stores.  */
  12, 24, 48,				/* cost of moving XMM,YMM,ZMM register */
  20,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (2),			/* cost of moving integer to sse register.  */
  16, 16,				/* Gather load static, per_elt.  */
  16, 16,				/* Gather store static, per_elt.  */
  8,					/* size of l1 cache.  */
@@ -2499,7 +2479,6 @@ struct processor_costs nocona_cost = {
  {24, 24, 24, 48, 96},			/* cost of unaligned stores.  */
  6, 12, 24,				/* cost of moving XMM,YMM,ZMM register */
  20,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (2),			/* cost of moving integer to sse register.  */
  12, 12,				/* Gather load static, per_elt.  */
  12, 12,				/* Gather store static, per_elt.  */
  8,					/* size of l1 cache.  */
@@ -2610,7 +2589,6 @@ struct processor_costs atom_cost = {
  {16, 16, 16, 32, 64},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  8,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  8, 8,					/* Gather load static, per_elt.  */
  8, 8,					/* Gather store static, per_elt.  */
  32,					/* size of l1 cache.  */
@@ -2721,7 +2699,6 @@ struct processor_costs slm_cost = {
  {16, 16, 16, 32, 64},			/* cost of unaligned stores.  */
  2, 4, 8,				/* cost of moving XMM,YMM,ZMM register */
  8,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  8, 8,					/* Gather load static, per_elt.  */
  8, 8,					/* Gather store static, per_elt.  */
  32,					/* size of l1 cache.  */
@@ -2832,7 +2809,6 @@ struct processor_costs intel_cost = {
  {10, 10, 10, 10, 10},			/* cost of unaligned loads.  */
  2, 2, 2,				/* cost of moving XMM,YMM,ZMM register */
  4,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  6, 6,					/* Gather load static, per_elt.  */
  6, 6,					/* Gather store static, per_elt.  */
  32,					/* size of l1 cache.  */
@@ -2950,7 +2926,6 @@ struct processor_costs generic_cost = {
  {6, 6, 6, 10, 15},			/* cost of unaligned storess.  */
  2, 3, 4,				/* cost of moving XMM,YMM,ZMM register */
  6,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  18, 6,				/* Gather load static, per_elt.  */
  18, 6,				/* Gather store static, per_elt.  */
  32,					/* size of l1 cache.  */
@@ -3074,7 +3049,6 @@ struct processor_costs core_cost = {
  {6, 6, 6, 6, 12},			/* cost of unaligned stores.  */
  2, 2, 4,				/* cost of moving XMM,YMM,ZMM register */
  2,					/* cost of moving SSE register to integer.  */
-  COSTS_N_INSNS (1),			/* cost of moving integer to sse register.  */
  /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
     rec. throughput 6.
     So 5 uops statically and one uops per load.  */
--- a/gcc/testsuite/gcc.target/i386/pr99881.c
+++ b/gcc/testsuite/gcc.target/i386/pr99881.c
@@ -1,7 +1,7 @@
 /* PR target/99881.  */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-Ofast -march=skylake" } */
-/* { dg-final { scan-assembler-not "xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "xmm\[0-9\]" { xfail *-*-* } } } */

 void
 foo (int* __restrict a, int n, int c)