diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7eb9f775033..63d2845187c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +Fri Dec 14 21:23:54 CET 2001 Jan Hubicka + + * config.gcc: Revamp target_cpu_default2 to strings; + support new x86 variants. + * i386.c (override_options): Default x86_cpu_string and x86_arch_string + properly; set prefetch_sse. + * i386.h (x86_prefetch_sse): Declare. + (TARGET_PREFETCH_SSE): New. + (CPP_CPU_DEFAULT_SPEC): Define according to the new macros. + (TARGET_CPU_DEFAULT_*): New. + +Thu Dec 13 21:57:13 CET 2001 Janis Johnson + Jan Hubicka + + * config/i386/i386.h (struct processor_costs): Add new members + prefetch_block and simultaneous_prefetches. + (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): New. + * config/i386/i386.c (processor_costs structs): Add values for + prefetch_block and simultaneous_prefetches. + * config/i386/i386.md (unspec values): Remove values for prefetch + operations, which now use the PREFETCH rtx code. + (prefetch_sse, prefetch_3dnow, prefetchw): Combine to use new + unified prefetch support. 
+ 2001-12-14 Jason Merrill * diagnostic.c (sorry): Increment sorrycount before saving the diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 55113a5e67a..aa65281cf5f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -81,6 +81,8 @@ struct processor_costs size_cost = { /* costs for tunning for size */ {3, 3, 3}, /* cost of storing SSE registers in SImode, DImode and TImode */ 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ }; /* Processor costs (relative to an add) */ static const @@ -116,6 +118,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ {4, 8, 16}, /* cost of storing SSE registers in SImode, DImode and TImode */ 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ }; static const @@ -150,7 +154,9 @@ struct processor_costs i486_cost = { /* 486 specific costs */ in SImode, DImode and TImode */ {4, 8, 16}, /* cost of storing SSE registers in SImode, DImode and TImode */ - 3 /* MMX or SSE register to integer */ + 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ }; static const @@ -185,7 +191,9 @@ struct processor_costs pentium_cost = { in SImode, DImode and TImode */ {4, 8, 16}, /* cost of storing SSE registers in SImode, DImode and TImode */ - 3 /* MMX or SSE register to integer */ + 3, /* MMX or SSE register to integer */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ }; static const @@ -220,7 +228,9 @@ struct processor_costs pentiumpro_cost = { in SImode, DImode and TImode */ {2, 2, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ - 3 /* MMX or SSE register to integer */ + 3, /* MMX or SSE register to integer */ + 32, /* size of prefetch block */ + 6, /* number of parallel prefetches */ }; static const @@ -255,7 +265,9 @@ struct processor_costs k6_cost = { in SImode, 
DImode and TImode */ {2, 2, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ - 6 /* MMX or SSE register to integer */ + 6, /* MMX or SSE register to integer */ + 32, /* size of prefetch block */ + 1, /* number of parallel prefetches */ }; static const @@ -290,7 +302,9 @@ struct processor_costs athlon_cost = { in SImode, DImode and TImode */ {2, 2, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ - 6 /* MMX or SSE register to integer */ + 6, /* MMX or SSE register to integer */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ }; static const @@ -326,6 +340,8 @@ struct processor_costs pentium4_cost = { {2, 2, 8}, /* cost of storing SSE registers in SImode, DImode and TImode */ 10, /* MMX or SSE register to integer */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ }; const struct processor_costs *ix86_cost = &pentium_cost; @@ -592,6 +608,9 @@ const char *ix86_fpmath_string; /* for -mfpmath= */ /* # of registers to use to pass arguments. */ const char *ix86_regparm_string; +/* true if sse prefetch instruction is not NOOP. */ +int x86_prefetch_sse; + /* ix86_regparm_string as a number */ int ix86_regparm; @@ -817,6 +836,7 @@ override_options () {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1} }; + static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; static struct pta { const char *const name; /* processor name or nickname. 
*/ @@ -826,7 +846,7 @@ override_options () PTA_SSE = 1, PTA_SSE2 = 2, PTA_MMX = 4, - PTA_SSEPREFETCH = 8, + PTA_PREFETCH_SSE = 8, PTA_3DNOW = 16, PTA_3DNOW_A = 64 } flags; @@ -841,21 +861,21 @@ override_options () {"i686", PROCESSOR_PENTIUMPRO, 0}, {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, - {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSEPREFETCH}, + {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | - PTA_MMX | PTA_SSEPREFETCH}, + PTA_MMX | PTA_PREFETCH_SSE}, {"k6", PROCESSOR_K6, PTA_MMX}, {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, - {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW + {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A}, - {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH + {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A}, - {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW + {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, - {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW + {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, - {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW + {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, }; @@ -865,8 +885,12 @@ override_options () SUBTARGET_OVERRIDE_OPTIONS; #endif - ix86_arch = PROCESSOR_I386; - ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT; + if (!ix86_cpu_string && ix86_arch_string) + ix86_cpu_string = ix86_arch_string; + if (!ix86_cpu_string) + ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT]; + if (!ix86_arch_string) + ix86_arch_string = TARGET_64BIT ? 
"athlon-4" : "i386"; if (ix86_cmodel_string != 0) { @@ -900,47 +924,45 @@ override_options () sorry ("%i-bit mode not compiled in", (target_flags & MASK_64BIT) ? 64 : 32); - if (ix86_arch_string != 0) - { - for (i = 0; i < pta_size; i++) - if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) - { - ix86_arch = processor_alias_table[i].processor; - /* Default cpu tuning to the architecture. */ - ix86_cpu = ix86_arch; - if (processor_alias_table[i].flags & PTA_MMX - && !(target_flags & MASK_MMX_SET)) - target_flags |= MASK_MMX; - if (processor_alias_table[i].flags & PTA_3DNOW - && !(target_flags & MASK_3DNOW_SET)) - target_flags |= MASK_3DNOW; - if (processor_alias_table[i].flags & PTA_3DNOW_A - && !(target_flags & MASK_3DNOW_A_SET)) - target_flags |= MASK_3DNOW_A; - if (processor_alias_table[i].flags & PTA_SSE - && !(target_flags & MASK_SSE_SET)) - target_flags |= MASK_SSE; - if (processor_alias_table[i].flags & PTA_SSE2 - && !(target_flags & MASK_SSE2_SET)) - target_flags |= MASK_SSE2; - break; - } + for (i = 0; i < pta_size; i++) + if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) + { + ix86_arch = processor_alias_table[i].processor; + /* Default cpu tuning to the architecture. 
*/ + ix86_cpu = ix86_arch; + if (processor_alias_table[i].flags & PTA_MMX + && !(target_flags & MASK_MMX_SET)) + target_flags |= MASK_MMX; + if (processor_alias_table[i].flags & PTA_3DNOW + && !(target_flags & MASK_3DNOW_SET)) + target_flags |= MASK_3DNOW; + if (processor_alias_table[i].flags & PTA_3DNOW_A + && !(target_flags & MASK_3DNOW_A_SET)) + target_flags |= MASK_3DNOW_A; + if (processor_alias_table[i].flags & PTA_SSE + && !(target_flags & MASK_SSE_SET)) + target_flags |= MASK_SSE; + if (processor_alias_table[i].flags & PTA_SSE2 + && !(target_flags & MASK_SSE2_SET)) + target_flags |= MASK_SSE2; + if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) + x86_prefetch_sse = true; + break; + } - if (i == pta_size) - error ("bad value (%s) for -march= switch", ix86_arch_string); - } + if (i == pta_size) + error ("bad value (%s) for -march= switch", ix86_arch_string); - if (ix86_cpu_string != 0) - { - for (i = 0; i < pta_size; i++) - if (! strcmp (ix86_cpu_string, processor_alias_table[i].name)) - { - ix86_cpu = processor_alias_table[i].processor; - break; - } - if (i == pta_size) - error ("bad value (%s) for -mcpu= switch", ix86_cpu_string); - } + for (i = 0; i < pta_size; i++) + if (! strcmp (ix86_cpu_string, processor_alias_table[i].name)) + { + ix86_cpu = processor_alias_table[i].processor; + if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) + x86_prefetch_sse = true; + break; + } + if (i == pta_size) + error ("bad value (%s) for -mcpu= switch", ix86_cpu_string); if (optimize_size) ix86_cost = &size_cost; @@ -11857,22 +11879,13 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target); case IX86_BUILTIN_PREFETCH_3DNOW: + case IX86_BUILTIN_PREFETCHW: icode = CODE_FOR_prefetch_3dnow; arg0 = TREE_VALUE (arglist); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? 
const0_rtx : const1_rtx); mode0 = insn_data[icode].operand[0].mode; - pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0)); - if (! pat) - return NULL_RTX; - emit_insn (pat); - return NULL_RTX; - - case IX86_BUILTIN_PREFETCHW: - icode = CODE_FOR_prefetchw; - arg0 = TREE_VALUE (arglist); - op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); - mode0 = insn_data[icode].operand[0].mode; - pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0)); + pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1); if (! pat) return NULL_RTX; emit_insn (pat); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b6e567ee1d7..cfbb10b18dd 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -86,6 +86,9 @@ struct processor_costs { in SImode, DImode and TImode*/ const int mmxsse_to_integer; /* cost of moving mmxsse register to integer and vice versa. */ + const int prefetch_block; /* bytes moved to cache for prefetch. */ + const int simultaneous_prefetches; /* number of parallel prefetch + operations. */ }; extern const struct processor_costs *ix86_cost; @@ -224,6 +227,7 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall; extern const int x86_accumulate_outgoing_args, x86_prologue_using_move; extern const int x86_epilogue_using_move, x86_decompose_lea; +extern int x86_prefetch_sse; #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK) @@ -262,6 +266,7 @@ extern const int x86_epilogue_using_move, x86_decompose_lea; #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK) #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK) #define TARGET_DECOMPOSE_LEA (x86_decompose_lea & CPUMASK) +#define TARGET_PREFETCH_SSE (x86_prefetch_sse) #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) @@ -480,24 +485,61 @@ extern int ix86_arch; %n`-mpentiumpro' is deprecated. 
Use `-march=pentiumpro' or `-mcpu=pentiumpro' instead.\n}}" #endif +#define TARGET_CPU_DEFAULT_i386 0 +#define TARGET_CPU_DEFAULT_i486 1 +#define TARGET_CPU_DEFAULT_pentium 2 +#define TARGET_CPU_DEFAULT_pentium_mmx 3 +#define TARGET_CPU_DEFAULT_pentiumpro 4 +#define TARGET_CPU_DEFAULT_pentium2 5 +#define TARGET_CPU_DEFAULT_pentium3 6 +#define TARGET_CPU_DEFAULT_pentium4 7 +#define TARGET_CPU_DEFAULT_k6 8 +#define TARGET_CPU_DEFAULT_k6_2 9 +#define TARGET_CPU_DEFAULT_k6_3 10 +#define TARGET_CPU_DEFAULT_athlon 11 +#define TARGET_CPU_DEFAULT_athlon_sse 12 +#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\ + "pentiumpro", "pentium2", "pentium3", \ + "pentium4", "k6", "k6-2", "k6-3",\ + "athlon", "athlon-4"} #ifndef CPP_CPU_DEFAULT_SPEC -#if TARGET_CPU_DEFAULT == 1 +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_i486 #define CPP_CPU_DEFAULT_SPEC "-D__tune_i486__" #endif -#if TARGET_CPU_DEFAULT == 2 +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium #define CPP_CPU_DEFAULT_SPEC "-D__tune_i586__ -D__tune_pentium__" #endif -#if TARGET_CPU_DEFAULT == 3 +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium_mmx +#define CPP_CPU_DEFAULT_SPEC "-D__tune_i586__ -D__tune_pentium__ -D__tune_pentium_mmx__" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentiumpro #define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__" #endif -#if TARGET_CPU_DEFAULT == 4 +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium2 +#define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__ \ +-D__tune_pentium2__" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium3 +#define CPP_CPU_DEFAULT_SPEC "-D__tune_i686__ -D__tune_pentiumpro__ \ +-D__tune_pentium2__ -D__tune_pentium3__" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_pentium4 +#define CPP_CPU_DEFAULT_SPEC "-D__tune_pentium4__" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6 #define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__" #endif -#if TARGET_CPU_DEFAULT == 5 +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6_2 +#define 
CPP_CPU_DEFAULT_SPEC "-D__tune_k6__ -D__tune_k6_2__" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_k6_3 +#define CPP_CPU_DEFAULT_SPEC "-D__tune_k6__ -D__tune_k6_3__" +#endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_athlon #define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__" #endif -#if TARGET_CPU_DEFAULT == 6 -#define CPP_CPU_DEFAULT_SPEC "-D__tune_pentium4__" +#if TARGET_CPU_DEFAULT == TARGET_CPU_DEFAULT_athlon_sse +#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__ -D__tune_athlon_sse__" #endif #ifndef CPP_CPU_DEFAULT_SPEC #define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__" @@ -531,30 +573,45 @@ extern int ix86_arch; %{march=i486:-D__i486 -D__i486__ %{!mcpu*:-D__tune_i486__ }}\ %{march=pentium|march=i586:-D__i586 -D__i586__ -D__pentium -D__pentium__ \ %{!mcpu*:-D__tune_i586__ -D__tune_pentium__ }}\ +%{march=pentium-mmx:-D__i586 -D__i586__ -D__pentium -D__pentium__ \ + -D__pentium_mmx__ \ + %{!mcpu*:-D__tune_i586__ -D__tune_pentium__ -D__tune_pentium_mmx__ }}\ %{march=pentiumpro|march=i686:-D__i686 -D__i686__ \ -D__pentiumpro -D__pentiumpro__ \ %{!mcpu*:-D__tune_i686__ -D__tune_pentiumpro__ }}\ %{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\ -%{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\ +%{march=k6-2:-D__k6 -D__k6__ -D__k6_2__ \ + %{!mcpu*:-D__tune_k6__ -D__tune_k6_2__ }}\ +%{march=k6-3:-D__k6 -D__k6__ -D__k6_3__ \ + %{!mcpu*:-D__tune_k6__ -D__tune_k6_3__ }}\ +%{march=athlon|march=athlon-tbird:-D__athlon -D__athlon__ \ + %{!mcpu*:-D__tune_athlon__ }}\ +%{march=athlon-4|march=athlon-xp|march=athlon-mp:-D__athlon -D__athlon__ \ + -D__athlon_sse__ \ + %{!mcpu*:-D__tune_athlon__ -D__tune_athlon_sse__ }}\ %{march=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\ %{m386|mcpu=i386:-D__tune_i386__ }\ %{m486|mcpu=i486:-D__tune_i486__ }\ %{mpentium|mcpu=pentium|mcpu=i586|mcpu=pentium-mmx:-D__tune_i586__ -D__tune_pentium__ }\ -%{mpentiumpro|mcpu=pentiumpro|mcpu=i686|cpu=pentium2|cpu=pentium3:-D__tune_i686__\ 
+%{mpentiumpro|mcpu=pentiumpro|mcpu=i686|mcpu=pentium2|mcpu=pentium3:-D__tune_i686__ \ -D__tune_pentiumpro__ }\ %{mcpu=k6|mcpu=k6-2|mcpu=k6-3:-D__tune_k6__ }\ %{mcpu=athlon|mcpu=athlon-tbird|mcpu=athlon-4|mcpu=athlon-xp|mcpu=athlon-mp:\ -D__tune_athlon__ }\ +%{mcpu=athlon-4|mcpu=athlon-xp|mcpu=athlon-mp:\ +-D__tune_athlon_sse__ }\ %{mcpu=pentium4:-D__tune_pentium4__ }\ %{march=march=athlon-tbird|march=athlon-xp|march=athlon-mp|march=pentium3|march=pentium4:\ -D__SSE__ }\ %{march=pentium-mmx|march=k6|march=k6-2|march=k6-3\ march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\ |march=athlon-mp|march=pentium2|march=pentium3|march=pentium4: -D__MMX__ }\ -%{march=k6|march=k6-2|march=k6-3\ +%{march=k6-2|march=k6-3|\ march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\ |march=athlon-mp: -D__3dNOW__ }\ -%{mcpu=mcpu=pentium4: -D__SSE2__ }\ +%{march=pentium4: -D__SSE2__ }\ %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}" #ifndef CPP_CPU_SPEC @@ -2261,6 +2318,12 @@ while (0) /* Define this as 1 if `char' should by default be signed; else as 0. */ #define DEFAULT_SIGNED_CHAR 1 +/* Number of bytes moved into a data cache for a single prefetch operation. */ +#define PREFETCH_BLOCK ix86_cost->prefetch_block + +/* Number of prefetch operations that can be done in parallel. */ +#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches + /* Max number of bytes we can move from memory to memory in one reasonably fast instruction. */ #define MOVE_MAX 16 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index eee94a724a9..3da4cab3225 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -93,8 +93,6 @@ ;; 44 This is a `sfence' operation. ;; 45 This is a noop to prevent excessive combiner cleverness. ;; 46 This is a `femms' operation. -;; 47 This is a `prefetch' (3DNow) operation. 
-;; 48 This is a `prefetchw' operation. ;; 49 This is a 'pavgusb' operation. ;; 50 This is a `pfrcp' operation. ;; 51 This is a `pfrcpit1' operation. @@ -19400,10 +19398,58 @@ [(set_attr "type" "sse") (set_attr "memory" "unknown")]) +(define_expand "prefetch" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "TARGET_PREFETCH_SSE || TARGET_3DNOW" + " +{ + int rw = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); + if (rw != 0 && rw != 1) + abort (); + if (locality < 0 || locality > 3) + abort (); + /* Use 3dNOW prefetch in case we are asking for write prefetch not + supported by SSE counterpart or the SSE prefetch is not available + (K6 machines). Otherwise use SSE prefetch as it allows specifying + the locality. */ + if (TARGET_3DNOW + && (!TARGET_PREFETCH_SSE || rw)) + { + emit_insn (gen_prefetch_3dnow (operands[0], operands[1])); + } + else + { + int i; + switch (locality) + { + case 0: /* No temporal locality. */ + i = 0; + break; + case 1: /* Lowest level of temporal locality. */ + i = 3; + break; + case 2: /* Moderate level of temporal locality. */ + i = 2; + break; + case 3: /* Highest level of temporal locality. */ + i = 1; + break; + default: + abort (); /* We already checked for valid values above. 
*/ + break; + } + emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i))); + } + DONE; +}") + (define_insn "prefetch_sse" [(unspec [(match_operand:SI 0 "address_operand" "p") (match_operand:SI 1 "immediate_operand" "n")] 35)] - "TARGET_SSE || TARGET_3DNOW_A" + "TARGET_PREFETCH_SSE" { switch (INTVAL (operands[1])) { @@ -19579,15 +19625,16 @@ [(set_attr "type" "mmx")]) (define_insn "prefetch_3dnow" - [(unspec [(match_operand:SI 0 "address_operand" "p")] 47)] + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 0))] "TARGET_3DNOW" - "prefetch\\t%a0" - [(set_attr "type" "mmx")]) - -(define_insn "prefetchw" - [(unspec [(match_operand:SI 0 "address_operand" "p")] 48)] - "TARGET_3DNOW" - "prefetchw\\t%a0" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} [(set_attr "type" "mmx")]) (define_insn "pf2id"