[PATCH] x86_64: On Intel CPUs, don't do an additional CPU sync before RDTSC

RDTSC serialization using CPUID is not needed on Intel platforms, where
the TSC read is already synchronous. Skipping the extra serialization
improves gettimeofday() performance.

Cc: vojtech@suse.cz
Cc: rohit.seth@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in: commit c818a18146 (parent 7f6c5b0469)
|
@ -1042,6 +1042,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
|
||||||
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
||||||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
|
(c->x86 == 0x6 && c->x86_model >= 0x0e))
|
||||||
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
|
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
|
||||||
|
set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
||||||
c->x86_max_cores = intel_num_cpu_cores(c);
|
c->x86_max_cores = intel_num_cpu_cores(c);
|
||||||
|
|
||||||
srat_detect_node();
|
srat_detect_node();
|
||||||
|
|
|
@ -71,14 +71,6 @@ unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
|
||||||
struct timespec __xtime __section_xtime;
|
struct timespec __xtime __section_xtime;
|
||||||
struct timezone __sys_tz __section_sys_tz;
|
struct timezone __sys_tz __section_sys_tz;
|
||||||
|
|
||||||
static inline void rdtscll_sync(unsigned long *tsc)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
sync_core();
|
|
||||||
#endif
|
|
||||||
rdtscll(*tsc);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* do_gettimeoffset() returns microseconds since last timer interrupt was
|
* do_gettimeoffset() returns microseconds since last timer interrupt was
|
||||||
* triggered by hardware. A memory read of HPET is slower than a register read
|
* triggered by hardware. A memory read of HPET is slower than a register read
|
||||||
|
@ -93,7 +85,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
|
||||||
{
|
{
|
||||||
unsigned long t;
|
unsigned long t;
|
||||||
unsigned long x;
|
unsigned long x;
|
||||||
rdtscll_sync(&t);
|
t = get_cycles_sync();
|
||||||
if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */
|
if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */
|
||||||
x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
|
x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
|
||||||
return x;
|
return x;
|
||||||
|
@ -309,8 +301,7 @@ unsigned long long monotonic_clock(void)
|
||||||
last_offset = vxtime.last_tsc;
|
last_offset = vxtime.last_tsc;
|
||||||
base = monotonic_base;
|
base = monotonic_base;
|
||||||
} while (read_seqretry(&xtime_lock, seq));
|
} while (read_seqretry(&xtime_lock, seq));
|
||||||
sync_core();
|
this_offset = get_cycles_sync();
|
||||||
rdtscll(this_offset);
|
|
||||||
offset = (this_offset - last_offset)*1000/cpu_khz;
|
offset = (this_offset - last_offset)*1000/cpu_khz;
|
||||||
return base + offset;
|
return base + offset;
|
||||||
}
|
}
|
||||||
|
@ -391,7 +382,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
|
||||||
delay = LATCH - 1 - delay;
|
delay = LATCH - 1 - delay;
|
||||||
}
|
}
|
||||||
|
|
||||||
rdtscll_sync(&tsc);
|
tsc = get_cycles_sync();
|
||||||
|
|
||||||
if (vxtime.mode == VXTIME_HPET) {
|
if (vxtime.mode == VXTIME_HPET) {
|
||||||
if (offset - vxtime.last > hpet_tick) {
|
if (offset - vxtime.last > hpet_tick) {
|
||||||
|
@ -700,8 +691,7 @@ static unsigned int __init hpet_calibrate_tsc(void)
|
||||||
do {
|
do {
|
||||||
local_irq_disable();
|
local_irq_disable();
|
||||||
hpet_now = hpet_readl(HPET_COUNTER);
|
hpet_now = hpet_readl(HPET_COUNTER);
|
||||||
sync_core();
|
tsc_now = get_cycles_sync();
|
||||||
rdtscl(tsc_now);
|
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
} while ((tsc_now - tsc_start) < TICK_COUNT &&
|
} while ((tsc_now - tsc_start) < TICK_COUNT &&
|
||||||
(hpet_now - hpet_start) < TICK_COUNT);
|
(hpet_now - hpet_start) < TICK_COUNT);
|
||||||
|
@ -731,11 +721,9 @@ static unsigned int __init pit_calibrate_tsc(void)
|
||||||
outb(0xb0, 0x43);
|
outb(0xb0, 0x43);
|
||||||
outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
|
outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
|
||||||
outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
|
outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
|
||||||
rdtscll(start);
|
start = get_cycles_sync();
|
||||||
sync_core();
|
|
||||||
while ((inb(0x61) & 0x20) == 0);
|
while ((inb(0x61) & 0x20) == 0);
|
||||||
sync_core();
|
end = get_cycles_sync();
|
||||||
rdtscll(end);
|
|
||||||
|
|
||||||
spin_unlock_irqrestore(&i8253_lock, flags);
|
spin_unlock_irqrestore(&i8253_lock, flags);
|
||||||
|
|
||||||
|
@ -939,7 +927,7 @@ void __init time_init(void)
|
||||||
vxtime.mode = VXTIME_TSC;
|
vxtime.mode = VXTIME_TSC;
|
||||||
vxtime.quot = (1000000L << 32) / vxtime_hz;
|
vxtime.quot = (1000000L << 32) / vxtime_hz;
|
||||||
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
|
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
|
||||||
rdtscll_sync(&vxtime.last_tsc);
|
vxtime.last_tsc = get_cycles_sync();
|
||||||
setup_irq(0, &irq0);
|
setup_irq(0, &irq0);
|
||||||
|
|
||||||
set_cyc2ns_scale(cpu_khz);
|
set_cyc2ns_scale(cpu_khz);
|
||||||
|
|
|
@ -66,8 +66,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
|
||||||
(__jiffies - __wall_jiffies) * (1000000 / HZ);
|
(__jiffies - __wall_jiffies) * (1000000 / HZ);
|
||||||
|
|
||||||
if (__vxtime.mode != VXTIME_HPET) {
|
if (__vxtime.mode != VXTIME_HPET) {
|
||||||
sync_core();
|
t = get_cycles_sync();
|
||||||
rdtscll(t);
|
|
||||||
if (t < __vxtime.last_tsc)
|
if (t < __vxtime.last_tsc)
|
||||||
t = __vxtime.last_tsc;
|
t = __vxtime.last_tsc;
|
||||||
usec += ((t - __vxtime.last_tsc) *
|
usec += ((t - __vxtime.last_tsc) *
|
||||||
|
|
|
@ -63,6 +63,7 @@
|
||||||
#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
|
#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
|
||||||
#define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */
|
#define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */
|
||||||
#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
|
#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
|
||||||
|
#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */
|
||||||
|
|
||||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
||||||
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
|
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
|
||||||
|
|
|
@ -10,6 +10,9 @@
|
||||||
#include <asm/msr.h>
|
#include <asm/msr.h>
|
||||||
#include <asm/vsyscall.h>
|
#include <asm/vsyscall.h>
|
||||||
#include <asm/hpet.h>
|
#include <asm/hpet.h>
|
||||||
|
#include <asm/system.h>
|
||||||
|
#include <asm/processor.h>
|
||||||
|
#include <linux/compiler.h>
|
||||||
|
|
||||||
#define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */
|
#define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */
|
||||||
|
|
||||||
|
@ -23,6 +26,19 @@ static inline cycles_t get_cycles (void)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Like get_cycles, but make sure the CPU is synchronized. */
|
||||||
|
static __always_inline cycles_t get_cycles_sync(void)
|
||||||
|
{
|
||||||
|
unsigned long long ret;
|
||||||
|
unsigned eax;
|
||||||
|
/* Don't do an additional sync on CPUs where we know
|
||||||
|
RDTSC is already synchronous. */
|
||||||
|
alternative_io(ASM_NOP2, "cpuid", X86_FEATURE_SYNC_RDTSC,
|
||||||
|
"=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
|
||||||
|
rdtscll(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
extern unsigned int cpu_khz;
|
extern unsigned int cpu_khz;
|
||||||
|
|
||||||
extern int read_current_timer(unsigned long *timer_value);
|
extern int read_current_timer(unsigned long *timer_value);
|
||||||
|
|
Loading…
Reference in New Issue