From 3365e7798760dc6c190a9bbb0945a38f02625438 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2009 10:12:41 +0200 Subject: [PATCH 1/3] lockdep: Use cpu_clock() for lockstat Some tracepoint magic (TRACE_EVENT(lock_acquired)) relies on the fact that lock hold times are positive and uses div64 on that. That triggered a build warning on MIPS, and probably causes bad output in certain circumstances as well. Make it truly positive. Reported-by: Andrew Morton Signed-off-by: Peter Zijlstra LKML-Reference: <1254818502.21044.112.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3815ac1d58b2..9af56723c096 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -142,6 +142,11 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); +static inline u64 lockstat_clock(void) +{ + return cpu_clock(smp_processor_id()); +} + static int lock_point(unsigned long points[], unsigned long ip) { int i; @@ -158,7 +163,7 @@ static int lock_point(unsigned long points[], unsigned long ip) return i; } -static void lock_time_inc(struct lock_time *lt, s64 time) +static void lock_time_inc(struct lock_time *lt, u64 time) { if (time > lt->max) lt->max = time; @@ -234,12 +239,12 @@ static void put_lock_stats(struct lock_class_stats *stats) static void lock_release_holdtime(struct held_lock *hlock) { struct lock_class_stats *stats; - s64 holdtime; + u64 holdtime; if (!lock_stat) return; - holdtime = sched_clock() - hlock->holdtime_stamp; + holdtime = lockstat_clock() - hlock->holdtime_stamp; stats = get_lock_stats(hlock_class(hlock)); if (hlock->read) @@ -2792,7 +2797,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->references = references; #ifdef CONFIG_LOCK_STAT hlock->waittime_stamp = 0; - hlock->holdtime_stamp = sched_clock(); + hlock->holdtime_stamp = lockstat_clock(); #endif if (check == 2 && !mark_irqflags(curr, hlock)) @@ -3322,7 +3327,7 @@ found_it: if (hlock->instance != lock) return; - hlock->waittime_stamp = sched_clock(); + hlock->waittime_stamp = lockstat_clock(); contention_point = lock_point(hlock_class(hlock)->contention_point, ip); contending_point = lock_point(hlock_class(hlock)->contending_point, @@ -3345,8 +3350,7 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) struct held_lock *hlock, *prev_hlock; struct lock_class_stats *stats; unsigned int depth; - u64 now; - s64 waittime = 0; + u64 now, waittime = 0; int i, cpu; depth = curr->lockdep_depth; @@ -3374,7 +3378,7 @@ found_it: cpu = smp_processor_id(); if (hlock->waittime_stamp) { - now = sched_clock(); + now = lockstat_clock(); waittime = now - hlock->waittime_stamp; hlock->holdtime_stamp = now; } From 066b3aa8454bee3cdc665d86b5de812d8d0513b3 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 9 Sep 2009 15:02:33 +0200 Subject: [PATCH 2/3] oprofile: fix race condition in event_buffer free Looking at the 2.6.31-rc9 code, it appears there is a race condition in the event_buffer cleanup code path (shutdown). This could lead to kernel panic as some CPUs may be operating on the event buffer AFTER it has been freed. The attached patch solves the problem and makes sure CPUs check if the buffer is not NULL before they access it as some may have been spinning on the mutex while the buffer was being freed. The race may happen if the buffer is freed during pending reads. But it is not clear why there are races in add_event_entry() since all workqueues or handlers are canceled or flushed before the event buffer is freed. Signed-off-by: David Rientjes Signed-off-by: Stephane Eranian Signed-off-by: Robert Richter --- drivers/oprofile/event_buffer.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 2b7ae366ceb1..c38adb389064 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -41,6 +41,12 @@ static atomic_t buffer_ready = ATOMIC_INIT(0); */ void add_event_entry(unsigned long value) { + /* + * catch potential error + */ + if (!event_buffer) + return; + if (buffer_pos == buffer_size) { atomic_inc(&oprofile_stats.event_lost_overflow); return; @@ -92,9 +98,10 @@ out: void free_event_buffer(void) { + mutex_lock(&buffer_mutex); vfree(event_buffer); - event_buffer = NULL; + mutex_unlock(&buffer_mutex); } @@ -167,6 +174,11 @@ static ssize_t event_buffer_read(struct file *file, char __user *buf, mutex_lock(&buffer_mutex); + if (!event_buffer) { + retval = -EINTR; + goto out; + } + atomic_set(&buffer_ready, 0); retval = -EFAULT; From c0868934e536e0ff508f2d359d006b25abc4970d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 9 Oct 2009 03:17:44 +0200 Subject: [PATCH 3/3] oprofile: warn on freeing event buffer too early A race shouldn't happen since all workqueues or handlers are canceled or flushed before the event buffer is freed. A warning is triggered now if the buffer is freed too early. Also, this patch adds some comments about event buffer protection, reworks some code and adds code to clear buffer_pos during alloc and free of the event buffer. Cc: David Rientjes Cc: Stephane Eranian Signed-off-by: Robert Richter --- drivers/oprofile/event_buffer.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index c38adb389064..5df60a6b6776 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -35,17 +35,22 @@ static size_t buffer_pos; /* atomic_t because wait_event checks it outside of buffer_mutex */ static atomic_t buffer_ready = ATOMIC_INIT(0); -/* Add an entry to the event buffer. When we - * get near to the end we wake up the process - * sleeping on the read() of the file. +/* + * Add an entry to the event buffer. When we get near to the end we + * wake up the process sleeping on the read() of the file. To protect + * the event_buffer this function may only be called when buffer_mutex + * is set. */ void add_event_entry(unsigned long value) { /* - * catch potential error + * This shouldn't happen since all workqueues or handlers are + * canceled or flushed before the event buffer is freed. */ - if (!event_buffer) + if (!event_buffer) { + WARN_ON_ONCE(1); return; + } if (buffer_pos == buffer_size) { atomic_inc(&oprofile_stats.event_lost_overflow); @@ -75,7 +80,6 @@ void wake_up_buffer_waiter(void) int alloc_event_buffer(void) { - int err = -ENOMEM; unsigned long flags; spin_lock_irqsave(&oprofilefs_lock, flags); @@ -86,13 +90,12 @@ int alloc_event_buffer(void) if (buffer_watershed >= buffer_size) return -EINVAL; + buffer_pos = 0; event_buffer = vmalloc(sizeof(unsigned long) * buffer_size); if (!event_buffer) - goto out; + return -ENOMEM; - err = 0; -out: - return err; + return 0; } @@ -100,6 +103,7 @@ void free_event_buffer(void) { mutex_lock(&buffer_mutex); vfree(event_buffer); + buffer_pos = 0; event_buffer = NULL; mutex_unlock(&buffer_mutex); } @@ -174,6 +178,7 @@ static ssize_t event_buffer_read(struct file *file, char __user *buf, mutex_lock(&buffer_mutex); + /* May happen if the buffer is freed during pending reads. */ if (!event_buffer) { retval = -EINTR; goto out;