From 3365e7798760dc6c190a9bbb0945a38f02625438 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 9 Oct 2009 10:12:41 +0200
Subject: [PATCH 1/3] lockdep: Use cpu_clock() for lockstat

Some tracepoint magic (TRACE_EVENT(lock_acquired)) relies on
the fact that lock hold times are positive and uses div64 on
that. That triggered a build warning on MIPS, and probably
causes bad output in certain circumstances as well.

Make it truly positive.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1254818502.21044.112.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/lockdep.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3815ac1d58b2..9af56723c096 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -142,6 +142,11 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 #ifdef CONFIG_LOCK_STAT
 static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
 
+static inline u64 lockstat_clock(void)
+{
+	return cpu_clock(smp_processor_id());
+}
+
 static int lock_point(unsigned long points[], unsigned long ip)
 {
 	int i;
@@ -158,7 +163,7 @@ static int lock_point(unsigned long points[], unsigned long ip)
 	return i;
 }
 
-static void lock_time_inc(struct lock_time *lt, s64 time)
+static void lock_time_inc(struct lock_time *lt, u64 time)
 {
 	if (time > lt->max)
 		lt->max = time;
@@ -234,12 +239,12 @@ static void put_lock_stats(struct lock_class_stats *stats)
 static void lock_release_holdtime(struct held_lock *hlock)
 {
 	struct lock_class_stats *stats;
-	s64 holdtime;
+	u64 holdtime;
 
 	if (!lock_stat)
 		return;
 
-	holdtime = sched_clock() - hlock->holdtime_stamp;
+	holdtime = lockstat_clock() - hlock->holdtime_stamp;
 
 	stats = get_lock_stats(hlock_class(hlock));
 	if (hlock->read)
@@ -2792,7 +2797,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->references = references;
 #ifdef CONFIG_LOCK_STAT
 	hlock->waittime_stamp = 0;
-	hlock->holdtime_stamp = sched_clock();
+	hlock->holdtime_stamp = lockstat_clock();
 #endif
 
 	if (check == 2 && !mark_irqflags(curr, hlock))
@@ -3322,7 +3327,7 @@ found_it:
 	if (hlock->instance != lock)
 		return;
 
-	hlock->waittime_stamp = sched_clock();
+	hlock->waittime_stamp = lockstat_clock();
 
 	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
 	contending_point = lock_point(hlock_class(hlock)->contending_point,
@@ -3345,8 +3350,7 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 	struct held_lock *hlock, *prev_hlock;
 	struct lock_class_stats *stats;
 	unsigned int depth;
-	u64 now;
-	s64 waittime = 0;
+	u64 now, waittime = 0;
 	int i, cpu;
 
 	depth = curr->lockdep_depth;
@@ -3374,7 +3378,7 @@ found_it:
 
 	cpu = smp_processor_id();
 	if (hlock->waittime_stamp) {
-		now = sched_clock();
+		now = lockstat_clock();
 		waittime = now - hlock->waittime_stamp;
 		hlock->holdtime_stamp = now;
 	}

From 066b3aa8454bee3cdc665d86b5de812d8d0513b3 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 9 Sep 2009 15:02:33 +0200
Subject: [PATCH 2/3] oprofile: fix race condition in event_buffer free

Looking at the 2.6.31-rc9 code, it appears there is a race condition
in the event_buffer cleanup code path (shutdown). This could lead to
kernel panic as some CPUs may be operating on the event buffer AFTER
it has been freed. The attached patch solves the problem and makes
sure CPUs check if the buffer is not NULL before they access it as
some may have been spinning on the mutex while the buffer was being
freed.

The race may happen if the buffer is freed during pending reads. But
it is not clear why there are races in add_event_entry() since all
workqueues or handlers are canceled or flushed before the event buffer
is freed.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 drivers/oprofile/event_buffer.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index 2b7ae366ceb1..c38adb389064 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -41,6 +41,12 @@ static atomic_t buffer_ready = ATOMIC_INIT(0);
  */
 void add_event_entry(unsigned long value)
 {
+	/*
+	 * catch potential error
+	 */
+	if (!event_buffer)
+		return;
+
 	if (buffer_pos == buffer_size) {
 		atomic_inc(&oprofile_stats.event_lost_overflow);
 		return;
@@ -92,9 +98,10 @@ out:
 
 void free_event_buffer(void)
 {
+	mutex_lock(&buffer_mutex);
 	vfree(event_buffer);
-
 	event_buffer = NULL;
+	mutex_unlock(&buffer_mutex);
 }
 
 
@@ -167,6 +174,11 @@ static ssize_t event_buffer_read(struct file *file, char __user *buf,
 
 	mutex_lock(&buffer_mutex);
 
+	if (!event_buffer) {
+		retval = -EINTR;
+		goto out;
+	}
+
 	atomic_set(&buffer_ready, 0);
 
 	retval = -EFAULT;

From c0868934e536e0ff508f2d359d006b25abc4970d Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 9 Oct 2009 03:17:44 +0200
Subject: [PATCH 3/3] oprofile: warn on freeing event buffer too early

A race shouldn't happen since all workqueues or handlers are canceled
or flushed before the event buffer is freed. A warning is triggered
now if the buffer is freed too early.

Also, this patch adds some comments about event buffer protection,
reworks some code and adds code to clear buffer_pos during alloc and
free of the event buffer.

Cc: David Rientjes <rientjes@google.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 drivers/oprofile/event_buffer.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index c38adb389064..5df60a6b6776 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -35,17 +35,22 @@ static size_t buffer_pos;
 /* atomic_t because wait_event checks it outside of buffer_mutex */
 static atomic_t buffer_ready = ATOMIC_INIT(0);
 
-/* Add an entry to the event buffer. When we
- * get near to the end we wake up the process
- * sleeping on the read() of the file.
+/*
+ * Add an entry to the event buffer. When we get near to the end we
+ * wake up the process sleeping on the read() of the file. To protect
+ * the event_buffer this function may only be called when buffer_mutex
+ * is set.
  */
 void add_event_entry(unsigned long value)
 {
 	/*
-	 * catch potential error
+	 * This shouldn't happen since all workqueues or handlers are
+	 * canceled or flushed before the event buffer is freed.
 	 */
-	if (!event_buffer)
+	if (!event_buffer) {
+		WARN_ON_ONCE(1);
 		return;
+	}
 
 	if (buffer_pos == buffer_size) {
 		atomic_inc(&oprofile_stats.event_lost_overflow);
@@ -75,7 +80,6 @@ void wake_up_buffer_waiter(void)
 
 int alloc_event_buffer(void)
 {
-	int err = -ENOMEM;
 	unsigned long flags;
 
 	spin_lock_irqsave(&oprofilefs_lock, flags);
@@ -86,13 +90,12 @@ int alloc_event_buffer(void)
 	if (buffer_watershed >= buffer_size)
 		return -EINVAL;
 
+	buffer_pos = 0;
 	event_buffer = vmalloc(sizeof(unsigned long) * buffer_size);
 	if (!event_buffer)
-		goto out;
+		return -ENOMEM;
 
-	err = 0;
-out:
-	return err;
+	return 0;
 }
 
 
@@ -100,6 +103,7 @@ void free_event_buffer(void)
 {
 	mutex_lock(&buffer_mutex);
 	vfree(event_buffer);
+	buffer_pos = 0;
 	event_buffer = NULL;
 	mutex_unlock(&buffer_mutex);
 }
@@ -174,6 +178,7 @@ static ssize_t event_buffer_read(struct file *file, char __user *buf,
 
 	mutex_lock(&buffer_mutex);
 
+	/* May happen if the buffer is freed during pending reads. */
 	if (!event_buffer) {
 		retval = -EINTR;
 		goto out;